docarray · samsja · Nov 28, 2022 · Nov 24, 2022 · Nov 24, 2022 · Nov 24, 2022
diff --git a/docarray/document/mixins/image.py b/docarray/document/mixins/image.py
@@ -3,7 +3,7 @@
 import math
 import struct
 import warnings
-from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING
+from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING, List
 
 import numpy as np
 
@@ -186,10 +186,25 @@ def load_uri_to_image_tensor(
 
         :return: itself after processed
         """
+        from docarray import Document, DocumentArray
 
         buffer = _uri_to_blob(self.uri, **kwargs)
         tensor = _to_image_tensor(io.BytesIO(buffer), width=width, height=height)
-        self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis)
+
+        if isinstance(tensor, np.ndarray):
+            self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis)
+        elif isinstance(tensor, List):
+            self.chunks = DocumentArray(
+                [
+                    Document(
+                        tensor=_move_channel_axis(
+                            tensor[i], original_channel_axis=channel_axis
+                        )
+                    )
+                    for i in range(len(tensor))
+                ]
+            )
+
         return self
 
     def set_image_tensor_inv_normalization(
@@ -359,26 +374,53 @@ def _to_image_tensor(
     source,
     width: Optional[int] = None,
     height: Optional[int] = None,
-) -> 'np.ndarray':
+) -> Union[np.ndarray, List[np.array]]:
     """
-    Convert an image blob to tensor
+    Convert an image blob to tensor or List of image tensors, if multiple images are stored in file.
 
     :param source: binary blob or file path
     :param width: the width of the image tensor.
     :param height: the height of the tensor.
-    :return: image tensor
+    :return: image tensor or list of image tensors
     """
     from PIL import Image
 
     raw_img = Image.open(source)
+
     if width or height:
         new_width = width or raw_img.width
         new_height = height or raw_img.height
-        raw_img = raw_img.resize((new_width, new_height))
+
+    # support multi page tiff images
     try:
-        return np.array(raw_img.convert('RGB'))
-    except:
-        return np.array(raw_img)
+        n_frames = raw_img.n_frames
+    except AttributeError:
+        n_frames = 1
+
+    if n_frames > 1:
+
+        frames = []
+        for i in range(raw_img.n_frames):
+            raw_img.seek(i)
+            try:
+                img = raw_img.convert('RGB')
+            except:
+                img = raw_img
+
+            if width or height:
+                frames.append(np.array(img.resize((new_width, new_height))))
+            else:
+                frames.append(np.array(img))
+
+        return frames
+
+    else:
+        if width or height:
+            raw_img = raw_img.resize((new_width, new_height))
+        try:
+            return np.array(raw_img.convert('RGB'))
+        except:
+            return np.array(raw_img)
 
 
 def _to_image_buffer(arr: 'np.ndarray', image_format: str) -> bytes:

diff --git a/docs/datatypes/image/index.md b/docs/datatypes/image/index.md
@@ -33,6 +33,26 @@ print(d.tensor, d.tensor.shape)
 (618, 641, 3)
 ```
 
+DocArray also supports loading multi-page TIFF files. In this case, each page becomes its own chunk Document, and the image tensors are stored in the `.tensor` attribute of each chunk rather than on the top-level Document.
+
+```python
+from docarray import Document
+
+d = Document(uri='multi_page_tiff_file.tiff')
+d.load_uri_to_image_tensor()
+
+d.summary()
+```
+
+```text
+<Document ('id', 'uri', 'chunks') at 7f907d786d6c11ec840a1e008a366d49>
+    └─ chunks
+          ├─ <Document ('id', 'parent_id', 'granularity', 'tensor') at 7aa4c0ba66cf6c300b7f07fdcbc2fdc8>
+          ├─ <Document ('id', 'parent_id', 'granularity', 'tensor') at bc94a3e3ca60352f2e4c9ab1b1bb9c22>
+          └─ <Document ('id', 'parent_id', 'granularity', 'tensor') at 36fe0d1daf4442ad6461c619f8bb25b7>
+```
+
+
 ## Simple image processing
 
 DocArray provides some functions to help you preprocess the image data. You can resize it (i.e. downsampling/upsampling) and normalize it; you can switch the channel axis of the `.tensor` to meet certain requirements of other framework; and finally you can chain all these preprocessing steps together in one line. For example, before feeding data into a Pytorch-based ResNet Executor, the image needs to be normalized and the color axis should be at first, not at the last. You can do this via:
@@ -150,7 +170,9 @@ d.chunks.plot_image_sprites('simpsons-chunks.png')
 Hmm, doesn't change so much. This is because we scan the whole image using sliding windows with no overlap (i.e. stride). Let's do a bit oversampling:
 
 ```python
-d.convert_image_tensor_to_sliding_windows(window_shape=(64, 64), strides=(10, 10), as_chunks=True)
+d.convert_image_tensor_to_sliding_windows(
+    window_shape=(64, 64), strides=(10, 10), as_chunks=True
+)
 d.chunks.plot_image_sprites('simpsons-chunks-stride-10.png')
 ```
 

diff --git a/tests/unit/document/test_converters.py b/tests/unit/document/test_converters.py
@@ -56,6 +56,18 @@ def test_uri_to_tensor():
     assert doc.mime_type == 'image/png'
 
 
+def test_uri_to_tensors_with_multi_page_tiff():
+    doc = Document(uri=os.path.join(cur_dir, 'toydata/multi-page.tif'))
+    doc.load_uri_to_image_tensor()
+
+    assert doc.tensor is None
+    assert len(doc.chunks) == 3
+    for chunk in doc.chunks:
+        assert isinstance(chunk.tensor, np.ndarray)
+        assert chunk.tensor.ndim == 3
+        assert chunk.tensor.shape[-1] == 3
+
+
 def test_datauri_to_tensor():
     doc = Document(uri=os.path.join(cur_dir, 'toydata/test.png'))
     doc.convert_uri_to_datauri()

diff --git a/tests/unit/document/toydata/multi-page.tif b/tests/unit/document/toydata/multi-page.tif