diff --git a/docarray/document/mixins/image.py b/docarray/document/mixins/image.py index 6d055747262..7f4c32d22f0 100644 --- a/docarray/document/mixins/image.py +++ b/docarray/document/mixins/image.py @@ -3,7 +3,7 @@ import math import struct import warnings -from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING +from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING, List import numpy as np @@ -186,10 +186,25 @@ def load_uri_to_image_tensor( :return: itself after processed """ + from docarray import Document, DocumentArray buffer = _uri_to_blob(self.uri, **kwargs) tensor = _to_image_tensor(io.BytesIO(buffer), width=width, height=height) - self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis) + + if isinstance(tensor, np.ndarray): + self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis) + elif isinstance(tensor, List): + self.chunks = DocumentArray( + [ + Document( + tensor=_move_channel_axis( + tensor[i], original_channel_axis=channel_axis + ) + ) + for i in range(len(tensor)) + ] + ) + return self def set_image_tensor_inv_normalization( @@ -359,26 +374,53 @@ def _to_image_tensor( source, width: Optional[int] = None, height: Optional[int] = None, -) -> 'np.ndarray': +) -> Union[np.ndarray, List[np.array]]: """ - Convert an image blob to tensor + Convert an image blob to tensor or List of image tensors, if multiple images are stored in file. :param source: binary blob or file path :param width: the width of the image tensor. :param height: the height of the tensor. 
- :return: image tensor + :return: image tensor or list of image tensors """ from PIL import Image raw_img = Image.open(source) + if width or height: new_width = width or raw_img.width new_height = height or raw_img.height - raw_img = raw_img.resize((new_width, new_height)) + + # support multi page tiff images try: - return np.array(raw_img.convert('RGB')) - except: - return np.array(raw_img) + n_frames = raw_img.n_frames + except AttributeError: + n_frames = 1 + + if n_frames > 1: + + frames = [] + for i in range(raw_img.n_frames): + raw_img.seek(i) + try: + img = raw_img.convert('RGB') + except: + img = raw_img + + if width or height: + frames.append(np.array(img.resize((new_width, new_height)))) + else: + frames.append(np.array(img)) + + return frames + + else: + if width or height: + raw_img = raw_img.resize((new_width, new_height)) + try: + return np.array(raw_img.convert('RGB')) + except: + return np.array(raw_img) def _to_image_buffer(arr: 'np.ndarray', image_format: str) -> bytes: diff --git a/docs/datatypes/image/index.md b/docs/datatypes/image/index.md index e92067a6a5e..2284b51c3c5 100644 --- a/docs/datatypes/image/index.md +++ b/docs/datatypes/image/index.md @@ -33,6 +33,26 @@ print(d.tensor, d.tensor.shape) (618, 641, 3) ``` +DocArray also supports loading multi-page TIFF files. In this case, the image tensors are stored in the `.tensor` attribute of each chunk instead of at the top level. + +```python +from docarray import Document + +d = Document(uri='multi_page_tiff_file.tiff') +d.load_uri_to_image_tensor() + +d.summary() +``` + +```text + + └─ chunks + ├─ + ├─ + └─ +``` + + ## Simple image processing DocArray provides some functions to help you preprocess the image data. You can resize it (i.e. downsampling/upsampling) and normalize it; you can switch the channel axis of the `.tensor` to meet certain requirements of other framework; and finally you can chain all these preprocessing steps together in one line. 
For example, before feeding data into a Pytorch-based ResNet Executor, the image needs to be normalized and the color axis should be at first, not at the last. You can do this via: @@ -150,7 +170,9 @@ d.chunks.plot_image_sprites('simpsons-chunks.png') Hmm, doesn't change so much. This is because we scan the whole image using sliding windows with no overlap (i.e. stride). Let's do a bit oversampling: ```python -d.convert_image_tensor_to_sliding_windows(window_shape=(64, 64), strides=(10, 10), as_chunks=True) +d.convert_image_tensor_to_sliding_windows( + window_shape=(64, 64), strides=(10, 10), as_chunks=True +) d.chunks.plot_image_sprites('simpsons-chunks-stride-10.png') ``` diff --git a/tests/unit/document/test_converters.py b/tests/unit/document/test_converters.py index f1ddf50b3d6..6c010809e0c 100644 --- a/tests/unit/document/test_converters.py +++ b/tests/unit/document/test_converters.py @@ -56,6 +56,18 @@ def test_uri_to_tensor(): assert doc.mime_type == 'image/png' +def test_uri_to_tensors_with_multi_page_tiff(): + doc = Document(uri=os.path.join(cur_dir, 'toydata/multi-page.tif')) + doc.load_uri_to_image_tensor() + + assert doc.tensor is None + assert len(doc.chunks) == 3 + for chunk in doc.chunks: + assert isinstance(chunk.tensor, np.ndarray) + assert chunk.tensor.ndim == 3 + assert chunk.tensor.shape[-1] == 3 + + def test_datauri_to_tensor(): doc = Document(uri=os.path.join(cur_dir, 'toydata/test.png')) doc.convert_uri_to_datauri() diff --git a/tests/unit/document/toydata/multi-page.tif b/tests/unit/document/toydata/multi-page.tif new file mode 100644 index 00000000000..66b9ee395a4 Binary files /dev/null and b/tests/unit/document/toydata/multi-page.tif differ