Skip to content
60 changes: 51 additions & 9 deletions docarray/document/mixins/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import math
import struct
import warnings
from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING
from typing import Optional, Tuple, Union, BinaryIO, TYPE_CHECKING, List

import numpy as np

Expand Down Expand Up @@ -186,10 +186,25 @@ def load_uri_to_image_tensor(

:return: itself after processed
"""
from docarray import Document, DocumentArray

buffer = _uri_to_blob(self.uri, **kwargs)
tensor = _to_image_tensor(io.BytesIO(buffer), width=width, height=height)
self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis)

if isinstance(tensor, np.ndarray):
self.tensor = _move_channel_axis(tensor, original_channel_axis=channel_axis)
elif isinstance(tensor, List):
self.chunks = DocumentArray(
[
Document(
tensor=_move_channel_axis(
tensor[i], original_channel_axis=channel_axis
)
)
for i in range(len(tensor))
]
)

return self

def set_image_tensor_inv_normalization(
Expand Down Expand Up @@ -359,26 +374,53 @@ def _to_image_tensor(
source,
width: Optional[int] = None,
height: Optional[int] = None,
) -> 'np.ndarray':
) -> Union[np.ndarray, List[np.array]]:
"""
Convert an image blob to tensor
Convert an image blob to tensor or List of image tensors, if multiple images are stored in file.

:param source: binary blob or file path
:param width: the width of the image tensor.
:param height: the height of the tensor.
:return: image tensor
:return: image tensor or list of image tensors
"""
from PIL import Image

raw_img = Image.open(source)

if width or height:
new_width = width or raw_img.width
new_height = height or raw_img.height
raw_img = raw_img.resize((new_width, new_height))

# support multi page tiff images
try:
return np.array(raw_img.convert('RGB'))
except:
return np.array(raw_img)
n_frames = raw_img.n_frames
except AttributeError:
n_frames = 1

if n_frames > 1:

frames = []
for i in range(raw_img.n_frames):
raw_img.seek(i)
try:
img = raw_img.convert('RGB')
except:
img = raw_img

if width or height:
frames.append(np.array(img.resize((new_width, new_height))))
else:
frames.append(np.array(img))

return frames

else:
if width or height:
raw_img = raw_img.resize((new_width, new_height))
try:
return np.array(raw_img.convert('RGB'))
except:
return np.array(raw_img)


def _to_image_buffer(arr: 'np.ndarray', image_format: str) -> bytes:
Expand Down
24 changes: 23 additions & 1 deletion docs/datatypes/image/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,26 @@ print(d.tensor, d.tensor.shape)
(618, 641, 3)
```

DocArray also supports loading multi-page TIFF files. In this case, each page's image tensor is stored in the `.tensor` attribute of a separate chunk rather than on the top-level Document.

```python
from docarray import Document

d = Document(uri='multi_page_tiff_file.tiff')
d.load_uri_to_image_tensor()

d.summary()
```

```text
<Document ('id', 'uri', 'chunks') at 7f907d786d6c11ec840a1e008a366d49>
└─ chunks
├─ <Document ('id', 'parent_id', 'granularity', 'tensor') at 7aa4c0ba66cf6c300b7f07fdcbc2fdc8>
├─ <Document ('id', 'parent_id', 'granularity', 'tensor') at bc94a3e3ca60352f2e4c9ab1b1bb9c22>
└─ <Document ('id', 'parent_id', 'granularity', 'tensor') at 36fe0d1daf4442ad6461c619f8bb25b7>
```


## Simple image processing

DocArray provides some functions to help you preprocess the image data. You can resize it (i.e. downsampling/upsampling) and normalize it; you can switch the channel axis of the `.tensor` to meet the requirements of other frameworks; and finally you can chain all these preprocessing steps together in one line. For example, before feeding data into a PyTorch-based ResNet Executor, the image needs to be normalized and the color axis should come first, not last. You can do this via:
Expand Down Expand Up @@ -150,7 +170,9 @@ d.chunks.plot_image_sprites('simpsons-chunks.png')
Hmm, it doesn't change much. This is because we scan the whole image using sliding windows with no overlap (i.e. the stride equals the window size). Let's do a bit of oversampling:

```python
d.convert_image_tensor_to_sliding_windows(window_shape=(64, 64), strides=(10, 10), as_chunks=True)
d.convert_image_tensor_to_sliding_windows(
window_shape=(64, 64), strides=(10, 10), as_chunks=True
)
d.chunks.plot_image_sprites('simpsons-chunks-stride-10.png')
```

Expand Down
12 changes: 12 additions & 0 deletions tests/unit/document/test_converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,18 @@ def test_uri_to_tensor():
assert doc.mime_type == 'image/png'


def test_uri_to_tensors_with_multi_page_tiff():
    """Check that a multi-page TIFF is loaded as one tensor per chunk.

    The top-level tensor must stay unset, and each of the three chunks
    must carry a 3-dimensional ndarray whose last axis has size 3.
    """
    tiff_path = os.path.join(cur_dir, 'toydata/multi-page.tif')
    doc = Document(uri=tiff_path)
    doc.load_uri_to_image_tensor()

    # Multi-page input populates chunks, not the top-level tensor.
    assert doc.tensor is None
    assert len(doc.chunks) == 3

    for page in doc.chunks:
        page_tensor = page.tensor
        assert isinstance(page_tensor, np.ndarray)
        assert page_tensor.ndim == 3
        assert page_tensor.shape[-1] == 3


def test_datauri_to_tensor():
doc = Document(uri=os.path.join(cur_dir, 'toydata/test.png'))
doc.convert_uri_to_datauri()
Expand Down
Binary file added tests/unit/document/toydata/multi-page.tif
Binary file not shown.