From b072ff3f7e20888bb423fd104d3d0109ba91dd9d Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 25 Jan 2023 15:58:52 +0100 Subject: [PATCH 1/8] feat: add columns information for stacked array to display Signed-off-by: anna-charlotte --- docarray/display/document_array_summary.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/docarray/display/document_array_summary.py b/docarray/display/document_array_summary.py index 97357cba2d3..1e264abfdcc 100644 --- a/docarray/display/document_array_summary.py +++ b/docarray/display/document_array_summary.py @@ -18,10 +18,22 @@ def summary(self) -> None: from rich.panel import Panel from rich.table import Table + from docarray.array import DocumentArrayStacked + table = Table(box=box.SIMPLE, highlight=True) table.show_header = False table.add_row('Type', self.da.__class__.__name__) table.add_row('Length', str(len(self.da))) + if isinstance(self.da, DocumentArrayStacked): + table.add_section() + table.add_row('Stacked columns:') + for field_name, value in self.da._columns.items(): + shape = value.get_comp_backend().shape(value) + table.add_row( + f' • {field_name}:', + f'{value.__class__.__name__} of shape {shape}', + ) + Console().print(Panel(table, title='DocumentArray Summary', expand=False)) self.da.document_type.schema_summary() From de84cc12966e3430d3665236aca609d124ceaecb Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 26 Jan 2023 12:01:36 +0100 Subject: [PATCH 2/8] feat: add isnan and dtype to comp backends Signed-off-by: anna-charlotte --- docarray/computation/abstract_comp_backend.py | 14 +++++++++++++- docarray/computation/numpy_backend.py | 12 +++++++++++- docarray/computation/torch_backend.py | 12 +++++++++++- 3 files changed, 35 insertions(+), 3 deletions(-) diff --git a/docarray/computation/abstract_comp_backend.py b/docarray/computation/abstract_comp_backend.py index 1bf19495e99..9b8c4e0b4c6 100644 --- a/docarray/computation/abstract_comp_backend.py +++ b/docarray/computation/abstract_comp_backend.py @@ -104,6 +104,18 @@ def detach(tensor: 'TTensor') -> 'TTensor': """ ... + @staticmethod + @abstractmethod + def dtype(tensor: 'TTensor'): + """Get the data type of the tensor.""" + ... + + @staticmethod + @abstractmethod + def isnan(tensor: 'TTensor') -> 'TTensor': + """Check element-wise for nan and return result as a boolean array""" + ... + @staticmethod @abstractmethod def minmax_normalize( @@ -111,7 +123,7 @@ def minmax_normalize( t_range: Tuple = (0, 1), x_range: Optional[Tuple] = None, eps: float = 1e-7, - ): + ) -> 'TTensor': """ Normalize values in `tensor` into `t_range`. diff --git a/docarray/computation/numpy_backend.py b/docarray/computation/numpy_backend.py index fd51d254a20..a6d7825e3bb 100644 --- a/docarray/computation/numpy_backend.py +++ b/docarray/computation/numpy_backend.py @@ -102,13 +102,23 @@ def detach(tensor: 'np.ndarray') -> 'np.ndarray': """ return tensor + @staticmethod + def dtype(tensor: 'np.ndarray') -> np.dtype: + """Get the data type of the tensor.""" + return tensor.dtype + + @staticmethod + def isnan(tensor: 'np.ndarray') -> 'np.ndarray': + """Check element-wise for nan and return result as a boolean array""" + return np.isnan(tensor) + @staticmethod def minmax_normalize( tensor: 'np.ndarray', t_range: Tuple = (0, 1), x_range: Optional[Tuple] = None, eps: float = 1e-7, - ): + ) -> 'np.ndarray': """ Normalize values in `tensor` into `t_range`. diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py index 13d2aa8471a..fe21e69fcf9 100644 --- a/docarray/computation/torch_backend.py +++ b/docarray/computation/torch_backend.py @@ -106,13 +106,23 @@ def detach(tensor: 'torch.Tensor') -> 'torch.Tensor': """ return tensor.detach() + @staticmethod + def dtype(tensor: 'torch.Tensor') -> torch.dtype: + """Get the data type of the tensor.""" + return tensor.dtype + + @staticmethod + def isnan(tensor: 'torch.Tensor') -> 'torch.Tensor': + """Check element-wise for nan and return result as a boolean array""" + return torch.isnan(tensor) + @staticmethod def minmax_normalize( tensor: 'torch.Tensor', t_range: Tuple = (0, 1), x_range: Optional[Tuple] = None, eps: float = 1e-7, - ): + ) -> 'torch.Tensor': """ Normalize values in `tensor` into `t_range`. From 77c6120c0505c2e841718bf234acffd1ffcd2e7d Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 26 Jan 2023 12:03:05 +0100 Subject: [PATCH 3/8] fix: check for nested stacked fields in summary Signed-off-by: anna-charlotte --- docarray/display/document_array_summary.py | 52 +++++++++++++++++++--- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/docarray/display/document_array_summary.py b/docarray/display/document_array_summary.py index 1e264abfdcc..8c821992881 100644 --- a/docarray/display/document_array_summary.py +++ b/docarray/display/document_array_summary.py @@ -1,6 +1,9 @@ -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, List + +from docarray.typing.tensor.abstract_tensor import AbstractTensor if TYPE_CHECKING: + from docarray.array import DocumentArrayStacked from docarray.array.abstract_array import AnyDocumentArray @@ -28,12 +31,47 @@ def summary(self) -> None: if isinstance(self.da, DocumentArrayStacked): table.add_section() table.add_row('Stacked columns:') - for field_name, value in self.da._columns.items(): - shape = value.get_comp_backend().shape(value) - table.add_row( - f' • {field_name}:', - f'{value.__class__.__name__} of shape {shape}', - ) + stacked_fields = self._get_stacked_fields(da=self.da) + for field in stacked_fields: + da = self.da + for attr in field.split('.'): + da = getattr(da, attr) + + if isinstance(da, AbstractTensor): + col_1 = f' • {field}:' + comp_be = da.get_comp_backend() + cls_name = da.__class__.__name__ + if comp_be.isnan(da).all(): + col_2 = f'None ({cls_name})' + else: + col_2 = ( + f'{cls_name} of shape {comp_be.shape(da)}, ' + f'dtype: {comp_be.shape(da)}' + ) + table.add_row(col_1, col_2) Console().print(Panel(table, title='DocumentArray Summary', expand=False)) self.da.document_type.schema_summary() + + @staticmethod + def _get_stacked_fields(da: 'DocumentArrayStacked') -> List[str]: + """ + Returns a list of field names that are stacked of a DocumentArrayStacked + instance, i.e. all the fields that are of type AbstractTensor. Nested field + paths are dot separated. + """ + from docarray.array import DocumentArrayStacked + + fields = [] + for field_name, value in da._columns.items(): + if isinstance(value, AbstractTensor): + fields.append(field_name) + elif isinstance(value, DocumentArrayStacked): + fields.extend( + [ + f'{field_name}.{x}' + for x in DocumentArraySummary._get_stacked_fields(da=value) + ] + ) + + return fields From d7567efe7471439fc1f3937f7b8f960ce6a31eac Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 26 Jan 2023 13:11:38 +0100 Subject: [PATCH 4/8] feat: add device to comp backends Signed-off-by: anna-charlotte --- docarray/computation/abstract_comp_backend.py | 6 ++++++ docarray/computation/numpy_backend.py | 5 +++++ docarray/computation/torch_backend.py | 5 +++++ 3 files changed, 16 insertions(+) diff --git a/docarray/computation/abstract_comp_backend.py b/docarray/computation/abstract_comp_backend.py index 9b8c4e0b4c6..8a76971d610 100644 --- a/docarray/computation/abstract_comp_backend.py +++ b/docarray/computation/abstract_comp_backend.py @@ -74,6 +74,12 @@ def to_device(tensor: 'TTensor', device: str) -> 'TTensor': """Move the tensor to the specified device.""" ... + @staticmethod + @abstractmethod + def device(tensor: 'TTensor') -> Optional[str]: + """Return device on which the tensor is allocated.""" + ... + @staticmethod @abstractmethod def shape(tensor: 'TTensor') -> Tuple[int, ...]: diff --git a/docarray/computation/numpy_backend.py b/docarray/computation/numpy_backend.py index a6d7825e3bb..afa2733c074 100644 --- a/docarray/computation/numpy_backend.py +++ b/docarray/computation/numpy_backend.py @@ -45,6 +45,11 @@ def to_device(tensor: 'np.ndarray', device: str) -> 'np.ndarray': """Move the tensor to the specified device.""" raise NotImplementedError('Numpy does not support devices (GPU).') + @staticmethod + def device(tensor: 'np.ndarray') -> Optional[str]: + """Return device on which the tensor is allocated.""" + return None + @staticmethod def n_dim(array: 'np.ndarray') -> int: return array.ndim diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py index fe21e69fcf9..2dc22499df0 100644 --- a/docarray/computation/torch_backend.py +++ b/docarray/computation/torch_backend.py @@ -45,6 +45,11 @@ def to_device(tensor: 'torch.Tensor', device: str) -> 'torch.Tensor': """Move the tensor to the specified device.""" return tensor.to(device) + @staticmethod + def device(tensor: 'torch.Tensor') -> Optional[str]: + """Return device on which the tensor is allocated.""" + return tensor.device + @staticmethod def empty( shape: Tuple[int, ...], From 3e45748483b973295e031c540ad30f16a84628d8 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 26 Jan 2023 13:18:15 +0100 Subject: [PATCH 5/8] fix: mypy Signed-off-by: anna-charlotte --- docarray/computation/torch_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py index 2dc22499df0..c05f9fb4c29 100644 --- a/docarray/computation/torch_backend.py +++ b/docarray/computation/torch_backend.py @@ -48,7 +48,7 @@ def to_device(tensor: 'torch.Tensor', device: str) -> 'torch.Tensor': @staticmethod def device(tensor: 'torch.Tensor') -> Optional[str]: """Return device on which the tensor is allocated.""" - return tensor.device + return str(tensor.device) @staticmethod def empty( From 3598727b35cbc6cd4b36ea54cb611ec8ef6ffe84 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 26 Jan 2023 13:29:57 +0100 Subject: [PATCH 6/8] fix: clean up Signed-off-by: anna-charlotte --- docarray/display/document_array_summary.py | 36 +++++++++++----------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docarray/display/document_array_summary.py b/docarray/display/document_array_summary.py index 8c821992881..1f32b9f970e 100644 --- a/docarray/display/document_array_summary.py +++ b/docarray/display/document_array_summary.py @@ -26,29 +26,29 @@ def summary(self) -> None: table = Table(box=box.SIMPLE, highlight=True) table.show_header = False table.add_row('Type', self.da.__class__.__name__) - table.add_row('Length', str(len(self.da))) + table.add_row('Length', str(len(self.da)), end_section=True) if isinstance(self.da, DocumentArrayStacked): - table.add_section() table.add_row('Stacked columns:') stacked_fields = self._get_stacked_fields(da=self.da) - for field in stacked_fields: - da = self.da - for attr in field.split('.'): - da = getattr(da, attr) + for field_name in stacked_fields: + val = self.da + for attr in field_name.split('.'): + val = getattr(val, attr) - if isinstance(da, AbstractTensor): - col_1 = f' • {field}:' - comp_be = da.get_comp_backend() - cls_name = da.__class__.__name__ - if comp_be.isnan(da).all(): - col_2 = f'None ({cls_name})' + if isinstance(val, AbstractTensor): + comp_be = val.get_comp_backend() + if comp_be.isnan(val).all(): + col_2 = f'None ({val.__class__.__name__})' else: col_2 = ( - f'{cls_name} of shape {comp_be.shape(da)}, ' - f'dtype: {comp_be.shape(da)}' + f'{val.__class__.__name__} of shape {comp_be.shape(val)}' + f', dtype: {comp_be.dtype(val)}' ) - table.add_row(col_1, col_2) + if comp_be.device(val): + col_2 += f', device: {comp_be.device(val)}' + + table.add_row(f' • {field_name}:', col_2) Console().print(Panel(table, title='DocumentArray Summary', expand=False)) self.da.document_type.schema_summary() @@ -56,9 +56,9 @@ def summary(self) -> None: @staticmethod def _get_stacked_fields(da: 'DocumentArrayStacked') -> List[str]: """ - Returns a list of field names that are stacked of a DocumentArrayStacked - instance, i.e. all the fields that are of type AbstractTensor. Nested field - paths are dot separated. + Return a list of the field names of a DocumentArrayStacked instance that are + stacked, i.e. all the fields that are of type AbstractTensor. Nested field + paths are separated by dot, such as: 'attr.nested_attr'. """ from docarray.array import DocumentArrayStacked From e5c5dcff255922fce279143da776685780c45d87 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 26 Jan 2023 13:40:42 +0100 Subject: [PATCH 7/8] test: add tests to comp backend Signed-off-by: anna-charlotte --- .../computation_backends/numpy_backend/test_basics.py | 11 +++++++++++ .../computation_backends/torch_backend/test_basics.py | 11 +++++++++++ 2 files changed, 22 insertions(+) diff --git a/tests/units/computation_backends/numpy_backend/test_basics.py b/tests/units/computation_backends/numpy_backend/test_basics.py index 5f34456f21a..29cebb0d22b 100644 --- a/tests/units/computation_backends/numpy_backend/test_basics.py +++ b/tests/units/computation_backends/numpy_backend/test_basics.py @@ -36,6 +36,17 @@ def test_shape(array, result): assert type(shape) == tuple +def test_device(): + array = np.array([1, 2, 3]) + assert NumpyCompBackend.device(array) is None + + +@pytest.mark.parametrize('dtype', [np.int64, np.float64, np.int, np.float]) +def test_dtype(dtype): + array = np.array([1, 2, 3], dtype=dtype) + assert NumpyCompBackend.dtype(array) == dtype + + def test_empty(): array = NumpyCompBackend.empty((10, 3)) assert array.shape == (10, 3) diff --git a/tests/units/computation_backends/torch_backend/test_basics.py b/tests/units/computation_backends/torch_backend/test_basics.py index f1d06779293..eab546615f0 100644 --- a/tests/units/computation_backends/torch_backend/test_basics.py +++ b/tests/units/computation_backends/torch_backend/test_basics.py @@ -38,6 +38,17 @@ def test_shape(array, result): assert type(shape) == tuple +@pytest.mark.parametrize('dtype', [torch.int64, torch.float64, torch.int, torch.float]) +def test_dtype(dtype): + tensor = torch.tensor([1, 2, 3], dtype=dtype) + assert TorchCompBackend.dtype(tensor) == dtype + + +def test_device(): + tensor = torch.tensor([1, 2, 3]) + assert TorchCompBackend.device(tensor) == 'cpu' + + def test_empty(): tensor = TorchCompBackend.empty((10, 3)) assert tensor.shape == (10, 3) From 0435be72a1d881e6f41dd96cbca011b09aae33d3 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 26 Jan 2023 14:27:01 +0100 Subject: [PATCH 8/8] fix: apply suggestion from code review Signed-off-by: anna-charlotte --- docarray/computation/abstract_comp_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/computation/abstract_comp_backend.py b/docarray/computation/abstract_comp_backend.py index 8a76971d610..d29af1c2cd4 100644 --- a/docarray/computation/abstract_comp_backend.py +++ b/docarray/computation/abstract_comp_backend.py @@ -112,7 +112,7 @@ def detach(tensor: 'TTensor') -> 'TTensor': @staticmethod @abstractmethod - def dtype(tensor: 'TTensor'): + def dtype(tensor: 'TTensor') -> Any: """Get the data type of the tensor.""" ...