diff --git a/docarray/computation/abstract_comp_backend.py b/docarray/computation/abstract_comp_backend.py
index 1bf19495e99..d29af1c2cd4 100644
--- a/docarray/computation/abstract_comp_backend.py
+++ b/docarray/computation/abstract_comp_backend.py
@@ -74,6 +74,12 @@ def to_device(tensor: 'TTensor', device: str) -> 'TTensor':
         """Move the tensor to the specified device."""
         ...
 
+    @staticmethod
+    @abstractmethod
+    def device(tensor: 'TTensor') -> Optional[str]:
+        """Return device on which the tensor is allocated."""
+        ...
+
     @staticmethod
     @abstractmethod
     def shape(tensor: 'TTensor') -> Tuple[int, ...]:
@@ -104,6 +110,18 @@ def detach(tensor: 'TTensor') -> 'TTensor':
         """
         ...
 
+    @staticmethod
+    @abstractmethod
+    def dtype(tensor: 'TTensor') -> Any:
+        """Get the data type of the tensor."""
+        ...
+
+    @staticmethod
+    @abstractmethod
+    def isnan(tensor: 'TTensor') -> 'TTensor':
+        """Check element-wise for nan and return result as a boolean array"""
+        ...
+
     @staticmethod
     @abstractmethod
     def minmax_normalize(
@@ -111,7 +129,7 @@ def minmax_normalize(
         t_range: Tuple = (0, 1),
         x_range: Optional[Tuple] = None,
         eps: float = 1e-7,
-    ):
+    ) -> 'TTensor':
         """
         Normalize values in `tensor` into `t_range`.
 
diff --git a/docarray/computation/numpy_backend.py b/docarray/computation/numpy_backend.py
index fd51d254a20..afa2733c074 100644
--- a/docarray/computation/numpy_backend.py
+++ b/docarray/computation/numpy_backend.py
@@ -45,6 +45,11 @@ def to_device(tensor: 'np.ndarray', device: str) -> 'np.ndarray':
         """Move the tensor to the specified device."""
         raise NotImplementedError('Numpy does not support devices (GPU).')
 
+    @staticmethod
+    def device(tensor: 'np.ndarray') -> Optional[str]:
+        """Return device on which the tensor is allocated."""
+        return None
+
     @staticmethod
     def n_dim(array: 'np.ndarray') -> int:
         return array.ndim
@@ -102,13 +107,23 @@ def detach(tensor: 'np.ndarray') -> 'np.ndarray':
         """
         return tensor
 
+    @staticmethod
+    def dtype(tensor: 'np.ndarray') -> np.dtype:
+        """Get the data type of the tensor."""
+        return tensor.dtype
+
+    @staticmethod
+    def isnan(tensor: 'np.ndarray') -> 'np.ndarray':
+        """Check element-wise for nan and return result as a boolean array"""
+        return np.isnan(tensor)
+
     @staticmethod
     def minmax_normalize(
         tensor: 'np.ndarray',
         t_range: Tuple = (0, 1),
         x_range: Optional[Tuple] = None,
         eps: float = 1e-7,
-    ):
+    ) -> 'np.ndarray':
         """
         Normalize values in `tensor` into `t_range`.
 
diff --git a/docarray/computation/torch_backend.py b/docarray/computation/torch_backend.py
index 13d2aa8471a..c05f9fb4c29 100644
--- a/docarray/computation/torch_backend.py
+++ b/docarray/computation/torch_backend.py
@@ -45,6 +45,11 @@ def to_device(tensor: 'torch.Tensor', device: str) -> 'torch.Tensor':
         """Move the tensor to the specified device."""
         return tensor.to(device)
 
+    @staticmethod
+    def device(tensor: 'torch.Tensor') -> Optional[str]:
+        """Return device on which the tensor is allocated."""
+        return str(tensor.device)
+
     @staticmethod
     def empty(
         shape: Tuple[int, ...],
@@ -106,13 +111,23 @@ def detach(tensor: 'torch.Tensor') -> 'torch.Tensor':
         """
         return tensor.detach()
 
+    @staticmethod
+    def dtype(tensor: 'torch.Tensor') -> torch.dtype:
+        """Get the data type of the tensor."""
+        return tensor.dtype
+
+    @staticmethod
+    def isnan(tensor: 'torch.Tensor') -> 'torch.Tensor':
+        """Check element-wise for nan and return result as a boolean array"""
+        return torch.isnan(tensor)
+
     @staticmethod
     def minmax_normalize(
         tensor: 'torch.Tensor',
         t_range: Tuple = (0, 1),
         x_range: Optional[Tuple] = None,
         eps: float = 1e-7,
-    ):
+    ) -> 'torch.Tensor':
         """
         Normalize values in `tensor` into `t_range`.
 
diff --git a/docarray/display/document_array_summary.py b/docarray/display/document_array_summary.py
index 97357cba2d3..1f32b9f970e 100644
--- a/docarray/display/document_array_summary.py
+++ b/docarray/display/document_array_summary.py
@@ -1,6 +1,9 @@
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, List
+
+from docarray.typing.tensor.abstract_tensor import AbstractTensor
 
 if TYPE_CHECKING:
+    from docarray.array import DocumentArrayStacked
     from docarray.array.abstract_array import AnyDocumentArray
 
 
@@ -18,10 +21,57 @@ def summary(self) -> None:
         from rich.panel import Panel
         from rich.table import Table
 
+        from docarray.array import DocumentArrayStacked
+
         table = Table(box=box.SIMPLE, highlight=True)
         table.show_header = False
         table.add_row('Type', self.da.__class__.__name__)
-        table.add_row('Length', str(len(self.da)))
+        table.add_row('Length', str(len(self.da)), end_section=True)
+
+        if isinstance(self.da, DocumentArrayStacked):
+            table.add_row('Stacked columns:')
+            stacked_fields = self._get_stacked_fields(da=self.da)
+            for field_name in stacked_fields:
+                val = self.da
+                for attr in field_name.split('.'):
+                    val = getattr(val, attr)
+
+                if isinstance(val, AbstractTensor):
+                    comp_be = val.get_comp_backend()
+                    if comp_be.isnan(val).all():
+                        col_2 = f'None ({val.__class__.__name__})'
+                    else:
+                        col_2 = (
+                            f'{val.__class__.__name__} of shape {comp_be.shape(val)}'
+                            f', dtype: {comp_be.dtype(val)}'
+                        )
+                        if comp_be.device(val):
+                            col_2 += f', device: {comp_be.device(val)}'
+
+                    table.add_row(f'  • {field_name}:', col_2)
 
         Console().print(Panel(table, title='DocumentArray Summary', expand=False))
         self.da.document_type.schema_summary()
+
+    @staticmethod
+    def _get_stacked_fields(da: 'DocumentArrayStacked') -> List[str]:
+        """
+        Return a list of the field names of a DocumentArrayStacked instance that are
+        stacked, i.e. all the fields that are of type AbstractTensor. Nested field
+        paths are separated by dot, such as: 'attr.nested_attr'.
+        """
+        from docarray.array import DocumentArrayStacked
+
+        fields = []
+        for field_name, value in da._columns.items():
+            if isinstance(value, AbstractTensor):
+                fields.append(field_name)
+            elif isinstance(value, DocumentArrayStacked):
+                fields.extend(
+                    [
+                        f'{field_name}.{x}'
+                        for x in DocumentArraySummary._get_stacked_fields(da=value)
+                    ]
+                )
+
+        return fields
diff --git a/tests/units/computation_backends/numpy_backend/test_basics.py b/tests/units/computation_backends/numpy_backend/test_basics.py
index 5f34456f21a..29cebb0d22b 100644
--- a/tests/units/computation_backends/numpy_backend/test_basics.py
+++ b/tests/units/computation_backends/numpy_backend/test_basics.py
@@ -36,6 +36,17 @@ def test_shape(array, result):
     assert type(shape) == tuple
 
 
+def test_device():
+    array = np.array([1, 2, 3])
+    assert NumpyCompBackend.device(array) is None
+
+
+@pytest.mark.parametrize('dtype', [np.int64, np.float64, np.int32, np.float32])
+def test_dtype(dtype):
+    array = np.array([1, 2, 3], dtype=dtype)
+    assert NumpyCompBackend.dtype(array) == dtype
+
+
 def test_empty():
     array = NumpyCompBackend.empty((10, 3))
     assert array.shape == (10, 3)
diff --git a/tests/units/computation_backends/torch_backend/test_basics.py b/tests/units/computation_backends/torch_backend/test_basics.py
index f1d06779293..eab546615f0 100644
--- a/tests/units/computation_backends/torch_backend/test_basics.py
+++ b/tests/units/computation_backends/torch_backend/test_basics.py
@@ -38,6 +38,17 @@ def test_shape(array, result):
     assert type(shape) == tuple
 
 
+@pytest.mark.parametrize('dtype', [torch.int64, torch.float64, torch.int, torch.float])
+def test_dtype(dtype):
+    tensor = torch.tensor([1, 2, 3], dtype=dtype)
+    assert TorchCompBackend.dtype(tensor) == dtype
+
+
+def test_device():
+    tensor = torch.tensor([1, 2, 3])
+    assert TorchCompBackend.device(tensor) == 'cpu'
+
+
 def test_empty():
     tensor = TorchCompBackend.empty((10, 3))
     assert tensor.shape == (10, 3)