Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
77a2055
feat: allow da bulk access to return da for document
samsja Nov 16, 2022
bd6d128
fix: fix mypy type pb
samsja Nov 16, 2022
d778cec
fix: add link to the mypy issue
samsja Nov 16, 2022
717e7ee
fix: remove useless list type hint
samsja Nov 16, 2022
2c3d5f1
feat: torch tensor type (#800)
JohannesMessner Nov 17, 2022
e5d678c
feat: add fastapi to dependency
samsja Nov 17, 2022
414a04c
feat(wip): add fake method to dump tensor to json
samsja Nov 17, 2022
42c6fe4
feat(wip): add fastapi test
samsja Nov 17, 2022
56497b9
feat: add json dump for type and document
samsja Nov 18, 2022
945a72a
feat: add json compatible with orjson
samsja Nov 21, 2022
83236cd
refactor: clean tests
samsja Nov 21, 2022
f1a4d7b
fix: remove duplicate
samsja Nov 21, 2022
c983246
fix: better json schema for tensor
samsja Nov 21, 2022
3873a9a
fix: fix fast api test
samsja Nov 22, 2022
51a402d
refactor: move to json test to integration
samsja Nov 22, 2022
2e92866
fix: json load from tensor type now working
samsja Nov 22, 2022
606134b
fix: add tensor to fastapi test
samsja Nov 22, 2022
731feff
fix: add new fastapi test
samsja Nov 22, 2022
e114b91
Merge branch 'feat-rewrite-v2' into feat-v2-fastapi-test
samsja Nov 22, 2022
061b05e
fix: fix mypy
samsja Nov 22, 2022
94c0069
feat: add more testing for text uri about json
samsja Nov 22, 2022
e04285a
fix: fix default orjson not returning
samsja Nov 22, 2022
b8de8bc
fix: apply johannes suggestion on docstring
samsja Nov 22, 2022
19ffb0b
fix: does not perform copy anymore on torch tensor unwrap
samsja Nov 22, 2022
0b7dac7
fix: add johannes suggestion
samsja Nov 22, 2022
a334c65
fix: fix ruff line length
samsja Nov 23, 2022
909ee74
fix: fix mypy pb
samsja Nov 23, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions docarray/document/document.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import os
from typing import Type

import orjson
from pydantic import BaseModel, Field

from docarray.document.abstract_document import AbstractDocument
from docarray.document.base_node import BaseNode
from docarray.document.io.json import orjson_dumps
from docarray.document.mixins import ProtoMixin
from docarray.typing import ID

from .mixins import ProtoMixin


class BaseDocument(BaseModel, ProtoMixin, AbstractDocument, BaseNode):
"""
Expand All @@ -17,6 +18,10 @@ class BaseDocument(BaseModel, ProtoMixin, AbstractDocument, BaseNode):

id: ID = Field(default_factory=lambda: ID.validate(os.urandom(16).hex()))

class Config:
json_loads = orjson.loads
json_dumps = orjson_dumps

@classmethod
def _get_nested_document_class(cls, field: str) -> Type['BaseDocument']:
"""
Expand Down
Empty file.
22 changes: 22 additions & 0 deletions docarray/document/io/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import orjson


def _default_orjson(obj):
    """
    Default handler for orjson dumps.

    Objects that expose a ``_to_json_compatible`` hook (docarray typing
    objects such as Tensor) are converted through it; anything else is
    returned unchanged so orjson can report its own serialization error.

    :param obj: the object orjson could not serialize natively
    :return: a json compatible representation of ``obj``
    """
    # BUG FIX: the original `getattr(obj, '_to_json_compatible')` had no
    # default, so any object WITHOUT the hook raised AttributeError here,
    # making the fallback branch unreachable.  Using a None default makes
    # the membership test explicit and safe.
    to_json = getattr(obj, '_to_json_compatible', None)
    if to_json is not None:
        return to_json()
    return obj


def orjson_dumps(v, *, default=None):
    """
    Serialize ``v`` to a JSON string with orjson.

    ``orjson.dumps`` returns ``bytes``; the result is decoded so the
    function matches the ``str`` contract of the standard ``json.dumps``
    (which is what pydantic expects from ``Config.json_dumps``).

    :param v: the object to serialize
    :param default: optional fallback serializer for types orjson cannot
        handle natively; consulted after the docarray hook.  (Previously
        this parameter was accepted but silently ignored.)
    :return: the JSON string
    """

    def _serialize_unknown(obj):
        # docarray typing objects expose _to_json_compatible
        to_json = getattr(obj, '_to_json_compatible', None)
        if to_json is not None:
            return to_json()
        # honor the caller-supplied default (e.g. pydantic's encoder)
        if default is not None:
            return default(obj)
        return obj

    return orjson.dumps(
        v, default=_serialize_unknown, option=orjson.OPT_SERIALIZE_NUMPY
    ).decode()
53 changes: 49 additions & 4 deletions docarray/typing/tensor/tensor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Any, Type, TypeVar, Union, cast
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, TypeVar, Union, cast

import numpy as np

Expand All @@ -22,26 +22,71 @@ def __get_validators__(cls):

@classmethod
def validate(
    cls: Type[T],
    value: Union[T, np.ndarray, List[Any], Tuple[Any], Any],
    field: 'ModelField',
    config: 'BaseConfig',
) -> T:
    """
    Coerce ``value`` into this Tensor type (pydantic validator).

    Accepts an existing Tensor, a ``np.ndarray``, a list/tuple, or any
    other object numpy can convert to an array.

    :param value: the value to validate and coerce
    :param field: the pydantic model field (unused here)
    :param config: the pydantic model config (unused here)
    :return: a Tensor view over the (possibly converted) array
    :raises ValueError: if ``value`` cannot be converted to an ndarray
    """
    if isinstance(value, np.ndarray):
        return cls.from_ndarray(value)
    elif isinstance(value, Tensor):
        return cast(T, value)
    elif isinstance(value, (list, tuple)):
        try:
            arr_from_list: np.ndarray = np.asarray(value)
            return cls.from_ndarray(arr_from_list)
        except Exception:
            pass  # handled below
    else:
        try:
            # BUG FIX: `np.ndarray(value)` treats `value` as a *shape*
            # and allocates an uninitialized array of that shape;
            # `np.asarray` converts the value itself, which is what the
            # list/tuple branch above also does.
            arr: np.ndarray = np.asarray(value)
            return cls.from_ndarray(arr)
        except Exception:
            pass  # handled below
    raise ValueError(f'Expected a numpy.ndarray compatible type, got {type(value)}')

@classmethod
def from_ndarray(cls: Type[T], value: np.ndarray) -> T:
    """
    Wrap a numpy array as this Tensor type without copying.

    ``ndarray.view`` only re-types the array object, so the returned
    tensor shares its underlying buffer with ``value``.

    :param value: the numpy array to wrap
    :return: a Tensor view over the same memory
    """
    return value.view(cls)

@classmethod
def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
    """Amend the pydantic-generated JSON schema for this field type.

    Declaring the field as a string with format ``tensor`` is needed so
    the model can be dumped to json.
    """
    field_schema['type'] = 'string'
    field_schema['format'] = 'tensor'

def _to_json_compatible(self) -> np.ndarray:
    """
    Convert the tensor into a json compatible object.

    Note: this returns the unwrapped ``np.ndarray`` view (not a Python
    list); orjson serializes ndarrays directly via ``OPT_SERIALIZE_NUMPY``.

    :return: the underlying ``np.ndarray``, sharing this tensor's memory
    """
    return self.unwrap()

def unwrap(self) -> np.ndarray:
    """
    Return the original ndarray without any memory copy.

    The original view remains intact and is still a Document Tensor,
    but the returned object is a pure np.ndarray; both objects share
    the same memory layout.

    EXAMPLE USAGE
    .. code-block:: python
        from docarray.typing import Tensor
        import numpy as np

        t1 = Tensor.validate(np.zeros((3, 224, 224)), None, None)
        # here t1 is a docarray Tensor
        t2 = t1.unwrap()
        # here t2 is a pure np.ndarray but t1 is still a docarray Tensor
        # and both share the same underlying memory


    :return: a numpy ndarray
    """
    return self.view(np.ndarray)

def _to_node_protobuf(self: T, field: str = 'tensor') -> NodeProto:
"""Convert Document into a NodeProto protobuf message. This function should
"""Convert itself into a NodeProto protobuf message. This function should
be called when the Document is nested into another Document that need to be
converted into a protobuf
:param field: field in which to store the content in the node proto
Expand Down
45 changes: 43 additions & 2 deletions docarray/typing/tensor/torch_tensor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import TYPE_CHECKING, Any, Type, TypeVar, Union, cast
from copy import copy
from typing import TYPE_CHECKING, Any, Dict, Type, TypeVar, Union, cast

import numpy as np
import torch # type: ignore
Expand Down Expand Up @@ -49,7 +50,47 @@ def validate(
return cls.from_native_torch_tensor(arr)
except Exception:
pass # handled below
raise ValueError(f'Expected a torch.Tensor, got {type(value)}')
raise ValueError(f'Expected a torch.Tensor compatible type, got {type(value)}')

@classmethod
def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
    """Amend the pydantic-generated JSON schema for this field type.

    Declaring the field as a string with format ``tensor`` is needed so
    the model can be dumped to json.
    """
    field_schema['type'] = 'string'
    field_schema['format'] = 'tensor'

def _to_json_compatible(self) -> np.ndarray:
    """
    Convert the torch Tensor into a json compatible object.

    Note: this returns a ``np.ndarray`` (not a Python list); orjson
    serializes ndarrays directly via ``OPT_SERIALIZE_NUMPY``.

    :return: a ``np.ndarray`` with the tensor's data
    """
    # detach() drops any autograd graph and cpu() moves data off an
    # accelerator; both are no-ops (no copy) for a plain CPU tensor
    # without grad, so behavior is unchanged in that case, while tensors
    # that require grad or live on a device no longer make .numpy() raise.
    return self.detach().cpu().numpy()

def unwrap(self) -> torch.Tensor:
    """
    Return the original torch.Tensor without any memory copy.

    The original view remains intact and is still a Document Tensor,
    but the returned object is a pure torch.Tensor; both objects share
    the same memory layout.

    EXAMPLE USAGE
    .. code-block:: python
        from docarray.typing import TorchTensor
        import torch

        t = TorchTensor.validate(torch.zeros(3, 224, 224), None, None)
        # here t is a docarray TorchTensor
        t2 = t.unwrap()
        # here t2 is a pure torch.Tensor but t is still a docarray TorchTensor
        # and both share the same underlying memory


    :return: a torch Tensor
    """
    value = copy(self)  # as unintuitive as it sounds, this
    # does not do any relevant memory copying, just a shallow
    # reference to the torch data
    value.__class__ = torch.Tensor  # type: ignore
    return value

@classmethod
def from_native_torch_tensor(cls: Type[T], value: torch.Tensor) -> T:
Expand Down
Loading