Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
77a2055
feat: allow da bulk access to return da for document
samsja Nov 16, 2022
bd6d128
fix: fix mypy type pb
samsja Nov 16, 2022
d778cec
fix: add link to the mypy issue
samsja Nov 16, 2022
717e7ee
fix: remove useless list type hint
samsja Nov 16, 2022
2c3d5f1
feat: torch tensor type (#800)
JohannesMessner Nov 17, 2022
e5d678c
feat: add fastapi to dependency
samsja Nov 17, 2022
414a04c
feat(wip): add fake method to dump tensor to json
samsja Nov 17, 2022
42c6fe4
feat(wip): add fastapi test
samsja Nov 17, 2022
56497b9
feat: add json dump for type and document
samsja Nov 18, 2022
945a72a
feat: add json compatible with orjson
samsja Nov 21, 2022
83236cd
refactor: clean tests
samsja Nov 21, 2022
f1a4d7b
fix: remove duplicate
samsja Nov 21, 2022
c983246
fix: better json schema for tensor
samsja Nov 21, 2022
3873a9a
fix: fix fast api test
samsja Nov 22, 2022
51a402d
refactor: move to json test to integration
samsja Nov 22, 2022
2e92866
fix: json load from tensor type now working
samsja Nov 22, 2022
606134b
fix: add tensor to fastapi test
samsja Nov 22, 2022
731feff
fix: add new fastapi test
samsja Nov 22, 2022
e114b91
Merge branch 'feat-rewrite-v2' into feat-v2-fastapi-test
samsja Nov 22, 2022
061b05e
fix: fix mypy
samsja Nov 22, 2022
94c0069
feat: add more testing for text uri about json
samsja Nov 22, 2022
e04285a
fix: fix default orjson not returning
samsja Nov 22, 2022
b8de8bc
fix: apply johannes suggestion on docstring
samsja Nov 22, 2022
19ffb0b
fix: does not perform copy anymore on torch tensor unwrap
samsja Nov 22, 2022
0b7dac7
fix: add johannes suggestion
samsja Nov 22, 2022
a334c65
fix: fix ruff line length
samsja Nov 23, 2022
909ee74
fix: fix mypy pb
samsja Nov 23, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions docarray/document/document.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import os
from typing import Type

import orjson
from pydantic import BaseModel, Field

from docarray.document.abstract_document import AbstractDocument
from docarray.document.base_node import BaseNode
from docarray.document.io.json import orjson_dumps
from docarray.document.mixins import ProtoMixin
from docarray.typing import ID

from .mixins import ProtoMixin


class BaseDocument(BaseModel, ProtoMixin, AbstractDocument, BaseNode):
"""
Expand All @@ -17,6 +18,10 @@ class BaseDocument(BaseModel, ProtoMixin, AbstractDocument, BaseNode):

id: ID = Field(default_factory=lambda: ID.validate(os.urandom(16).hex()))

class Config:
json_loads = orjson.loads
json_dumps = orjson_dumps

@classmethod
def _get_nested_document_class(cls, field: str) -> Type['BaseDocument']:
"""
Expand Down
Empty file.
22 changes: 22 additions & 0 deletions docarray/document/io/json.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
import orjson


def _default_orjson(obj):
    """
    Default handler for orjson dumps.

    Objects that expose a ``_to_json_compatible`` hook (docarray typing
    objects such as Tensor) are converted through it; anything else is
    returned unchanged so orjson can report its own serialization error.

    :param obj: the object orjson could not serialize natively
    :return: a json compatible representation of ``obj``
    """
    # BUG FIX: the original `getattr(obj, '_to_json_compatible')` had no
    # default, so any object WITHOUT the hook raised AttributeError here,
    # making the fallback branch unreachable.  Using a None default makes
    # the membership test explicit and safe.
    to_json = getattr(obj, '_to_json_compatible', None)
    if to_json is not None:
        return to_json()
    return obj


def orjson_dumps(v, *, default=None):
    """
    Serialize ``v`` to a JSON string with orjson.

    ``orjson.dumps`` returns ``bytes``; the result is decoded so the
    function matches the ``str`` contract of the standard ``json.dumps``
    (which is what pydantic expects from ``Config.json_dumps``).

    :param v: the object to serialize
    :param default: optional fallback serializer for types orjson cannot
        handle natively; consulted after the docarray hook.  (Previously
        this parameter was accepted but silently ignored.)
    :return: the JSON string
    """

    def _serialize_unknown(obj):
        # docarray typing objects expose _to_json_compatible
        to_json = getattr(obj, '_to_json_compatible', None)
        if to_json is not None:
            return to_json()
        # honor the caller-supplied default (e.g. pydantic's encoder)
        if default is not None:
            return default(obj)
        return obj

    return orjson.dumps(
        v, default=_serialize_unknown, option=orjson.OPT_SERIALIZE_NUMPY
    ).decode()
53 changes: 49 additions & 4 deletions docarray/typing/tensor/tensor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import TYPE_CHECKING, Any, Type, TypeVar, Union, cast
from typing import TYPE_CHECKING, Any, Dict, List, Tuple, Type, TypeVar, Union, cast

import numpy as np

Expand All @@ -22,26 +22,71 @@ def __get_validators__(cls):

@classmethod
def validate(
    cls: Type[T],
    value: Union[T, np.ndarray, List[Any], Tuple[Any], Any],
    field: 'ModelField',
    config: 'BaseConfig',
) -> T:
    """
    Coerce ``value`` into this Tensor type (pydantic validator).

    Accepts an existing Tensor, a ``np.ndarray``, a list/tuple, or any
    other object numpy can convert to an array.

    :param value: the value to validate and coerce
    :param field: the pydantic model field (unused here)
    :param config: the pydantic model config (unused here)
    :return: a Tensor view over the (possibly converted) array
    :raises ValueError: if ``value`` cannot be converted to an ndarray
    """
    if isinstance(value, np.ndarray):
        return cls.from_ndarray(value)
    elif isinstance(value, Tensor):
        return cast(T, value)
    elif isinstance(value, (list, tuple)):
        try:
            arr_from_list: np.ndarray = np.asarray(value)
            return cls.from_ndarray(arr_from_list)
        except Exception:
            pass  # handled below
    else:
        try:
            # BUG FIX: `np.ndarray(value)` treats `value` as a *shape*
            # and allocates an uninitialized array of that shape;
            # `np.asarray` converts the value itself, which is what the
            # list/tuple branch above also does.
            arr: np.ndarray = np.asarray(value)
            return cls.from_ndarray(arr)
        except Exception:
            pass  # handled below
    raise ValueError(f'Expected a numpy.ndarray compatible type, got {type(value)}')

@classmethod
def from_ndarray(cls: Type[T], value: np.ndarray) -> T:
    """
    Wrap a numpy array as this Tensor type without copying.

    ``ndarray.view`` only re-types the array object, so the returned
    tensor shares its underlying buffer with ``value``.

    :param value: the numpy array to wrap
    :return: a Tensor view over the same memory
    """
    return value.view(cls)

@classmethod
def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
    """Amend the pydantic-generated JSON schema for this field type.

    Declaring the field as a string with format ``tensor`` is needed so
    the model can be dumped to json.
    """
    field_schema['type'] = 'string'
    field_schema['format'] = 'tensor'

def _to_json_compatible(self) -> np.ndarray:
    """
    Convert the tensor into a json compatible object.

    Note: this returns the unwrapped ``np.ndarray`` view (not a Python
    list); orjson serializes ndarrays directly via ``OPT_SERIALIZE_NUMPY``.

    :return: the underlying ``np.ndarray``, sharing this tensor's memory
    """
    return self.unwrap()

def unwrap(self) -> np.ndarray:
    """
    Return the original ndarray without any memory copy.

    The original view remains intact and is still a Document Tensor,
    but the returned object is a pure np.ndarray; both objects share
    the same memory layout.

    EXAMPLE USAGE
    .. code-block:: python
        from docarray.typing import Tensor
        import numpy as np

        t1 = Tensor.validate(np.zeros((3, 224, 224)), None, None)
        # here t1 is a docarray Tensor
        t2 = t1.unwrap()
        # here t2 is a pure np.ndarray but t1 is still a docarray Tensor
        # and both share the same underlying memory


    :return: a numpy ndarray
    """
    return self.view(np.ndarray)

def _to_node_protobuf(self: T, field: str = 'tensor') -> NodeProto:
"""Convert Document into a NodeProto protobuf message. This function should
"""Convert itself into a NodeProto protobuf message. This function should
be called when the Document is nested into another Document that need to be
converted into a protobuf
:param field: field in which to store the content in the node proto
Expand Down
45 changes: 43 additions & 2 deletions docarray/typing/tensor/torch_tensor.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from typing import TYPE_CHECKING, Any, Type, TypeVar, Union, cast
from copy import copy
from typing import TYPE_CHECKING, Any, Dict, Type, TypeVar, Union, cast

import numpy as np
import torch # type: ignore
Expand Down Expand Up @@ -49,7 +50,47 @@ def validate(
return cls.from_native_torch_tensor(arr)
except Exception:
pass # handled below
raise ValueError(f'Expected a torch.Tensor, got {type(value)}')
raise ValueError(f'Expected a torch.Tensor compatible type, got {type(value)}')

@classmethod
def __modify_schema__(cls, field_schema: Dict[str, Any]) -> None:
    """Amend the pydantic-generated JSON schema for this field type.

    Declaring the field as a string with format ``tensor`` is needed so
    the model can be dumped to json.
    """
    field_schema['type'] = 'string'
    field_schema['format'] = 'tensor'

def _to_json_compatible(self) -> np.ndarray:
    """
    Convert the torch Tensor into a json compatible object.

    Note: this returns a ``np.ndarray`` (not a Python list); orjson
    serializes ndarrays directly via ``OPT_SERIALIZE_NUMPY``.

    :return: a ``np.ndarray`` with the tensor's data
    """
    # detach() drops any autograd graph and cpu() moves data off an
    # accelerator; both are no-ops (no copy) for a plain CPU tensor
    # without grad, so behavior is unchanged in that case, while tensors
    # that require grad or live on a device no longer make .numpy() raise.
    return self.detach().cpu().numpy()

def unwrap(self) -> torch.Tensor:
    """
    Return the original torch.Tensor without any memory copy.

    The original view remains intact and is still a Document Tensor,
    but the returned object is a pure torch.Tensor; both objects share
    the same memory layout.

    EXAMPLE USAGE
    .. code-block:: python
        from docarray.typing import TorchTensor
        import torch

        t = TorchTensor.validate(torch.zeros(3, 224, 224), None, None)
        # here t is a docarray TorchTensor
        t2 = t.unwrap()
        # here t2 is a pure torch.Tensor but t is still a docarray TorchTensor
        # and both share the same underlying memory


    :return: a torch Tensor
    """
    value = copy(self)  # as unintuitive as it sounds, this
    # does not do any relevant memory copying, just a shallow
    # reference to the torch data
    value.__class__ = torch.Tensor  # type: ignore
    return value

@classmethod
def from_native_torch_tensor(cls: Type[T], value: torch.Tensor) -> T:
Expand Down
Loading