From 0f126493c516368187b2c7a15879e73f57f15797 Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Wed, 26 Apr 2023 12:48:35 +0200 Subject: [PATCH 01/15] feat: try to make DocList and actual Python List --- docarray/array/doc_list/doc_list.py | 2 +- docarray/array/doc_list/sequence_indexing_mixin.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 9d1ca90a916..4c0ccb673be 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -58,7 +58,7 @@ def _delegate_meth(self, *args, **kwargs): class DocList( - IndexingSequenceMixin[T_doc], PushPullMixin, IOMixinArray, AnyDocArray[T_doc] + IndexingSequenceMixin[T_doc], PushPullMixin, IOMixinArray, AnyDocArray[T_doc], List[T_doc] ): """ DocList is a container of Documents. diff --git a/docarray/array/doc_list/sequence_indexing_mixin.py b/docarray/array/doc_list/sequence_indexing_mixin.py index 8513c82bee0..d661eddcf73 100644 --- a/docarray/array/doc_list/sequence_indexing_mixin.py +++ b/docarray/array/doc_list/sequence_indexing_mixin.py @@ -11,6 +11,7 @@ cast, no_type_check, overload, + List ) import numpy as np @@ -34,7 +35,7 @@ def _is_np_int(item: Any) -> bool: return False # this is unreachable, but mypy wants it -class IndexingSequenceMixin(Iterable[T_item]): +class IndexingSequenceMixin(List[T_item]): """ This mixin allow sto extend a list into an object that can be indexed a la numpy/pytorch. From c3467095244604baf07303dd72adbdafb05d3ee4 Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Wed, 26 Apr 2023 15:52:33 +0200 Subject: [PATCH 02/15] fix: fix mypy for sequcence indexing mixin --- docarray/array/doc_list/doc_list.py | 6 +++++- docarray/array/doc_list/sequence_indexing_mixin.py | 12 +++++++----- 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 4c0ccb673be..5a336ef3ade 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -58,7 +58,11 @@ def _delegate_meth(self, *args, **kwargs): class DocList( - IndexingSequenceMixin[T_doc], PushPullMixin, IOMixinArray, AnyDocArray[T_doc], List[T_doc] + IndexingSequenceMixin[T_doc], + PushPullMixin, + IOMixinArray, + AnyDocArray[T_doc], + List[T_doc], ): """ DocList is a container of Documents. diff --git a/docarray/array/doc_list/sequence_indexing_mixin.py b/docarray/array/doc_list/sequence_indexing_mixin.py index d661eddcf73..7f0d23380e6 100644 --- a/docarray/array/doc_list/sequence_indexing_mixin.py +++ b/docarray/array/doc_list/sequence_indexing_mixin.py @@ -11,9 +11,11 @@ cast, no_type_check, overload, - List + List, ) +from typing_extensions import SupportsIndex + import numpy as np from docarray.utils._internal.misc import import_library @@ -140,7 +142,7 @@ def _del_from_indices(self: T, item: Iterable[int]) -> None: # each delete del self._data[ix] - def __delitem__(self, key: Union[int, IndexIterType]) -> None: + def __delitem__(self, key: Union[SupportsIndex, IndexIterType]) -> None: item = self._normalize_index_item(key) if item is None: @@ -158,7 +160,7 @@ def __delitem__(self, key: Union[int, IndexIterType]) -> None: raise TypeError(f'Invalid type {type(head)} for indexing') @overload - def __getitem__(self: T, item: int) -> T_item: + def __getitem__(self: T, item: SupportsIndex) -> T_item: ... @overload @@ -187,11 +189,11 @@ def __getitem__(self, item): raise TypeError(f'Invalid type {type(head)} for indexing') @overload - def __setitem__(self: T, key: IndexIterType, value: Sequence[T_item]): + def __setitem__(self: T, key: SupportsIndex, value: T_item) -> None: ... @overload - def __setitem__(self: T, key: int, value: T_item): + def __setitem__(self: T, key: IndexIterType, value: Iterable[T_item]): ... @no_type_check From ba6f5f9ff5cbf9866456833be882bc30ab65d26b Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Wed, 26 Apr 2023 15:54:58 +0200 Subject: [PATCH 03/15] fix: fix mypy for doclist Signed-off-by: Joan Fontanals Martinez --- docarray/array/doc_list/doc_list.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 5a336ef3ade..fd211298f6b 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -16,6 +16,7 @@ ) from typing_inspect import is_union_type +from typing_extensions import SupportsIndex from docarray.array.any_array import AnyDocArray from docarray.array.doc_list.io import IOMixinArray @@ -62,7 +63,6 @@ class DocList( PushPullMixin, IOMixinArray, AnyDocArray[T_doc], - List[T_doc], ): """ DocList is a container of Documents. @@ -200,7 +200,7 @@ def extend(self, docs: Iterable[T_doc]): """ self._data.extend(self._validate_docs(docs)) - def insert(self, i: int, doc: T_doc): + def insert(self, i: SupportsIndex, doc: T_doc): """ Insert a Document to the `DocList`. The Document must be from the same class as the doc_type of this `DocList` otherwise it will fail. @@ -303,7 +303,7 @@ def from_protobuf(cls: Type[T], pb_msg: 'DocListProto') -> T: return super().from_protobuf(pb_msg) @overload - def __getitem__(self, item: int) -> T_doc: + def __getitem__(self, item: SupportsIndex) -> T_doc: ... @overload From ff5eabec72f8298d4910568ee50f235efae15b95 Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Wed, 26 Apr 2023 16:30:27 +0200 Subject: [PATCH 04/15] test: make tests pass --- docarray/index/abstract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/index/abstract.py b/docarray/index/abstract.py index 13f4837cd61..a18b28f3a69 100644 --- a/docarray/index/abstract.py +++ b/docarray/index/abstract.py @@ -892,7 +892,7 @@ def _convert_dict_to_doc( doc_dict[field_name] = self._convert_dict_to_doc(inner_dict, t_) schema_cls = cast(Type[BaseDoc], schema) - return schema_cls(**doc_dict) + return schema_cls(**dict(doc_dict)) def _dict_list_to_docarray(self, dict_list: Sequence[Dict[str, Any]]) -> DocList: """Convert a list of docs in dict type to a DocList of the schema type.""" From 65b89d59727a160ef73ecbc675a21b52381de0ae Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Wed, 26 Apr 2023 17:33:26 +0200 Subject: [PATCH 05/15] fix: remove _data from Mixin Signed-off-by: Joan Fontanals Martinez --- docarray/array/doc_list/doc_list.py | 38 ++----------------- .../array/doc_list/sequence_indexing_mixin.py | 25 +++++------- 2 files changed, 13 insertions(+), 50 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index fd211298f6b..d4dac1fb9e3 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -1,9 +1,7 @@ import io -from functools import wraps from typing import ( TYPE_CHECKING, Any, - Callable, Iterable, List, MutableSequence, @@ -41,23 +39,6 @@ T_doc = TypeVar('T_doc', bound=BaseDoc) -def _delegate_meth_to_data(meth_name: str) -> Callable: - """ - create a function that mimic a function call to the data attribute of the - DocList - - :param meth_name: name of the method - :return: a method that mimic the meth_name - """ - func = getattr(list, meth_name) - - @wraps(func) - def _delegate_meth(self, *args, **kwargs): - return getattr(self._data, meth_name)(*args, **kwargs) - - return _delegate_meth - - class DocList( IndexingSequenceMixin[T_doc], PushPullMixin, @@ -134,7 +115,7 @@ def __init__( self, docs: Optional[Iterable[T_doc]] = None, ): - self._data: List[T_doc] = list(self._validate_docs(docs)) if docs else [] + super().__init__(self._validate_docs(docs) if docs else []) @classmethod def construct( @@ -172,12 +153,6 @@ def _validate_one_doc(self, doc: T_doc) -> T_doc: raise ValueError(f'{doc} is not a {self.doc_type}') return doc - def __len__(self): - return len(self._data) - - def __iter__(self): - return iter(self._data) - def __bytes__(self) -> bytes: with io.BytesIO() as bf: self._write_bytes(bf=bf) @@ -189,7 +164,7 @@ def append(self, doc: T_doc): as the `.doc_type` of this `DocList` otherwise it will fail. :param doc: A Document """ - self._data.append(self._validate_one_doc(doc)) + super().append(self._validate_one_doc(doc)) def extend(self, docs: Iterable[T_doc]): """ @@ -198,7 +173,7 @@ def extend(self, docs: Iterable[T_doc]): fail. :param docs: Iterable of Documents """ - self._data.extend(self._validate_docs(docs)) + super().extend(self._validate_docs(docs)) def insert(self, i: SupportsIndex, doc: T_doc): """ @@ -207,12 +182,7 @@ class as the doc_type of this `DocList` otherwise it will fail. :param i: index to insert :param doc: A Document """ - self._data.insert(i, self._validate_one_doc(doc)) - - pop = _delegate_meth_to_data('pop') - remove = _delegate_meth_to_data('remove') - reverse = _delegate_meth_to_data('reverse') - sort = _delegate_meth_to_data('sort') + super().insert(i, self._validate_one_doc(doc)) def _get_data_column( self: T, diff --git a/docarray/array/doc_list/sequence_indexing_mixin.py b/docarray/array/doc_list/sequence_indexing_mixin.py index 7f0d23380e6..2d85a75ed8e 100644 --- a/docarray/array/doc_list/sequence_indexing_mixin.py +++ b/docarray/array/doc_list/sequence_indexing_mixin.py @@ -3,7 +3,6 @@ TYPE_CHECKING, Any, Iterable, - MutableSequence, Optional, Sequence, TypeVar, @@ -57,8 +56,6 @@ class IndexingSequenceMixin(List[T_item]): """ - _data: MutableSequence[T_item] - @abc.abstractmethod def __init__( self, @@ -66,10 +63,6 @@ def __init__( ): ... - @abc.abstractmethod - def __len__(self) -> int: - ... - @staticmethod def _normalize_index_item( item: Any, @@ -110,13 +103,13 @@ def _normalize_index_item( def _get_from_indices(self: T, item: Iterable[int]) -> T: results = [] for ix in item: - results.append(self._data[ix]) + results.append(self[ix]) return self.__class__(results) def _set_by_indices(self: T, item: Iterable[int], value: Iterable[T_item]): for ix, doc_to_set in zip(item, value): try: - self._data[ix] = doc_to_set + self[ix] = doc_to_set except KeyError: raise IndexError(f'Index {ix} is out of range') @@ -129,7 +122,7 @@ def _set_by_mask(self: T, item: Iterable[bool], value: Sequence[T_item]): i_value = 0 for i, mask_value in zip(range(len(self)), item): if mask_value: - self._data[i] = value[i_value] + self[i] = value[i_value] i_value += 1 def _del_from_mask(self: T, item: Iterable[bool]) -> None: @@ -140,7 +133,7 @@ def _del_from_indices(self: T, item: Iterable[int]) -> None: for ix in sorted(item, reverse=True): # reversed is needed here otherwise some the indices are not up to date after # each delete - del self._data[ix] + del self[ix] def __delitem__(self, key: Union[SupportsIndex, IndexIterType]) -> None: item = self._normalize_index_item(key) @@ -148,7 +141,7 @@ def __delitem__(self, key: Union[SupportsIndex, IndexIterType]) -> None: if item is None: return elif isinstance(item, (int, slice)): - del self._data[item] + del self[item] else: head = item[0] # type: ignore if isinstance(head, bool): @@ -171,10 +164,10 @@ def __getitem__(self, item): item = self._normalize_index_item(item) if type(item) == slice: - return self.__class__(self._data[item]) + return self.__class__(self[item]) if isinstance(item, int): - return self._data[item] + return self[item] if item is None: return self @@ -201,9 +194,9 @@ def __setitem__(self: T, key, value): key_norm = self._normalize_index_item(key) if isinstance(key_norm, int): - self._data[key_norm] = value + self[key_norm] = value elif isinstance(key_norm, slice): - self._data[key_norm] = value + self[key_norm] = value else: # _normalize_index_item() guarantees the line below is correct head = key_norm[0] From f42b6600f90cdd83e3505311b53c5ad540d459ee Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Wed, 26 Apr 2023 19:46:15 +0200 Subject: [PATCH 06/15] fix: small fixes --- docarray/array/doc_list/doc_list.py | 4 ++-- docarray/array/doc_list/io.py | 4 ++-- .../array/doc_list/sequence_indexing_mixin.py | 15 +++------------ tests/units/array/test_array.py | 4 ++-- 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index d4dac1fb9e3..f514e63132b 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -128,8 +128,8 @@ def construct( :param docs: a Sequence (list) of Document with the same schema :return: a `DocList` object """ - new_docs = cls.__new__(cls) - new_docs._data = docs if isinstance(docs, list) else list(docs) + new_docs = cls() + new_docs = docs if isinstance(docs, list) else list(docs) return new_docs def __eq__(self, other: Any) -> bool: diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index fdad272b94c..fe465cc8217 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -327,14 +327,14 @@ def to_json(self) -> bytes: """Convert the object into JSON bytes. Can be loaded via `.from_json`. :return: JSON serialization of `DocList` """ - return orjson_dumps(self._data) + return orjson_dumps(self) def _docarray_to_json_compatible(self) -> List[T_doc]: """ Convert itself into a json compatible object :return: A list of documents """ - return self._data + return self @classmethod def from_csv( diff --git a/docarray/array/doc_list/sequence_indexing_mixin.py b/docarray/array/doc_list/sequence_indexing_mixin.py index 2d85a75ed8e..c7918d41150 100644 --- a/docarray/array/doc_list/sequence_indexing_mixin.py +++ b/docarray/array/doc_list/sequence_indexing_mixin.py @@ -1,9 +1,7 @@ -import abc from typing import ( TYPE_CHECKING, Any, Iterable, - Optional, Sequence, TypeVar, Union, @@ -56,13 +54,6 @@ class IndexingSequenceMixin(List[T_item]): """ - @abc.abstractmethod - def __init__( - self, - docs: Optional[Iterable[T_item]] = None, - ): - ... - @staticmethod def _normalize_index_item( item: Any, @@ -141,7 +132,7 @@ def __delitem__(self, key: Union[SupportsIndex, IndexIterType]) -> None: if item is None: return elif isinstance(item, (int, slice)): - del self[item] + super().__delitem__(item) else: head = item[0] # type: ignore if isinstance(head, bool): @@ -164,10 +155,10 @@ def __getitem__(self, item): item = self._normalize_index_item(item) if type(item) == slice: - return self.__class__(self[item]) + return self.__class__(super().__getitem__(item)) if isinstance(item, int): - return self[item] + return super().__getitem__(item) if item is None: return self diff --git a/tests/units/array/test_array.py b/tests/units/array/test_array.py index 79d50b64e82..b65929b2ea5 100644 --- a/tests/units/array/test_array.py +++ b/tests/units/array/test_array.py @@ -24,7 +24,7 @@ class Text(BaseDoc): def test_iterate(da): - for doc, doc2 in zip(da, da._data): + for doc, doc2 in zip(da, da): assert doc.id == doc2.id @@ -384,7 +384,7 @@ class Text(BaseDoc): da = DocList[Text].construct(docs) - assert da._data is docs + assert da is docs def test_reverse(): From 40751368110b455cd3d9789ccb79ec063764ccab Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Wed, 26 Apr 2023 19:55:40 +0200 Subject: [PATCH 07/15] fix: fix mypy --- docarray/array/doc_list/doc_list.py | 11 +++++++---- docarray/array/doc_list/io.py | 8 -------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index f514e63132b..aa2cdfe9ec5 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -114,8 +114,13 @@ class Image(BaseDoc): def __init__( self, docs: Optional[Iterable[T_doc]] = None, + validate_input_docs: bool = True, ): - super().__init__(self._validate_docs(docs) if docs else []) + if validate_input_docs: + docs = self._validate_docs(docs) if docs else [] + else: + docs = docs if docs else [] + super().__init__(docs) @classmethod def construct( @@ -128,9 +133,7 @@ def construct( :param docs: a Sequence (list) of Document with the same schema :return: a `DocList` object """ - new_docs = cls() - new_docs = docs if isinstance(docs, list) else list(docs) - return new_docs + return cls(docs, False) def __eq__(self, other: Any) -> bool: if self.__len__() != other.__len__(): diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index fe465cc8217..5c2f1c9190a 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -99,7 +99,6 @@ def __getitem__(self, item: slice): class IOMixinArray(Iterable[T_doc]): doc_type: Type[T_doc] - _data: List[T_doc] @abstractmethod def __len__(self): @@ -329,13 +328,6 @@ def to_json(self) -> bytes: """ return orjson_dumps(self) - def _docarray_to_json_compatible(self) -> List[T_doc]: - """ - Convert itself into a json compatible object - :return: A list of documents - """ - return self - @classmethod def from_csv( cls, From 638d06cfaf791e0f6bb19f55d91bf16902fe7278 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 27 Apr 2023 09:36:27 +0200 Subject: [PATCH 08/15] fix: fix class getitem for doc list Signed-off-by: samsja --- docarray/array/doc_list/doc_list.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index aa2cdfe9ec5..04806d882ad 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -13,8 +13,8 @@ overload, ) -from typing_inspect import is_union_type from typing_extensions import SupportsIndex +from typing_inspect import is_union_type from docarray.array.any_array import AnyDocArray from docarray.array.doc_list.io import IOMixinArray @@ -191,11 +191,13 @@ def _get_data_column( self: T, field: str, ) -> Union[MutableSequence, T, 'TorchTensor', 'NdArray']: - """Return all values of the fields from all docs this doc_list contains - - :param field: name of the fields to extract - :return: Returns a list of the field value for each document - in the doc_list like container + """Return all v @classmethod + def __class_getitem__(cls, item: Union[Type[BaseDoc], TypeVar, str]):alues of the fields from all docs this doc_list contains + @classmethod + def __class_getitem__(cls, item: Union[Type[BaseDoc], TypeVar, str]): + :param field: name of the fields to extract + :return: Returns a list of the field value for each document + in the doc_list like container """ field_type = self.__class__.doc_type._get_field_type(field) @@ -285,3 +287,11 @@ def __getitem__(self: T, item: IndexIterType) -> T: def __getitem__(self, item): return super().__getitem__(item) + + @classmethod + def __class_getitem__(cls, item: Union[Type[BaseDoc], TypeVar, str]): + + if isinstance(item, type) and issubclass(item, BaseDoc): + return AnyDocArray.__class_getitem__.__func__(cls, item) + else: + return super().__class_getitem__(item) From 5b13d06b88f3960672ea0e1bf9928d7ac065259a Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 27 Apr 2023 09:38:01 +0200 Subject: [PATCH 09/15] fix: fix construct test Signed-off-by: samsja --- tests/units/array/test_array.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/units/array/test_array.py b/tests/units/array/test_array.py index b65929b2ea5..316baa26d38 100644 --- a/tests/units/array/test_array.py +++ b/tests/units/array/test_array.py @@ -380,11 +380,11 @@ def test_construct(): class Text(BaseDoc): text: str - docs = [Text(text=f'hello {i}') for i in range(10)] + docs = [Text(text=f'hello {i}') for i in range(10)] + [BaseDoc()] da = DocList[Text].construct(docs) - assert da is docs + assert type(da[-1]) == BaseDoc def test_reverse(): From b2f302cc440d1a8fad91747c3c0b1aeeeb60a933 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 27 Apr 2023 09:52:22 +0200 Subject: [PATCH 10/15] fix: fix doc vec Signed-off-by: samsja --- .../array/doc_list/sequence_indexing_mixin.py | 9 ++++----- docarray/array/doc_vec/doc_vec.py | 4 ++-- .../array/doc_vec/list_advance_indexing.py | 18 ++---------------- 3 files changed, 8 insertions(+), 23 deletions(-) diff --git a/docarray/array/doc_list/sequence_indexing_mixin.py b/docarray/array/doc_list/sequence_indexing_mixin.py index c7918d41150..e29a5c49da9 100644 --- a/docarray/array/doc_list/sequence_indexing_mixin.py +++ b/docarray/array/doc_list/sequence_indexing_mixin.py @@ -2,18 +2,17 @@ TYPE_CHECKING, Any, Iterable, + List, Sequence, TypeVar, Union, cast, no_type_check, overload, - List, ) -from typing_extensions import SupportsIndex - import numpy as np +from typing_extensions import SupportsIndex from docarray.utils._internal.misc import import_library @@ -185,9 +184,9 @@ def __setitem__(self: T, key, value): key_norm = self._normalize_index_item(key) if isinstance(key_norm, int): - self[key_norm] = value + super().__setitem__(key_norm, value) elif isinstance(key_norm, slice): - self[key_norm] = value + super().__setitem__(key_norm, value) else: # _normalize_index_item() guarantees the line below is correct head = key_norm[0] diff --git a/docarray/array/doc_vec/doc_vec.py b/docarray/array/doc_vec/doc_vec.py index c7c94b393dd..721ac4933bf 100644 --- a/docarray/array/doc_vec/doc_vec.py +++ b/docarray/array/doc_vec/doc_vec.py @@ -271,9 +271,9 @@ def _get_data_column( in the array like container """ if field in self._storage.any_columns.keys(): - return self._storage.any_columns[field].data + return self._storage.any_columns[field] elif field in self._storage.docs_vec_columns.keys(): - return self._storage.docs_vec_columns[field].data + return self._storage.docs_vec_columns[field] elif field in self._storage.columns.keys(): return self._storage.columns[field] else: diff --git a/docarray/array/doc_vec/list_advance_indexing.py b/docarray/array/doc_vec/list_advance_indexing.py index bc5c07d9c83..b21069e4bbb 100644 --- a/docarray/array/doc_vec/list_advance_indexing.py +++ b/docarray/array/doc_vec/list_advance_indexing.py @@ -1,4 +1,4 @@ -from typing import Iterator, MutableSequence, TypeVar +from typing import TypeVar from docarray.array.doc_list.sequence_indexing_mixin import IndexingSequenceMixin @@ -24,18 +24,4 @@ class ListAdvancedIndexing(IndexingSequenceMixin[T_item]): """ - _data: MutableSequence[T_item] - - def __init__(self, data: MutableSequence[T_item]): - self._data = data - - @property - def data(self) -> MutableSequence[T_item]: - return self._data - - def __len__(self) -> int: - return len(self._data) - - def __iter__(self) -> Iterator[T_item]: - for item in self._data: - yield item + ... From cc2460f0dd1cbcefa20ae2be1d6d333bcab32b60 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 27 Apr 2023 10:04:38 +0200 Subject: [PATCH 11/15] fix: fix doc vec Signed-off-by: samsja --- tests/units/array/stack/test_init.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/units/array/stack/test_init.py b/tests/units/array/stack/test_init.py index 663eebadf89..6e23835b560 100644 --- a/tests/units/array/stack/test_init.py +++ b/tests/units/array/stack/test_init.py @@ -15,7 +15,7 @@ class MyDoc(BaseDoc): da = DocVec[MyDoc](docs, tensor_type=NdArray) assert (da._storage.tensor_columns['tensor'] == np.zeros((4, 10))).all() - assert da._storage.any_columns['name']._data == ['hello' for _ in range(4)] + assert da._storage.any_columns['name'] == ['hello' for _ in range(4)] def test_da_iter(): From 44020b2835108eee8c958964c333513805c107e5 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 27 Apr 2023 10:44:20 +0200 Subject: [PATCH 12/15] fix: fix mypy Signed-off-by: samsja --- docarray/array/doc_list/doc_list.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 04806d882ad..a9b7e800fa3 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -292,6 +292,6 @@ def __getitem__(self, item): def __class_getitem__(cls, item: Union[Type[BaseDoc], TypeVar, str]): if isinstance(item, type) and issubclass(item, BaseDoc): - return AnyDocArray.__class_getitem__.__func__(cls, item) + return AnyDocArray.__class_getitem__.__func__(cls, item) # type: ignore else: return super().__class_getitem__(item) From 1180bf7bbd2d209fa1fc1077090285a2c0eae18d Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 27 Apr 2023 10:50:18 +0200 Subject: [PATCH 13/15] fix: revert dict things Signed-off-by: samsja --- docarray/index/abstract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/index/abstract.py b/docarray/index/abstract.py index a18b28f3a69..13f4837cd61 100644 --- a/docarray/index/abstract.py +++ b/docarray/index/abstract.py @@ -892,7 +892,7 @@ def _convert_dict_to_doc( doc_dict[field_name] = self._convert_dict_to_doc(inner_dict, t_) schema_cls = cast(Type[BaseDoc], schema) - return schema_cls(**dict(doc_dict)) + return schema_cls(**doc_dict) def _dict_list_to_docarray(self, dict_list: Sequence[Dict[str, Any]]) -> DocList: """Convert a list of docs in dict type to a DocList of the schema type.""" From 1b060e7dcea0adf2231a9da306064ec21513bebb Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 27 Apr 2023 10:56:34 +0200 Subject: [PATCH 14/15] refactor: merge mixin and list advance indexing Signed-off-by: samsja --- docarray/array/doc_list/doc_list.py | 7 ++--- docarray/array/doc_vec/column_storage.py | 2 +- docarray/array/doc_vec/doc_vec.py | 2 +- .../array/doc_vec/list_advance_indexing.py | 27 ------------------- ...xing_mixin.py => list_advance_indexing.py} | 9 +++---- 5 files changed, 8 insertions(+), 39 deletions(-) delete mode 100644 docarray/array/doc_vec/list_advance_indexing.py rename docarray/array/{doc_list/sequence_indexing_mixin.py => list_advance_indexing.py} (94%) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index a9b7e800fa3..a9deba3bec6 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -19,10 +19,7 @@ from docarray.array.any_array import AnyDocArray from docarray.array.doc_list.io import IOMixinArray from docarray.array.doc_list.pushpull import PushPullMixin -from docarray.array.doc_list.sequence_indexing_mixin import ( - IndexingSequenceMixin, - IndexIterType, -) +from docarray.array.list_advance_indexing import IndexIterType, ListAdvancedIndexing from docarray.base_doc import AnyDoc, BaseDoc from docarray.typing import NdArray @@ -40,7 +37,7 @@ class DocList( - IndexingSequenceMixin[T_doc], + ListAdvancedIndexing[T_doc], PushPullMixin, IOMixinArray, AnyDocArray[T_doc], diff --git a/docarray/array/doc_vec/column_storage.py b/docarray/array/doc_vec/column_storage.py index 42c67c96b3b..736b4114b16 100644 --- a/docarray/array/doc_vec/column_storage.py +++ b/docarray/array/doc_vec/column_storage.py @@ -10,7 +10,7 @@ Union, ) -from docarray.array.doc_vec.list_advance_indexing import ListAdvancedIndexing +from docarray.array.list_advance_indexing import ListAdvancedIndexing from docarray.typing import NdArray from docarray.typing.tensor.abstract_tensor import AbstractTensor diff --git a/docarray/array/doc_vec/doc_vec.py b/docarray/array/doc_vec/doc_vec.py index 721ac4933bf..101fe5b93e3 100644 --- a/docarray/array/doc_vec/doc_vec.py +++ b/docarray/array/doc_vec/doc_vec.py @@ -21,7 +21,7 @@ from docarray.array.any_array import AnyDocArray from docarray.array.doc_list.doc_list import DocList from docarray.array.doc_vec.column_storage import ColumnStorage, ColumnStorageView -from docarray.array.doc_vec.list_advance_indexing import ListAdvancedIndexing +from docarray.array.list_advance_indexing import ListAdvancedIndexing from docarray.base_doc import BaseDoc from docarray.base_doc.mixins.io import _type_to_protobuf from docarray.typing import NdArray diff --git a/docarray/array/doc_vec/list_advance_indexing.py b/docarray/array/doc_vec/list_advance_indexing.py deleted file mode 100644 index b21069e4bbb..00000000000 --- a/docarray/array/doc_vec/list_advance_indexing.py +++ /dev/null @@ -1,27 +0,0 @@ -from typing import TypeVar - -from docarray.array.doc_list.sequence_indexing_mixin import IndexingSequenceMixin - -T_item = TypeVar('T_item') - - -class ListAdvancedIndexing(IndexingSequenceMixin[T_item]): - """ - A list wrapper that implements custom indexing - - You can index into a ListAdvanceIndex like a numpy array or torch tensor: - - --- - - ```python - docs[0] # index by position - docs[0:5:2] # index by slice - docs[[0, 2, 3]] # index by list of indices - docs[True, False, True, True, ...] # index by boolean mask - ``` - - --- - - """ - - ... diff --git a/docarray/array/doc_list/sequence_indexing_mixin.py b/docarray/array/list_advance_indexing.py similarity index 94% rename from docarray/array/doc_list/sequence_indexing_mixin.py rename to docarray/array/list_advance_indexing.py index e29a5c49da9..bcf966e6454 100644 --- a/docarray/array/doc_list/sequence_indexing_mixin.py +++ b/docarray/array/list_advance_indexing.py @@ -17,7 +17,7 @@ from docarray.utils._internal.misc import import_library T_item = TypeVar('T_item') -T = TypeVar('T', bound='IndexingSequenceMixin') +T = TypeVar('T', bound='ListAdvancedIndexing') IndexIterType = Union[slice, Iterable[int], Iterable[bool], None] @@ -33,12 +33,11 @@ def _is_np_int(item: Any) -> bool: return False # this is unreachable, but mypy wants it -class IndexingSequenceMixin(List[T_item]): +class ListAdvancedIndexing(List[T_item]): """ - This mixin allow sto extend a list into an object that can be indexed - a la numpy/pytorch. + A list wrapper that implements custom indexing - You can index into, delete from, and set items in a IndexingSequenceMixin like a numpy doc_list or torch tensor: + You can index into a ListAdvanceIndex like a numpy array or torch tensor: --- From c9efc92f1dd64c72c4e12ec1fc8ea6de5936d382 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 27 Apr 2023 11:19:49 +0200 Subject: [PATCH 15/15] fix: fix doc index Signed-off-by: samsja --- docarray/index/abstract.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/index/abstract.py b/docarray/index/abstract.py index 13f4837cd61..7613b393a10 100644 --- a/docarray/index/abstract.py +++ b/docarray/index/abstract.py @@ -421,7 +421,7 @@ def find( query_vec_np, search_field=search_field, limit=limit, **kwargs ) - if isinstance(docs, List): + if isinstance(docs, List) and not isinstance(docs, DocList): docs = self._dict_list_to_docarray(docs) return FindResult(documents=docs, scores=scores)