diff --git a/docarray/array/storage/base/seqlike.py b/docarray/array/storage/base/seqlike.py index 5e46cafe607..ce89b82a3bf 100644 --- a/docarray/array/storage/base/seqlike.py +++ b/docarray/array/storage/base/seqlike.py @@ -50,7 +50,7 @@ def __eq__(self, other): ... def __len__(self): - return len(self._offset2ids) + ... def __iter__(self) -> Iterator['Document']: for _id in self._offset2ids: diff --git a/docarray/array/storage/milvus/seqlike.py b/docarray/array/storage/milvus/seqlike.py index 1711c5b8080..4ae32860054 100644 --- a/docarray/array/storage/milvus/seqlike.py +++ b/docarray/array/storage/milvus/seqlike.py @@ -1,6 +1,6 @@ from typing import Iterable, Iterator, Union, TYPE_CHECKING from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin -from docarray.array.storage.milvus.backend import _batch_list +from docarray.array.storage.milvus.backend import _batch_list, _always_true_expr from docarray import Document @@ -56,3 +56,16 @@ def _extend(self, values: Iterable['Document'], **kwargs): payload = self._docs_to_milvus_payload(docs_batch) self._collection.insert(payload, **kwargs) self._offset2ids.extend([doc.id for doc in docs_batch]) + + def __len__(self): + if self._list_like: + return len(self._offset2ids) + else: + # Milvus has no native way to get num of entities + # so only use it as fallback option + with self.loaded_collection(): + res = self._collection.query( + expr=_always_true_expr('document_id'), + output_fields=['document_id'], + ) + return len(res) diff --git a/docs/advanced/document-store/extend.md b/docs/advanced/document-store/extend.md index a65d5ac32bb..591d2ce8832 100644 --- a/docs/advanced/document-store/extend.md +++ b/docs/advanced/document-store/extend.md @@ -145,6 +145,9 @@ class SequenceLikeMixin(BaseSequenceLikeMixin): def __add__(self, other: Union['Document', Iterable['Document']]): ... + def __len__(self): + ... + def insert(self, index: int, value: 'Document'): # Optional. By default, this will add a new item and update offset2id # if you want to customize this, make sure to handle offset2id @@ -158,10 +161,6 @@ class SequenceLikeMixin(BaseSequenceLikeMixin): # Optional. Override this if you have better implementation than appending one by one ... - def __len__(self): - # Optional. By default, this will rely on offset2id to get the length - ... - def __iter__(self) -> Iterator['Document']: # Optional. By default, this will rely on offset2id to iterate ...