From d2bfe2401839bce08aac94a2332a6edf134a6391 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Fri, 2 Dec 2022 11:12:02 +0100 Subject: [PATCH 01/11] feat: native len for milvus Signed-off-by: Johannes Messner --- docarray/array/storage/milvus/seqlike.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docarray/array/storage/milvus/seqlike.py b/docarray/array/storage/milvus/seqlike.py index 1711c5b8080..d1ce651c0c6 100644 --- a/docarray/array/storage/milvus/seqlike.py +++ b/docarray/array/storage/milvus/seqlike.py @@ -1,6 +1,6 @@ from typing import Iterable, Iterator, Union, TYPE_CHECKING from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin -from docarray.array.storage.milvus.backend import _batch_list +from docarray.array.storage.milvus.backend import _batch_list, _always_true_expr from docarray import Document @@ -56,3 +56,11 @@ def _extend(self, values: Iterable['Document'], **kwargs): payload = self._docs_to_milvus_payload(docs_batch) self._collection.insert(payload, **kwargs) self._offset2ids.extend([doc.id for doc in docs_batch]) + + def __len__(self): + with self.loaded_collection(): + res = self._collection.query( + expr=_always_true_expr('document_id'), + output_fields=['document_id'], + ) + return len(res) From e8422c8a2f8990fcf2689d6506bcb220a457e3a6 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Fri, 2 Dec 2022 11:15:32 +0100 Subject: [PATCH 02/11] fix: make implementing len non-optional Signed-off-by: Johannes Messner --- docarray/array/storage/base/seqlike.py | 2 +- docs/advanced/document-store/extend.md | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docarray/array/storage/base/seqlike.py b/docarray/array/storage/base/seqlike.py index 5e46cafe607..ce89b82a3bf 100644 --- a/docarray/array/storage/base/seqlike.py +++ b/docarray/array/storage/base/seqlike.py @@ -50,7 +50,7 @@ def __eq__(self, other): ... def __len__(self): - return len(self._offset2ids) + ... def __iter__(self) -> Iterator['Document']: for _id in self._offset2ids: diff --git a/docs/advanced/document-store/extend.md b/docs/advanced/document-store/extend.md index a65d5ac32bb..591d2ce8832 100644 --- a/docs/advanced/document-store/extend.md +++ b/docs/advanced/document-store/extend.md @@ -145,6 +145,9 @@ class SequenceLikeMixin(BaseSequenceLikeMixin): def __add__(self, other: Union['Document', Iterable['Document']]): ... + def __len__(self): + ... + def insert(self, index: int, value: 'Document'): # Optional. By default, this will add a new item and update offset2id # if you want to customize this, make sure to handle offset2id @@ -158,10 +161,6 @@ class SequenceLikeMixin(BaseSequenceLikeMixin): # Optional. Override this if you have better implementation than appending one by one ... - def __len__(self): - # Optional. By default, this will rely on offset2id to get the length - ... - def __iter__(self) -> Iterator['Document']: # Optional. By default, this will rely on offset2id to iterate ... From 79f0fcbe470fa02e92b7042108ecea55cb6a0a71 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 14:23:18 +0100 Subject: [PATCH 03/11] docs: add discord link to readme Signed-off-by: Johannes Messner --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 5b92d8ef2e9..e305a6d081d 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,8 @@ Codecov branch PyPI - Downloads from official pypistats -

+`![](https://dcbadge.vercel.app/api/shield/WaMp6PVPgR)` @@ -401,7 +401,7 @@ Intrigued? That's only scratching the surface of what DocArray is capable of. [R ## Support -- Join our [Slack community](https://jina.ai/slack) and chat with other community members about ideas. +- Join our [Discord server](https://discord.gg/WaMp6PVPgR) and chat with other community members about ideas. > DocArray is a trademark of LF AI Projects, LLC From 75618620883db7593d13d51c237e5c0db04e4d76 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 14:28:57 +0100 Subject: [PATCH 04/11] docs: move the badge Signed-off-by: Johannes Messner --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e305a6d081d..de72f28d105 100644 --- a/README.md +++ b/README.md @@ -10,10 +10,10 @@ PyPI - Downloads from official pypistats

-`![](https://dcbadge.vercel.app/api/shield/WaMp6PVPgR)` + `![](https://dcbadge.vercel.app/api/shield/WaMp6PVPgR)` DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API. 🚪 **Door to multimodal world**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data. The foundation data structure of [Jina](https://github.com/jina-ai/jina), [CLIP-as-service](https://github.com/jina-ai/clip-as-service), [DALL·E Flow](https://github.com/jina-ai/dalle-flow), [DiscoArt](https://github.com/jina-ai/discoart) etc. From e23d0c541cbf21dc208d853639ece895d176894f Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 14:30:20 +0100 Subject: [PATCH 05/11] docs: move the badge Signed-off-by: Johannes Messner --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index de72f28d105..bcd99326928 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ - `![](https://dcbadge.vercel.app/api/shield/WaMp6PVPgR)` +`[![](https://dcbadge.vercel.app/api/server/WaMp6PVPgR)](https://discord.gg/WaMp6PVPgR)` DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API. 🚪 **Door to multimodal world**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data. The foundation data structure of [Jina](https://github.com/jina-ai/jina), [CLIP-as-service](https://github.com/jina-ai/clip-as-service), [DALL·E Flow](https://github.com/jina-ai/dalle-flow), [DiscoArt](https://github.com/jina-ai/discoart) etc. From 68d37fe110cdf8d22eb94a2f3aa746082986c51c Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 14:33:56 +0100 Subject: [PATCH 06/11] docs: move the badge Signed-off-by: Johannes Messner --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bcd99326928..ddbf85209a1 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ -`[![](https://dcbadge.vercel.app/api/server/WaMp6PVPgR)](https://discord.gg/WaMp6PVPgR)` +[![](https://dcbadge.vercel.app/api/server/WaMp6PVPgR)](https://discord.gg/WaMp6PVPgR) DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API. 🚪 **Door to multimodal world**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data. The foundation data structure of [Jina](https://github.com/jina-ai/jina), [CLIP-as-service](https://github.com/jina-ai/clip-as-service), [DALL·E Flow](https://github.com/jina-ai/dalle-flow), [DiscoArt](https://github.com/jina-ai/discoart) etc. From 878395ced2fe52abc46ccfe02807a2eda0520f3e Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 14:51:26 +0100 Subject: [PATCH 07/11] docs: move the badge Signed-off-by: Johannes Messner --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ddbf85209a1..bc3602ea782 100644 --- a/README.md +++ b/README.md @@ -9,6 +9,7 @@ Codecov branch PyPI - Downloads from official pypistats +

From b5effea269b789f99b933b48384b3a36fae7b1bb Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 14:52:31 +0100 Subject: [PATCH 08/11] docs: move the badge Signed-off-by: Johannes Messner --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index bc3602ea782..873a892f49b 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Codecov branch PyPI - Downloads from official pypistats - +

From ec206105b24a724503a588f8c2646b9ba6231206 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 15:01:00 +0100 Subject: [PATCH 09/11] docs: make badge compact Signed-off-by: Johannes Messner --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 873a892f49b..2bafa9f0015 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Codecov branch PyPI - Downloads from official pypistats - +

From a6f2dfb4cf896f5a6720eb42f5b4020478ad1a30 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 15:02:32 +0100 Subject: [PATCH 10/11] docs: change badge style Signed-off-by: Johannes Messner --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2bafa9f0015..a1abbbf19c9 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Codecov branch PyPI - Downloads from official pypistats - +

From d2d1c9c5d35c043f5e7b3a55d41ec727051e3a73 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 15:03:08 +0100 Subject: [PATCH 11/11] docs: remove extra badge Signed-off-by: Johannes Messner --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index a1abbbf19c9..1d599d89fa7 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,6 @@ -[![](https://dcbadge.vercel.app/api/server/WaMp6PVPgR)](https://discord.gg/WaMp6PVPgR) DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API. 🚪 **Door to multimodal world**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data. The foundation data structure of [Jina](https://github.com/jina-ai/jina), [CLIP-as-service](https://github.com/jina-ai/clip-as-service), [DALL·E Flow](https://github.com/jina-ai/dalle-flow), [DiscoArt](https://github.com/jina-ai/discoart) etc.