From d2bfe2401839bce08aac94a2332a6edf134a6391 Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Fri, 2 Dec 2022 11:12:02 +0100
Subject: [PATCH 01/11] feat: native len for milvus
Signed-off-by: Johannes Messner
---
docarray/array/storage/milvus/seqlike.py | 10 +++++++++-
1 file changed, 9 insertions(+), 1 deletion(-)
diff --git a/docarray/array/storage/milvus/seqlike.py b/docarray/array/storage/milvus/seqlike.py
index 1711c5b8080..d1ce651c0c6 100644
--- a/docarray/array/storage/milvus/seqlike.py
+++ b/docarray/array/storage/milvus/seqlike.py
@@ -1,6 +1,6 @@
from typing import Iterable, Iterator, Union, TYPE_CHECKING
from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin
-from docarray.array.storage.milvus.backend import _batch_list
+from docarray.array.storage.milvus.backend import _batch_list, _always_true_expr
from docarray import Document
@@ -56,3 +56,11 @@ def _extend(self, values: Iterable['Document'], **kwargs):
payload = self._docs_to_milvus_payload(docs_batch)
self._collection.insert(payload, **kwargs)
self._offset2ids.extend([doc.id for doc in docs_batch])
+
+ def __len__(self):
+ with self.loaded_collection():
+ res = self._collection.query(
+ expr=_always_true_expr('document_id'),
+ output_fields=['document_id'],
+ )
+ return len(res)
From e8422c8a2f8990fcf2689d6506bcb220a457e3a6 Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Fri, 2 Dec 2022 11:15:32 +0100
Subject: [PATCH 02/11] fix: make implementing len non-optional
Signed-off-by: Johannes Messner
---
docarray/array/storage/base/seqlike.py | 2 +-
docs/advanced/document-store/extend.md | 7 +++----
2 files changed, 4 insertions(+), 5 deletions(-)
diff --git a/docarray/array/storage/base/seqlike.py b/docarray/array/storage/base/seqlike.py
index 5e46cafe607..ce89b82a3bf 100644
--- a/docarray/array/storage/base/seqlike.py
+++ b/docarray/array/storage/base/seqlike.py
@@ -50,7 +50,7 @@ def __eq__(self, other):
...
def __len__(self):
- return len(self._offset2ids)
+ ...
def __iter__(self) -> Iterator['Document']:
for _id in self._offset2ids:
diff --git a/docs/advanced/document-store/extend.md b/docs/advanced/document-store/extend.md
index a65d5ac32bb..591d2ce8832 100644
--- a/docs/advanced/document-store/extend.md
+++ b/docs/advanced/document-store/extend.md
@@ -145,6 +145,9 @@ class SequenceLikeMixin(BaseSequenceLikeMixin):
def __add__(self, other: Union['Document', Iterable['Document']]):
...
+ def __len__(self):
+ ...
+
def insert(self, index: int, value: 'Document'):
# Optional. By default, this will add a new item and update offset2id
# if you want to customize this, make sure to handle offset2id
@@ -158,10 +161,6 @@ class SequenceLikeMixin(BaseSequenceLikeMixin):
# Optional. Override this if you have better implementation than appending one by one
...
- def __len__(self):
- # Optional. By default, this will rely on offset2id to get the length
- ...
-
def __iter__(self) -> Iterator['Document']:
# Optional. By default, this will rely on offset2id to iterate
...
From 79f0fcbe470fa02e92b7042108ecea55cb6a0a71 Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 14:23:18 +0100
Subject: [PATCH 03/11] docs: add discord link to readme
Signed-off-by: Johannes Messner
---
README.md | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/README.md b/README.md
index 5b92d8ef2e9..e305a6d081d 100644
--- a/README.md
+++ b/README.md
@@ -9,8 +9,8 @@
-
+``
@@ -401,7 +401,7 @@ Intrigued? That's only scratching the surface of what DocArray is capable of. [R
## Support
-- Join our [Slack community](https://jina.ai/slack) and chat with other community members about ideas.
+- Join our [Discord server](https://discord.gg/WaMp6PVPgR) and chat with other community members about ideas.
> DocArray is a trademark of LF AI Projects, LLC
From 75618620883db7593d13d51c237e5c0db04e4d76 Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 14:28:57 +0100
Subject: [PATCH 04/11] docs: move the badge
Signed-off-by: Johannes Messner
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index e305a6d081d..de72f28d105 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,10 @@
-``
+ ``
DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API.
🚪 **Door to multimodal world**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data. The foundation data structure of [Jina](https://github.com/jina-ai/jina), [CLIP-as-service](https://github.com/jina-ai/clip-as-service), [DALL·E Flow](https://github.com/jina-ai/dalle-flow), [DiscoArt](https://github.com/jina-ai/discoart) etc.
From e23d0c541cbf21dc208d853639ece895d176894f Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 14:30:20 +0100
Subject: [PATCH 05/11] docs: move the badge
Signed-off-by: Johannes Messner
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index de72f28d105..bcd99326928 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
- ``
+`[](https://discord.gg/WaMp6PVPgR)`
DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API.
🚪 **Door to multimodal world**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data. The foundation data structure of [Jina](https://github.com/jina-ai/jina), [CLIP-as-service](https://github.com/jina-ai/clip-as-service), [DALL·E Flow](https://github.com/jina-ai/dalle-flow), [DiscoArt](https://github.com/jina-ai/discoart) etc.
From 68d37fe110cdf8d22eb94a2f3aa746082986c51c Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 14:33:56 +0100
Subject: [PATCH 06/11] docs: move the badge
Signed-off-by: Johannes Messner
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index bcd99326928..ddbf85209a1 100644
--- a/README.md
+++ b/README.md
@@ -13,7 +13,7 @@
-`[](https://discord.gg/WaMp6PVPgR)`
+[](https://discord.gg/WaMp6PVPgR)
DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API.
🚪 **Door to multimodal world**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data. The foundation data structure of [Jina](https://github.com/jina-ai/jina), [CLIP-as-service](https://github.com/jina-ai/clip-as-service), [DALL·E Flow](https://github.com/jina-ai/dalle-flow), [DiscoArt](https://github.com/jina-ai/discoart) etc.
From 878395ced2fe52abc46ccfe02807a2eda0520f3e Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 14:51:26 +0100
Subject: [PATCH 07/11] docs: move the badge
Signed-off-by: Johannes Messner
---
README.md | 1 +
1 file changed, 1 insertion(+)
diff --git a/README.md b/README.md
index ddbf85209a1..bc3602ea782 100644
--- a/README.md
+++ b/README.md
@@ -9,6 +9,7 @@
+
From b5effea269b789f99b933b48384b3a36fae7b1bb Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 14:52:31 +0100
Subject: [PATCH 08/11] docs: move the badge
Signed-off-by: Johannes Messner
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index bc3602ea782..873a892f49b 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
-
+
From ec206105b24a724503a588f8c2646b9ba6231206 Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 15:01:00 +0100
Subject: [PATCH 09/11] docs: make badge compact
Signed-off-by: Johannes Messner
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 873a892f49b..2bafa9f0015 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
-
+
From a6f2dfb4cf896f5a6720eb42f5b4020478ad1a30 Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 15:02:32 +0100
Subject: [PATCH 10/11] docs: change badge style
Signed-off-by: Johannes Messner
---
README.md | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/README.md b/README.md
index 2bafa9f0015..a1abbbf19c9 100644
--- a/README.md
+++ b/README.md
@@ -9,7 +9,7 @@
-
+
From d2d1c9c5d35c043f5e7b3a55d41ec727051e3a73 Mon Sep 17 00:00:00 2001
From: Johannes Messner
Date: Wed, 11 Jan 2023 15:03:08 +0100
Subject: [PATCH 11/11] docs: remove extra badge
Signed-off-by: Johannes Messner
---
README.md | 1 -
1 file changed, 1 deletion(-)
diff --git a/README.md b/README.md
index a1abbbf19c9..1d599d89fa7 100644
--- a/README.md
+++ b/README.md
@@ -14,7 +14,6 @@
-[](https://discord.gg/WaMp6PVPgR)
DocArray is a library for nested, unstructured, multimodal data in transit, including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process, embed, search, recommend, store, and transfer multimodal data with a Pythonic API.
🚪 **Door to multimodal world**: super-expressive data structure for representing complicated/mixed/nested text, image, video, audio, 3D mesh data. The foundation data structure of [Jina](https://github.com/jina-ai/jina), [CLIP-as-service](https://github.com/jina-ai/clip-as-service), [DALL·E Flow](https://github.com/jina-ai/dalle-flow), [DiscoArt](https://github.com/jina-ai/discoart) etc.