Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
114 commits
Select commit Hold shift + click to select a range
9226633
fix(plot): be robust against non-existing subindices
JohannesMessner Aug 26, 2022
173c85c
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Aug 30, 2022
499ac9a
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Oct 4, 2022
61c4a95
chore: add file structure
JohannesMessner Oct 4, 2022
ef8dc0a
feat: first draft of backend implementation
JohannesMessner Oct 4, 2022
4b1049e
feat: docarray can now connect to running milvus database
JohannesMessner Oct 6, 2022
14d23d1
feat: implement basics of getsetdel and seqlike
JohannesMessner Oct 6, 2022
a7e4555
fix: type hint
JohannesMessner Oct 6, 2022
5ce0bfd
fix: saving and loading offset2ids
JohannesMessner Oct 7, 2022
74db70c
feat: first implementation of vector search
JohannesMessner Oct 10, 2022
b1e3bce
refactor: declare static methods
JohannesMessner Oct 11, 2022
1a481af
feat: add consistency level as a configuration parameter
JohannesMessner Oct 12, 2022
ee50e51
feat: change default consistency to session
JohannesMessner Oct 12, 2022
0ca783b
refactor: in clear_storage, drop and re-create collection
JohannesMessner Oct 12, 2022
f4a2eb0
feat: implement filter and hybrid search
JohannesMessner Oct 12, 2022
7d7d9fb
feat: implement columns feature
JohannesMessner Oct 12, 2022
69f589b
feat: allow consistency level to be passed to extend, append, insert
JohannesMessner Oct 12, 2022
73cf538
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Oct 17, 2022
8a75dce
fix: fix columns feature in milvus
JohannesMessner Oct 17, 2022
76cc4b1
test: add test for milvus columns feature
JohannesMessner Oct 18, 2022
f3a5881
Merge branch 'main' into feat-milvus
JohannesMessner Oct 18, 2022
fb3f0b1
Merge remote-tracking branch 'origin/feat-milvus' into feat-milvus
JohannesMessner Oct 18, 2022
6e35dcf
fix: import types for type hints
JohannesMessner Oct 18, 2022
9abf9f5
fix: add pymilvus requirement
JohannesMessner Oct 18, 2022
95529c3
test: fix docker compose for milvus
JohannesMessner Oct 19, 2022
51dbbfe
Merge branch 'main' into feat-milvus
JohannesMessner Oct 19, 2022
fdab6e7
test: add milvus to test construct
JohannesMessner Oct 19, 2022
c000a90
fix: backend naming
JohannesMessner Oct 19, 2022
0c1be5b
fix: sort returned docs when accessing by id
JohannesMessner Oct 19, 2022
435e19d
test: add milvus to sequence tests
JohannesMessner Oct 19, 2022
5a9fde6
Merge remote-tracking branch 'origin/feat-milvus' into feat-milvus
JohannesMessner Oct 19, 2022
94e4a65
fix: raise keyerror on non existing id
JohannesMessner Oct 19, 2022
061e01c
refactor: use context manager to load and release collections
JohannesMessner Oct 19, 2022
c5e6b7e
fix: map embeddings to np before inserting
JohannesMessner Oct 19, 2022
f61dfda
test: add milvus to advanced indexing tests
JohannesMessner Oct 19, 2022
e9671ee
test: add milvus to test pull out
JohannesMessner Oct 19, 2022
ee8f40b
fix: deleting to size zero
JohannesMessner Oct 20, 2022
b1c6a88
fix: when accessing with no ids, return empty da instead of raising
JohannesMessner Oct 20, 2022
4a4d1c8
test: add milvus to content tests
JohannesMessner Oct 20, 2022
479d0f7
test: add milvus to del tests
JohannesMessner Oct 20, 2022
a71cab2
test: add milvus to embed tests
JohannesMessner Oct 20, 2022
d16468a
test: add milvus to test empty
JohannesMessner Oct 20, 2022
ec10d41
test: add milvus to test eval class
JohannesMessner Oct 20, 2022
f7d0a3b
test: add milvus to test find
JohannesMessner Oct 20, 2022
7aa6895
test: add milvus to getset tests
JohannesMessner Oct 20, 2022
85513b8
test: add milvus to all remaining tests
JohannesMessner Oct 20, 2022
20b19a9
feat: load and release collection in context manager
JohannesMessner Oct 24, 2022
c735702
test: use context manager to speed up milvus tests
JohannesMessner Oct 24, 2022
d5c0221
test: add milvus to test plot
JohannesMessner Oct 25, 2022
bb4185a
Merge branch 'main' into feat-milvus
JohannesMessner Oct 25, 2022
eefbe31
test: fix plot tests for milvus
JohannesMessner Oct 25, 2022
5bd9211
test: fix multimodal find test
JohannesMessner Oct 25, 2022
0cab46d
test: fix test embed
JohannesMessner Oct 25, 2022
d4a5f36
test: use context manager to speed up milvus
JohannesMessner Oct 25, 2022
363bc08
fix: implement state methods to enable pickling
JohannesMessner Oct 25, 2022
161dc1f
test: fix more tests
JohannesMessner Oct 26, 2022
7831b77
feat: add overloaded milvus init
JohannesMessner Oct 26, 2022
21126da
test: fix even more tests
JohannesMessner Oct 26, 2022
f0832db
refactor: remove some comments
JohannesMessner Oct 26, 2022
ee310da
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Oct 26, 2022
29d22e7
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Oct 27, 2022
28e371c
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Oct 31, 2022
84f4bb0
test: use context manager even more
JohannesMessner Oct 31, 2022
3bdfbb4
test: moar context manager usage
JohannesMessner Oct 31, 2022
752eb7e
test: fix test fixture input
JohannesMessner Oct 31, 2022
8b7b9a4
test: remove milvus from test that it can't handle
JohannesMessner Oct 31, 2022
8f15868
test: remove milvus from one more test
JohannesMessner Nov 1, 2022
4f97db9
refactor: better mechanism for automatic collection loading
JohannesMessner Nov 2, 2022
c4fcf4c
docs: add docs section for milvus
JohannesMessner Nov 2, 2022
7569c6d
chore: remove comment
JohannesMessner Nov 2, 2022
b44c326
fix: enforce limit for filter only queries
JohannesMessner Nov 2, 2022
6496a4c
docs: fix typo
JohannesMessner Nov 2, 2022
83d8ba0
docs: add milvus to comparison and add docstring
JohannesMessner Nov 2, 2022
e003618
feat: bulk extend
JohannesMessner Nov 2, 2022
fa4c070
docs: document advanced milvus options
JohannesMessner Nov 2, 2022
9dd3b74
feat: allow passing of kwargs to insert
JohannesMessner Nov 2, 2022
20da30b
chore: remove comment
JohannesMessner Nov 2, 2022
6d9dfe5
chore: update accepted array types
JohannesMessner Nov 2, 2022
0b659c8
test: add milvus specific tests
JohannesMessner Nov 3, 2022
3bf691f
docs: apply changes from code review
JohannesMessner Nov 3, 2022
6ba7fac
docs: update docs/advanced/document-store/milvus.md
JohannesMessner Nov 3, 2022
e021866
refactor: update type hint
JohannesMessner Nov 4, 2022
50752ce
refactor: apply suggestions from code review
JohannesMessner Nov 4, 2022
1c20cee
Merge remote-tracking branch 'origin/feat-milvus' into feat-milvus
JohannesMessner Nov 4, 2022
4c7df10
docs: clarify docstring
JohannesMessner Nov 4, 2022
af8dddd
docs: fix faulty docstring
JohannesMessner Nov 4, 2022
fea7c28
Merge branch 'main' into feat-milvus
JohannesMessner Nov 4, 2022
686fef0
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Nov 4, 2022
91ec3bb
refactor: another round of review changes
JohannesMessner Nov 4, 2022
5c026d7
Merge remote-tracking branch 'origin/feat-milvus' into feat-milvus
JohannesMessner Nov 4, 2022
4920635
fix: set consistency level for offset id loading
JohannesMessner Nov 4, 2022
cb4a39c
fix: set stricter default consistency level
JohannesMessner Nov 8, 2022
a103eae
Merge branch 'main' into feat-milvus
JohannesMessner Nov 8, 2022
30bd965
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Nov 8, 2022
9b47d2c
Merge branch 'main' into feat-milvus
JohannesMessner Nov 8, 2022
4b9e473
perf: optimize sorting of retrieved documents
JohannesMessner Nov 8, 2022
70828e4
docs: document loading context manager
JohannesMessner Nov 8, 2022
d672398
refactor: find unboxing done by base class
JohannesMessner Nov 8, 2022
00b4de4
feat: add batching
JohannesMessner Nov 8, 2022
b45d214
test: add test for batch size
JohannesMessner Nov 8, 2022
e75c57f
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Nov 8, 2022
c1b09ab
Merge branch 'main' into feat-milvus
JohannesMessner Nov 8, 2022
3055e63
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Nov 9, 2022
697e513
Merge remote-tracking branch 'origin/main' into main
JohannesMessner Nov 9, 2022
26df898
Merge branch 'main' into feat-milvus
JohannesMessner Nov 9, 2022
005b2e5
test: restart milvus if it breaks
JohannesMessner Nov 11, 2022
012d01d
test: dont remove-orphans where it is not needed
JohannesMessner Nov 11, 2022
412d79e
feat: add ability to disable list-like behaviour
JohannesMessner Nov 14, 2022
acf832a
Merge branch 'main' into feat-milvus
JohannesMessner Nov 14, 2022
528035e
Merge branch 'main' into feat-milvus
JohannesMessner Nov 14, 2022
dcad639
ci: increase timeout to see how long it really takes
JohannesMessner Nov 15, 2022
3aecd6b
docs: apply suggestions from code review
JohannesMessner Nov 15, 2022
14c4a8f
ci: change timeouts
JohannesMessner Nov 17, 2022
83aebca
Merge branch 'main' into feat-milvus
JohannesMessner Nov 22, 2022
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ jobs:
pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \
-v -s -m "not gpu" ${{ matrix.test-path }}
echo "codecov_flag=docarray" >> $GITHUB_OUTPUT
timeout-minutes: 45
timeout-minutes: 60
env:
JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}"
- name: Check codecov file
Expand Down Expand Up @@ -238,7 +238,7 @@ jobs:
pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \
-v -s -m "not gpu" ${{ matrix.test-path }}
# Publish the step output through the $GITHUB_OUTPUT file; the `::set-output`
# workflow command is deprecated, and the other test job already uses this form.
echo "codecov_flag=docarray" >> $GITHUB_OUTPUT
timeout-minutes: 40
timeout-minutes: 60
env:
JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}"
- name: Check codecov file
Expand Down
16 changes: 16 additions & 0 deletions docarray/array/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,13 @@
from docarray.array.weaviate import DocumentArrayWeaviate
from docarray.array.elastic import DocumentArrayElastic
from docarray.array.redis import DocumentArrayRedis
from docarray.array.milvus import DocumentArrayMilvus
from docarray.array.storage.sqlite import SqliteConfig
from docarray.array.storage.annlite import AnnliteConfig
from docarray.array.storage.weaviate import WeaviateConfig
from docarray.array.storage.elastic import ElasticConfig
from docarray.array.storage.redis import RedisConfig
from docarray.array.storage.milvus import MilvusConfig


class DocumentArray(AllMixins, BaseDocumentArray):
Expand Down Expand Up @@ -140,6 +142,16 @@ def __new__(
"""Create a Redis-powered DocumentArray object."""
...

@overload
def __new__(
    cls,
    _docs: Optional['DocumentArraySourceType'] = None,
    storage: str = 'milvus',
    config: Optional[Union['MilvusConfig', Dict]] = None,
) -> 'DocumentArrayMilvus':
    """Create a Milvus-powered DocumentArray object.

    Typing-only overload: the body is a stub and exists so that type checkers
    resolve the ``storage='milvus'`` call form to ``DocumentArrayMilvus``.

    :param _docs: optional source to build the array from (presumably the initial
        Documents; confirm against ``DocumentArraySourceType``).
    :param storage: name of the storage backend; ``'milvus'`` for this overload.
    :param config: Milvus settings, given either as a ``MilvusConfig`` or as a dict.
    :return: a ``DocumentArrayMilvus`` instance.
    """
    ...

def __enter__(self):
self._exit_stack = ExitStack()
# Ensure that we sync the data to the storage backend when exiting the context manager
Expand Down Expand Up @@ -184,6 +196,10 @@ def __new__(cls, *args, storage: str = 'memory', **kwargs):
from .redis import DocumentArrayRedis

instance = super().__new__(DocumentArrayRedis)
elif storage == 'milvus':
from .milvus import DocumentArrayMilvus

instance = super().__new__(DocumentArrayMilvus)

else:
raise ValueError(f'storage=`{storage}` is not supported.')
Expand Down
46 changes: 46 additions & 0 deletions docarray/array/milvus.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
from .document import DocumentArray

from .storage.milvus import StorageMixins, MilvusConfig

# Public names of this module: the Milvus config class and the Milvus-backed array.
__all__ = ['MilvusConfig', 'DocumentArrayMilvus']


class DocumentArrayMilvus(StorageMixins, DocumentArray):
    """
    A DocumentArray whose Documents are stored in the `Milvus <https://milvus.io/>`_ vector search engine.

    .. note::
        `pymilvus` is required for this DocumentArray. Install it with `pip install "docarray[milvus]"`.

        Using Milvus as the storage backend requires a running Milvus service.

    :meth:`match` and :meth:`find` perform fast (approximate) vector search with this implementation.
    Search with filters is supported as well.

    Example usage:

    .. code-block:: python

        from docarray import DocumentArray

        # connect to a running Milvus service using the default configuration (address: http://localhost:19530)
        da = DocumentArray(storage='milvus', config={'n_dim': 10})

        # connect to a previously persisted DocumentArrayMilvus by giving collection_name, host, and port
        da = DocumentArray(
            storage='milvus',
            config={
                'collection_name': 'persisted',
                'host': 'localhost',
                'port': '19530',
                'n_dim': 10,
            },
        )


    .. seealso::
        The :ref:`user guide <milvus>` has further details.
    """

    def __new__(cls, *args, **kwargs):
        # Only the class is handed to the parent ``__new__``; the positional and
        # keyword arguments are accepted here but not forwarded.
        instance = super().__new__(cls)
        return instance
2 changes: 1 addition & 1 deletion docarray/array/mixins/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ def find(
limit: Optional[Union[int, float]] = 20,
metric_name: Optional[str] = None,
exclude_self: bool = False,
filter: Optional[Dict] = None,
filter: Union[Dict, str, None] = None,
only_id: bool = False,
index: str = 'text',
on: Optional[str] = None,
Expand Down
5 changes: 3 additions & 2 deletions docarray/array/storage/base/seqlike.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,14 @@ def _update_subindices_append_extend(self, value):
if len(docs_selector) > 0:
da.extend(docs_selector)

def insert(self, index: int, value: 'Document'):
def insert(self, index: int, value: 'Document', **kwargs):
"""Insert `doc` at `index`.

:param index: Position of the insertion.
:param value: The doc needs to be inserted.
:param kwargs: Additional Arguments that are passed to the Document Store. This has no effect for in-memory DocumentArray.
"""
self._set_doc_by_id(value.id, value)
self._set_doc_by_id(value.id, value, **kwargs)
self._offset2ids.insert(index, value.id)

def append(self, value: 'Document', **kwargs):
Expand Down
12 changes: 12 additions & 0 deletions docarray/array/storage/milvus/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from abc import ABC

from .backend import BackendMixin, MilvusConfig
from .find import FindMixin
from .getsetdel import GetSetDelMixin
from .seqlike import SequenceLikeMixin

# Public names of this package: the combined storage mixin and the Milvus config class.
__all__ = ['StorageMixins', 'MilvusConfig']


class StorageMixins(FindMixin, BackendMixin, GetSetDelMixin, SequenceLikeMixin, ABC):
    """Abstract class bundling the Milvus storage mixins.

    Inherits, in this order, from ``FindMixin``, ``BackendMixin``,
    ``GetSetDelMixin`` and ``SequenceLikeMixin``; it defines no members of its own.
    """

    ...
Loading