From d2bfe2401839bce08aac94a2332a6edf134a6391 Mon Sep 17 00:00:00 2001
From: Johannes Messner <messnerjo@gmail.com>
Date: Fri, 2 Dec 2022 11:12:02 +0100
Subject: [PATCH 1/5] feat: native len for milvus

Signed-off-by: Johannes Messner <messnerjo@gmail.com>
---
 docarray/array/storage/milvus/seqlike.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/docarray/array/storage/milvus/seqlike.py b/docarray/array/storage/milvus/seqlike.py
index 1711c5b8080..d1ce651c0c6 100644
--- a/docarray/array/storage/milvus/seqlike.py
+++ b/docarray/array/storage/milvus/seqlike.py
@@ -1,6 +1,6 @@
 from typing import Iterable, Iterator, Union, TYPE_CHECKING
 from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin
-from docarray.array.storage.milvus.backend import _batch_list
+from docarray.array.storage.milvus.backend import _batch_list, _always_true_expr
 from docarray import Document
 
 
@@ -56,3 +56,11 @@ def _extend(self, values: Iterable['Document'], **kwargs):
             payload = self._docs_to_milvus_payload(docs_batch)
             self._collection.insert(payload, **kwargs)
             self._offset2ids.extend([doc.id for doc in docs_batch])
+
+    def __len__(self):  # counts rows in Milvus instead of reading self._offset2ids
+        with self.loaded_collection():
+            res = self._collection.query(
+                expr=_always_true_expr('document_id'),  # presumably matches all rows; verify
+                output_fields=['document_id'],  # fetch only the id field
+            )
+            return len(res)  # number of results the query returned

From e8422c8a2f8990fcf2689d6506bcb220a457e3a6 Mon Sep 17 00:00:00 2001
From: Johannes Messner <messnerjo@gmail.com>
Date: Fri, 2 Dec 2022 11:15:32 +0100
Subject: [PATCH 2/5] fix: make implementing len non-optional

Signed-off-by: Johannes Messner <messnerjo@gmail.com>
---
 docarray/array/storage/base/seqlike.py | 2 +-
 docs/advanced/document-store/extend.md | 7 +++----
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/docarray/array/storage/base/seqlike.py b/docarray/array/storage/base/seqlike.py
index 5e46cafe607..ce89b82a3bf 100644
--- a/docarray/array/storage/base/seqlike.py
+++ b/docarray/array/storage/base/seqlike.py
@@ -50,7 +50,7 @@ def __eq__(self, other):
         ...
 
     def __len__(self):
-        return len(self._offset2ids)
+        raise NotImplementedError('storage backends must implement __len__')
 
     def __iter__(self) -> Iterator['Document']:
         for _id in self._offset2ids:
diff --git a/docs/advanced/document-store/extend.md b/docs/advanced/document-store/extend.md
index a65d5ac32bb..591d2ce8832 100644
--- a/docs/advanced/document-store/extend.md
+++ b/docs/advanced/document-store/extend.md
@@ -145,6 +145,9 @@ class SequenceLikeMixin(BaseSequenceLikeMixin):
     def __add__(self, other: Union['Document', Iterable['Document']]):
         ...
 
+    def __len__(self):
+        ...
+
     def insert(self, index: int, value: 'Document'):
         # Optional. By default, this will add a new item and update offset2id
         # if you want to customize this, make sure to handle offset2id
@@ -158,10 +161,6 @@ class SequenceLikeMixin(BaseSequenceLikeMixin):
         # Optional. Override this if you have better implementation than appending one by one
         ...
 
-    def __len__(self):
-        # Optional. By default, this will rely on offset2id to get the length
-        ...
-
     def __iter__(self) -> Iterator['Document']:
         # Optional. By default, this will rely on offset2id to iterate
         ...

From 836ba44a5704b81422155aeef100421e6674ac83 Mon Sep 17 00:00:00 2001
From: Johannes Messner <messnerjo@gmail.com>
Date: Thu, 12 Jan 2023 16:32:20 +0100
Subject: [PATCH 3/5] test: remove storage backends from evaluation tests

Signed-off-by: Johannes Messner <messnerjo@gmail.com>
---
 .../array/mixins/oldproto/test_eval_class.py  | 340 +++---------------
 1 file changed, 40 insertions(+), 300 deletions(-)

diff --git a/tests/unit/array/mixins/oldproto/test_eval_class.py b/tests/unit/array/mixins/oldproto/test_eval_class.py
index 0850a03804b..560a67ff18e 100644
--- a/tests/unit/array/mixins/oldproto/test_eval_class.py
+++ b/tests/unit/array/mixins/oldproto/test_eval_class.py
@@ -12,20 +12,6 @@
 from docarray import DocumentArray, Document
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
 @pytest.mark.parametrize(
     'metric_fn, kwargs',
     [
@@ -39,10 +25,10 @@
         ('ndcg_at_k', {}),
     ],
 )
-def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_storage):
+def test_eval_mixin_perfect_match(metric_fn, kwargs, config):
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, storage=storage, config=config)
+    da1_index = DocumentArray(da1, config=config)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     r = da1.evaluate(ground_truth=da1, metrics=[metric_fn], strict=False, **kwargs)[
@@ -54,21 +40,7 @@ def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_stor
         assert d.evaluations[metric_fn].value == 1.0
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
-def test_eval_mixin_perfect_match_multiple_metrics(storage, config, start_storage):
+def test_eval_mixin_perfect_match_multiple_metrics(config):
     metric_fns = [
         'r_precision',
         'precision_at_k',
@@ -82,7 +54,7 @@ def test_eval_mixin_perfect_match_multiple_metrics(storage, config, start_storag
     kwargs = {'max_rel': 9}
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, storage=storage, config=config)
+    da1_index = DocumentArray(da1, config=config)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     r = da1.evaluate(ground_truth=da1, metrics=metric_fns, strict=False, **kwargs)
@@ -94,18 +66,6 @@ def test_eval_mixin_perfect_match_multiple_metrics(storage, config, start_storag
             assert d.evaluations[metric_fn].value == 1.0
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-    ],
-)
 @pytest.mark.parametrize(
     'metric_fn, kwargs',
     [
@@ -119,14 +79,12 @@ def test_eval_mixin_perfect_match_multiple_metrics(storage, config, start_storag
         ('ndcg_at_k', {}),
     ],
 )
-def test_eval_mixin_perfect_match_labeled(
-    metric_fn, kwargs, storage, config, start_storage
-):
+def test_eval_mixin_perfect_match_labeled(metric_fn, kwargs, config):
     da1 = DocumentArray.empty(10)
     for d in da1:
         d.tags = {'label': 'A'}
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, storage=storage, config=config)
+    da1_index = DocumentArray(da1, config=config)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     r = da1.evaluate(metrics=[metric_fn], **kwargs)[metric_fn]
@@ -136,20 +94,6 @@ def test_eval_mixin_perfect_match_labeled(
         assert d.evaluations[metric_fn].value == 1.0
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
 @pytest.mark.parametrize(
     'metric_fn, kwargs',
     [
@@ -163,7 +107,7 @@ def test_eval_mixin_perfect_match_labeled(
         ('ndcg_at_k', {}),
     ],
 )
-def test_eval_mixin_zero_labeled(storage, config, metric_fn, start_storage, kwargs):
+def test_eval_mixin_zero_labeled(config, metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     for d in da1:
         d.tags = {'label': 'A'}
@@ -171,7 +115,7 @@ def test_eval_mixin_zero_labeled(storage, config, metric_fn, start_storage, kwar
     da2 = copy.deepcopy(da1)
     for d in da2:
         d.tags = {'label': 'B'}
-    da1_index = DocumentArray(da2, storage=storage, config=config)
+    da1_index = DocumentArray(da2, config=config)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     r = da1.evaluate([metric_fn], **kwargs)[metric_fn]
@@ -236,20 +180,6 @@ def test_missing_max_rel_should_raise():
         )
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
 @pytest.mark.parametrize(
     'metric_fn, kwargs',
     [
@@ -263,15 +193,15 @@ def test_missing_max_rel_should_raise():
         ('ndcg_at_k', {}),
     ],
 )
-def test_eval_mixin_zero_match(storage, config, metric_fn, start_storage, kwargs):
+def test_eval_mixin_zero_match(config, metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, storage=storage, config=config)
+    da1_index = DocumentArray(da1, config=config)
     da1.match(da1_index, exclude_self=True)
 
     da2 = copy.deepcopy(da1)
     da2.embeddings = np.random.random([10, 256])
-    da2_index = DocumentArray(da2, storage=storage, config=config)
+    da2_index = DocumentArray(da2, config=config)
     with da2_index:
         da2.match(da2_index, exclude_self=True)
 
@@ -283,77 +213,35 @@ def test_eval_mixin_zero_match(storage, config, metric_fn, start_storage, kwargs
         assert d.evaluations[metric_fn].value == 1.0
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
-def test_diff_len_should_raise(storage, config, start_storage):
+def test_diff_len_should_raise(config):
     da1 = DocumentArray.empty(10)
     da2 = DocumentArray.empty(5)
     for d in da2:
         d.matches.append(da2[0])
-    da2 = DocumentArray(da2, storage=storage, config=config)
+    da2 = DocumentArray(da2, config=config)
     with pytest.raises(ValueError):
         da1.evaluate(ground_truth=da2, metrics=['precision_at_k'])
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
-def test_diff_hash_fun_should_raise(storage, config, start_storage):
+def test_diff_hash_fun_should_raise(config):
     da1 = DocumentArray.empty(10)
     da2 = DocumentArray.empty(5)
     for d in da2:
         d.matches.append(da2[0])
-    da2 = DocumentArray(da2, storage=storage, config=config)
+    da2 = DocumentArray(da2, config=config)
     with pytest.raises(ValueError):
         da1.evaluate(ground_truth=da2, metrics=['precision_at_k'])
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 3}),
-        ('qdrant', {'n_dim': 3}),
-        ('elasticsearch', {'n_dim': 3}),
-        ('redis', {'n_dim': 3}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 3}),
-    ],
-)
-def test_same_hash_same_len_fun_should_work(storage, config, start_storage):
+def test_same_hash_same_len_fun_should_work(config):
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 3])
-    da1_index = DocumentArray(da1, storage=storage, config=config)
+    da1_index = DocumentArray(da1, config=config)
     with da1_index:
         da1.match(da1_index)
     da2 = DocumentArray.empty(10)
     da2.embeddings = np.random.random([10, 3])
-    da2_index = DocumentArray(da1, storage=storage, config=config)
+    da2_index = DocumentArray(da1, config=config)
     with da2_index:
         da2.match(da2_index)
     with da1_index, da2_index:
@@ -365,25 +253,11 @@ def test_same_hash_same_len_fun_should_work(storage, config, start_storage):
         da1.evaluate(ground_truth=da2, metrics=['precision_at_k'])
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 3}),
-        ('qdrant', {'n_dim': 3}),
-        ('elasticsearch', {'n_dim': 3}),
-        ('redis', {'n_dim': 3}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 3}),
-    ],
-)
-def test_adding_noise(storage, config, start_storage):
+def test_adding_noise(config):
     da = DocumentArray.empty(10)
 
     da.embeddings = np.random.random([10, 3])
-    da_index = DocumentArray(da, storage=storage, config=config)
+    da_index = DocumentArray(da, config=config)
     with da_index:
         da.match(da_index, exclude_self=True)
 
@@ -404,20 +278,6 @@ def test_adding_noise(storage, config, start_storage):
         assert 0.0 < d.evaluations['precision_at_k'].value < 1.0
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 128}),
-        ('qdrant', {'n_dim': 128}),
-        ('elasticsearch', {'n_dim': 128}),
-        ('redis', {'n_dim': 128}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 128}),
-    ],
-)
 @pytest.mark.parametrize(
     'metric_fn, kwargs',
     [
@@ -425,7 +285,7 @@ def test_adding_noise(storage, config, start_storage):
         ('f1_score_at_k', {}),
     ],
 )
-def test_diff_match_len_in_gd(storage, config, metric_fn, start_storage, kwargs):
+def test_diff_match_len_in_gd(config, metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 128])
     # da1_index = DocumentArray(da1, storage=storage, config=config)
@@ -433,7 +293,7 @@ def test_diff_match_len_in_gd(storage, config, metric_fn, start_storage, kwargs)
 
     da2 = copy.deepcopy(da1)
     da2.embeddings = np.random.random([10, 128])
-    da2_index = DocumentArray(da2, storage=storage, config=config)
+    da2_index = DocumentArray(da2, config=config)
     with da2_index:
         da2.match(da2_index, exclude_self=True)
         # pop some matches from first document
@@ -448,66 +308,24 @@ def test_diff_match_len_in_gd(storage, config, metric_fn, start_storage, kwargs)
         assert d.evaluations[metric_fn].value > 0.9
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
-def test_empty_da_should_raise(storage, config, start_storage):
-    da = DocumentArray([], storage=storage, config=config)
+def test_empty_da_should_raise(config):
+    da = DocumentArray([], config=config)
     with pytest.raises(ValueError):
         da.evaluate(metrics=['precision_at_k'])
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
-def test_missing_groundtruth_should_raise(storage, config, start_storage):
-    da = DocumentArray(DocumentArray.empty(10), storage=storage, config=config)
+def test_missing_groundtruth_should_raise(config):
+    da = DocumentArray(DocumentArray.empty(10), config=config)
     with pytest.raises(RuntimeError):
         da.evaluate(metrics=['precision_at_k'])
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 256}),
-        ('qdrant', {'n_dim': 256}),
-        ('elasticsearch', {'n_dim': 256}),
-        ('redis', {'n_dim': 256}),
-        # milvus should pass individually, but on the CI it fails
-        # ('milvus', {'n_dim': 256}),
-    ],
-)
-def test_useless_groundtruth_warning_should_raise(storage, config, start_storage):
+def test_useless_groundtruth_warning_should_raise(config):
     da1 = DocumentArray.empty(10)
     for d in da1:
         d.tags = {'label': 'A'}
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, storage=storage, config=config)
+    da1_index = DocumentArray(da1, config=config)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     da2 = DocumentArray.empty(10)
@@ -521,23 +339,11 @@ def dummy_embed_function(da):
         da[i, 'embedding'] = np.random.random(5)
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 5}),
-        ('qdrant', {'n_dim': 5}),
-        ('elasticsearch', {'n_dim': 5}),
-        ('redis', {'n_dim': 5}),
-    ],
-)
-def test_embed_and_evaluate_single_da(storage, config, start_storage):
+def test_embed_and_evaluate_single_da(config):
 
     gt = DocumentArray([Document(text=str(i)) for i in range(10)])
     queries_da = DocumentArray(gt, copy=True)
-    queries_da = DocumentArray(queries_da, storage=storage, config=config)
+    queries_da = DocumentArray(queries_da, config=config)
     dummy_embed_function(gt)
     gt.match(gt, limit=3)
 
@@ -602,25 +408,13 @@ def test_embed_and_evaluate_with_and_without_exclude_self(
     'sample_size',
     [None, 10],
 )
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 5}),
-        ('qdrant', {'n_dim': 5}),
-        ('elasticsearch', {'n_dim': 5}),
-        ('redis', {'n_dim': 5}),
-    ],
-)
-def test_embed_and_evaluate_two_das(storage, config, sample_size, start_storage):
+def test_embed_and_evaluate_two_das(config, sample_size):
 
     gt_queries = DocumentArray([Document(text=str(i)) for i in range(100)])
     gt_index = DocumentArray([Document(text=str(i)) for i in range(100, 200)])
     queries_da = DocumentArray(gt_queries, copy=True)
     index_da = DocumentArray(gt_index, copy=True)
-    index_da = DocumentArray(index_da, storage=storage, config=config)
+    index_da = DocumentArray(index_da, config=config)
     dummy_embed_function(gt_queries)
     dummy_embed_function(gt_index)
     gt_queries.match(gt_index, limit=3)
@@ -682,21 +476,7 @@ def test_embed_and_evaluate_two_different_das():
         ),
     ],
 )
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 5}),
-        ('qdrant', {'n_dim': 5}),
-        ('elasticsearch', {'n_dim': 5}),
-        ('redis', {'n_dim': 5}),
-    ],
-)
-def test_embed_and_evaluate_labeled_dataset(
-    storage, config, start_storage, use_index, expected, label_tag
-):
+def test_embed_and_evaluate_labeled_dataset(config, use_index, expected, label_tag):
     metric_fns = list(expected.keys())
 
     def emb_func(da):
@@ -704,7 +484,7 @@ def emb_func(da):
         da[:, 'embedding'] = np.random.random((len(da), 5))
 
     da1 = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)])
-    da2 = DocumentArray(da1, storage=storage, config=config, copy=True)
+    da2 = DocumentArray(da1, config=config, copy=True)
 
     with da2:
         if (
@@ -804,27 +584,13 @@ def bert_tokenizer():
     return BertTokenizer.from_pretrained('bert-base-uncased')
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 768}),
-        ('qdrant', {'n_dim': 768}),
-        ('elasticsearch', {'n_dim': 768}),
-        ('redis', {'n_dim': 768}),
-    ],
-)
-def test_embed_and_evaluate_with_embed_model(
-    storage, config, bert_tokenizer, start_storage
-):
+def test_embed_and_evaluate_with_embed_model(config, bert_tokenizer):
     model = BertModel(BertConfig())
     collate_fn = lambda da: bert_tokenizer(da.texts, return_tensors='pt')
     da = DocumentArray(
         [Document(text=f'some text {i}', tags={'label': str(i)}) for i in range(5)]
     )
-    da = DocumentArray(da, storage=storage, config=config)
+    da = DocumentArray(da, config=config)
     with da:
         res = da.embed_and_evaluate(
             metrics=['precision_at_k'], embed_models=model, collate_fns=collate_fn
@@ -850,45 +616,19 @@ def test_embed_and_evaluate_with_embed_model(
         ),
     ],
 )
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 5}),
-        ('qdrant', {'n_dim': 5}),
-        ('elasticsearch', {'n_dim': 5}),
-        ('redis', {'n_dim': 5}),
-    ],
-)
 def test_embed_and_evaluate_invalid_input_should_raise(
-    storage, config, queries, kwargs, exception, start_storage
+    config, queries, kwargs, exception
 ):
     kwargs.update({'metrics': ['precision_at_k']})
     if 'index_data' in kwargs:
-        kwargs['index_data'] = DocumentArray(
-            kwargs['index_data'], storage=storage, config=config
-        )
+        kwargs['index_data'] = DocumentArray(kwargs['index_data'], config=config)
 
     with pytest.raises(exception):
         queries.embed_and_evaluate(**kwargs)
 
 
-@pytest.mark.parametrize(
-    'storage, config',
-    [
-        ('memory', {}),
-        ('weaviate', {}),
-        ('sqlite', {}),
-        ('annlite', {'n_dim': 5}),
-        ('qdrant', {'n_dim': 5}),
-        ('elasticsearch', {'n_dim': 5}),
-        ('redis', {'n_dim': 5}),
-    ],
-)
 @pytest.mark.parametrize('sample_size', [100, 1_000, 10_000])
-def test_embed_and_evaluate_sampling(storage, config, sample_size, start_storage):
+def test_embed_and_evaluate_sampling(config, sample_size):
     metric_fns = ['precision_at_k', 'reciprocal_rank']
 
     def emb_func(da):
@@ -898,7 +638,7 @@ def emb_func(da):
     da1 = DocumentArray(
         [Document(text=str(i), tags={'label': i % 20}) for i in range(2_000)]
     )
-    da2 = DocumentArray(da1, storage=storage, config=config, copy=True)
+    da2 = DocumentArray(da1, config=config, copy=True)
 
     with da2:
         res = da1.embed_and_evaluate(

From b31f34863890c2fee4a398668e5a4f78faf560cb Mon Sep 17 00:00:00 2001
From: Johannes Messner <messnerjo@gmail.com>
Date: Thu, 12 Jan 2023 20:34:57 +0100
Subject: [PATCH 4/5] test: fix eval tests

Signed-off-by: Johannes Messner <messnerjo@gmail.com>
---
 .../array/mixins/oldproto/test_eval_class.py  | 86 ++++++++++---------
 1 file changed, 44 insertions(+), 42 deletions(-)

diff --git a/tests/unit/array/mixins/oldproto/test_eval_class.py b/tests/unit/array/mixins/oldproto/test_eval_class.py
index 560a67ff18e..49feeb414c2 100644
--- a/tests/unit/array/mixins/oldproto/test_eval_class.py
+++ b/tests/unit/array/mixins/oldproto/test_eval_class.py
@@ -25,10 +25,10 @@
         ('ndcg_at_k', {}),
     ],
 )
-def test_eval_mixin_perfect_match(metric_fn, kwargs, config):
+def test_eval_mixin_perfect_match(metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, config=config)
+    da1_index = DocumentArray(da1)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     r = da1.evaluate(ground_truth=da1, metrics=[metric_fn], strict=False, **kwargs)[
@@ -40,7 +40,7 @@ def test_eval_mixin_perfect_match(metric_fn, kwargs, config):
         assert d.evaluations[metric_fn].value == 1.0
 
 
-def test_eval_mixin_perfect_match_multiple_metrics(config):
+def test_eval_mixin_perfect_match_multiple_metrics():
     metric_fns = [
         'r_precision',
         'precision_at_k',
@@ -54,7 +54,7 @@ def test_eval_mixin_perfect_match_multiple_metrics(config):
     kwargs = {'max_rel': 9}
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, config=config)
+    da1_index = DocumentArray(da1)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     r = da1.evaluate(ground_truth=da1, metrics=metric_fns, strict=False, **kwargs)
@@ -79,12 +79,12 @@ def test_eval_mixin_perfect_match_multiple_metrics(config):
         ('ndcg_at_k', {}),
     ],
 )
-def test_eval_mixin_perfect_match_labeled(metric_fn, kwargs, config):
+def test_eval_mixin_perfect_match_labeled(metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     for d in da1:
         d.tags = {'label': 'A'}
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, config=config)
+    da1_index = DocumentArray(da1)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     r = da1.evaluate(metrics=[metric_fn], **kwargs)[metric_fn]
@@ -107,7 +107,7 @@ def test_eval_mixin_perfect_match_labeled(metric_fn, kwargs, config):
         ('ndcg_at_k', {}),
     ],
 )
-def test_eval_mixin_zero_labeled(config, metric_fn, kwargs):
+def test_eval_mixin_zero_labeled(metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     for d in da1:
         d.tags = {'label': 'A'}
@@ -115,7 +115,7 @@ def test_eval_mixin_zero_labeled(config, metric_fn, kwargs):
     da2 = copy.deepcopy(da1)
     for d in da2:
         d.tags = {'label': 'B'}
-    da1_index = DocumentArray(da2, config=config)
+    da1_index = DocumentArray(da2)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     r = da1.evaluate([metric_fn], **kwargs)[metric_fn]
@@ -193,15 +193,15 @@ def test_missing_max_rel_should_raise():
         ('ndcg_at_k', {}),
     ],
 )
-def test_eval_mixin_zero_match(config, metric_fn, kwargs):
+def test_eval_mixin_zero_match(metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, config=config)
+    da1_index = DocumentArray(da1)
     da1.match(da1_index, exclude_self=True)
 
     da2 = copy.deepcopy(da1)
     da2.embeddings = np.random.random([10, 256])
-    da2_index = DocumentArray(da2, config=config)
+    da2_index = DocumentArray(da2)
     with da2_index:
         da2.match(da2_index, exclude_self=True)
 
@@ -213,35 +213,35 @@ def test_eval_mixin_zero_match(config, metric_fn, kwargs):
         assert d.evaluations[metric_fn].value == 1.0
 
 
-def test_diff_len_should_raise(config):
+def test_diff_len_should_raise():
     da1 = DocumentArray.empty(10)
     da2 = DocumentArray.empty(5)
     for d in da2:
         d.matches.append(da2[0])
-    da2 = DocumentArray(da2, config=config)
+    da2 = DocumentArray(da2)
     with pytest.raises(ValueError):
         da1.evaluate(ground_truth=da2, metrics=['precision_at_k'])
 
 
-def test_diff_hash_fun_should_raise(config):
+def test_diff_hash_fun_should_raise():
     da1 = DocumentArray.empty(10)
     da2 = DocumentArray.empty(5)
     for d in da2:
         d.matches.append(da2[0])
-    da2 = DocumentArray(da2, config=config)
+    da2 = DocumentArray(da2)
     with pytest.raises(ValueError):
         da1.evaluate(ground_truth=da2, metrics=['precision_at_k'])
 
 
-def test_same_hash_same_len_fun_should_work(config):
+def test_same_hash_same_len_fun_should_work():
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 3])
-    da1_index = DocumentArray(da1, config=config)
+    da1_index = DocumentArray(da1)
     with da1_index:
         da1.match(da1_index)
     da2 = DocumentArray.empty(10)
     da2.embeddings = np.random.random([10, 3])
-    da2_index = DocumentArray(da1, config=config)
+    da2_index = DocumentArray(da1)
     with da2_index:
         da2.match(da2_index)
     with da1_index, da2_index:
@@ -253,11 +253,11 @@ def test_same_hash_same_len_fun_should_work(config):
         da1.evaluate(ground_truth=da2, metrics=['precision_at_k'])
 
 
-def test_adding_noise(config):
+def test_adding_noise():
     da = DocumentArray.empty(10)
 
     da.embeddings = np.random.random([10, 3])
-    da_index = DocumentArray(da, config=config)
+    da_index = DocumentArray(da)
     with da_index:
         da.match(da_index, exclude_self=True)
 
@@ -285,7 +285,7 @@ def test_adding_noise(config):
         ('f1_score_at_k', {}),
     ],
 )
-def test_diff_match_len_in_gd(config, metric_fn, kwargs):
+def test_diff_match_len_in_gd(metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 128])
     # da1_index = DocumentArray(da1, storage=storage, config=config)
@@ -293,7 +293,7 @@ def test_diff_match_len_in_gd(config, metric_fn, kwargs):
 
     da2 = copy.deepcopy(da1)
     da2.embeddings = np.random.random([10, 128])
-    da2_index = DocumentArray(da2, config=config)
+    da2_index = DocumentArray(da2)
     with da2_index:
         da2.match(da2_index, exclude_self=True)
         # pop some matches from first document
@@ -308,24 +308,26 @@ def test_diff_match_len_in_gd(config, metric_fn, kwargs):
         assert d.evaluations[metric_fn].value > 0.9
 
 
-def test_empty_da_should_raise(config):
-    da = DocumentArray([], config=config)
+def test_empty_da_should_raise():
+    da = DocumentArray(
+        [],
+    )
     with pytest.raises(ValueError):
         da.evaluate(metrics=['precision_at_k'])
 
 
-def test_missing_groundtruth_should_raise(config):
-    da = DocumentArray(DocumentArray.empty(10), config=config)
+def test_missing_groundtruth_should_raise():
+    da = DocumentArray(DocumentArray.empty(10))
     with pytest.raises(RuntimeError):
         da.evaluate(metrics=['precision_at_k'])
 
 
-def test_useless_groundtruth_warning_should_raise(config):
+def test_useless_groundtruth_warning_should_raise():
     da1 = DocumentArray.empty(10)
     for d in da1:
         d.tags = {'label': 'A'}
     da1.embeddings = np.random.random([10, 256])
-    da1_index = DocumentArray(da1, config=config)
+    da1_index = DocumentArray(da1)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
     da2 = DocumentArray.empty(10)
@@ -339,11 +341,11 @@ def dummy_embed_function(da):
         da[i, 'embedding'] = np.random.random(5)
 
 
-def test_embed_and_evaluate_single_da(config):
+def test_embed_and_evaluate_single_da():
 
     gt = DocumentArray([Document(text=str(i)) for i in range(10)])
     queries_da = DocumentArray(gt, copy=True)
-    queries_da = DocumentArray(queries_da, config=config)
+    queries_da = DocumentArray(queries_da)
     dummy_embed_function(gt)
     gt.match(gt, limit=3)
 
@@ -408,13 +410,15 @@ def test_embed_and_evaluate_with_and_without_exclude_self(
     'sample_size',
     [None, 10],
 )
-def test_embed_and_evaluate_two_das(config, sample_size):
+def test_embed_and_evaluate_two_das(sample_size):
 
     gt_queries = DocumentArray([Document(text=str(i)) for i in range(100)])
     gt_index = DocumentArray([Document(text=str(i)) for i in range(100, 200)])
     queries_da = DocumentArray(gt_queries, copy=True)
     index_da = DocumentArray(gt_index, copy=True)
-    index_da = DocumentArray(index_da, config=config)
+    index_da = DocumentArray(
+        index_da,
+    )
     dummy_embed_function(gt_queries)
     dummy_embed_function(gt_index)
     gt_queries.match(gt_index, limit=3)
@@ -476,7 +480,7 @@ def test_embed_and_evaluate_two_different_das():
         ),
     ],
 )
-def test_embed_and_evaluate_labeled_dataset(config, use_index, expected, label_tag):
+def test_embed_and_evaluate_labeled_dataset(use_index, expected, label_tag):
     metric_fns = list(expected.keys())
 
     def emb_func(da):
@@ -484,7 +488,7 @@ def emb_func(da):
         da[:, 'embedding'] = np.random.random((len(da), 5))
 
     da1 = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)])
-    da2 = DocumentArray(da1, config=config, copy=True)
+    da2 = DocumentArray(da1, copy=True)
 
     with da2:
         if (
@@ -584,13 +588,13 @@ def bert_tokenizer():
     return BertTokenizer.from_pretrained('bert-base-uncased')
 
 
-def test_embed_and_evaluate_with_embed_model(config, bert_tokenizer):
+def test_embed_and_evaluate_with_embed_model(bert_tokenizer):
     model = BertModel(BertConfig())
     collate_fn = lambda da: bert_tokenizer(da.texts, return_tensors='pt')
     da = DocumentArray(
         [Document(text=f'some text {i}', tags={'label': str(i)}) for i in range(5)]
     )
-    da = DocumentArray(da, config=config)
+    da = DocumentArray(da)
     with da:
         res = da.embed_and_evaluate(
             metrics=['precision_at_k'], embed_models=model, collate_fns=collate_fn
@@ -616,19 +620,17 @@ def test_embed_and_evaluate_with_embed_model(config, bert_tokenizer):
         ),
     ],
 )
-def test_embed_and_evaluate_invalid_input_should_raise(
-    config, queries, kwargs, exception
-):
+def test_embed_and_evaluate_invalid_input_should_raise(queries, kwargs, exception):
     kwargs.update({'metrics': ['precision_at_k']})
     if 'index_data' in kwargs:
-        kwargs['index_data'] = DocumentArray(kwargs['index_data'], config=config)
+        kwargs['index_data'] = DocumentArray(kwargs['index_data'])
 
     with pytest.raises(exception):
         queries.embed_and_evaluate(**kwargs)
 
 
 @pytest.mark.parametrize('sample_size', [100, 1_000, 10_000])
-def test_embed_and_evaluate_sampling(config, sample_size):
+def test_embed_and_evaluate_sampling(sample_size):
     metric_fns = ['precision_at_k', 'reciprocal_rank']
 
     def emb_func(da):
@@ -638,7 +640,7 @@ def emb_func(da):
     da1 = DocumentArray(
         [Document(text=str(i), tags={'label': i % 20}) for i in range(2_000)]
     )
-    da2 = DocumentArray(da1, config=config, copy=True)
+    da2 = DocumentArray(da1, copy=True)
 
     with da2:
         res = da1.embed_and_evaluate(

From 02293ac869ea5ebd630238dfc5362ccec38a4d06 Mon Sep 17 00:00:00 2001
From: Johannes Messner <messnerjo@gmail.com>
Date: Thu, 12 Jan 2023 20:43:10 +0100
Subject: [PATCH 5/5] test: fix one test

Signed-off-by: Johannes Messner <messnerjo@gmail.com>
---
 tests/unit/array/mixins/oldproto/test_eval_class.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/unit/array/mixins/oldproto/test_eval_class.py b/tests/unit/array/mixins/oldproto/test_eval_class.py
index 0bec7e60d3f..0d018db1d3e 100644
--- a/tests/unit/array/mixins/oldproto/test_eval_class.py
+++ b/tests/unit/array/mixins/oldproto/test_eval_class.py
@@ -25,7 +25,6 @@
         ('ndcg_at_k', {}),
     ],
 )
-
 def test_eval_mixin_perfect_match(metric_fn, kwargs):
     da1 = DocumentArray.empty(10)
     da1.embeddings = np.random.random([10, 256])
@@ -41,7 +40,6 @@ def test_eval_mixin_perfect_match(metric_fn, kwargs):
         assert d.evaluations[metric_fn].value == 1.0
 
 
-
 def test_eval_mixin_perfect_match_multiple_metrics():
     metric_fns = [
         'r_precision',
@@ -55,6 +53,7 @@ def test_eval_mixin_perfect_match_multiple_metrics():
     ]
     kwargs = {'max_rel': 9}
     da1 = DocumentArray.empty(10)
+    da1.embeddings = np.random.random([10, 256])
     da1_index = DocumentArray(da1)
     with da1_index:
         da1.match(da1_index, exclude_self=True)
@@ -254,6 +253,7 @@ def test_same_hash_same_len_fun_should_work():
 
         da1.evaluate(ground_truth=da2, metrics=['precision_at_k'])
 
+
 def test_adding_noise():
     da = DocumentArray.empty(10)
 
@@ -322,6 +322,7 @@ def test_missing_groundtruth_should_raise():
     with pytest.raises(RuntimeError):
         da.evaluate(metrics=['precision_at_k'])
 
+
 def test_useless_groundtruth_warning_should_raise():
     da1 = DocumentArray.empty(10)
     for d in da1: