From d2bfe2401839bce08aac94a2332a6edf134a6391 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Fri, 2 Dec 2022 11:12:02 +0100 Subject: [PATCH 1/8] feat: native len for milvus Signed-off-by: Johannes Messner --- docarray/array/storage/milvus/seqlike.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docarray/array/storage/milvus/seqlike.py b/docarray/array/storage/milvus/seqlike.py index 1711c5b8080..d1ce651c0c6 100644 --- a/docarray/array/storage/milvus/seqlike.py +++ b/docarray/array/storage/milvus/seqlike.py @@ -1,6 +1,6 @@ from typing import Iterable, Iterator, Union, TYPE_CHECKING from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin -from docarray.array.storage.milvus.backend import _batch_list +from docarray.array.storage.milvus.backend import _batch_list, _always_true_expr from docarray import Document @@ -56,3 +56,11 @@ def _extend(self, values: Iterable['Document'], **kwargs): payload = self._docs_to_milvus_payload(docs_batch) self._collection.insert(payload, **kwargs) self._offset2ids.extend([doc.id for doc in docs_batch]) + + def __len__(self): + with self.loaded_collection(): + res = self._collection.query( + expr=_always_true_expr('document_id'), + output_fields=['document_id'], + ) + return len(res) From e8422c8a2f8990fcf2689d6506bcb220a457e3a6 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Fri, 2 Dec 2022 11:15:32 +0100 Subject: [PATCH 2/8] fix: make implementing len non-optional Signed-off-by: Johannes Messner --- docarray/array/storage/base/seqlike.py | 2 +- docs/advanced/document-store/extend.md | 7 +++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/docarray/array/storage/base/seqlike.py b/docarray/array/storage/base/seqlike.py index 5e46cafe607..ce89b82a3bf 100644 --- a/docarray/array/storage/base/seqlike.py +++ b/docarray/array/storage/base/seqlike.py @@ -50,7 +50,7 @@ def __eq__(self, other): ... def __len__(self): - return len(self._offset2ids) + ... def __iter__(self) -> Iterator['Document']: for _id in self._offset2ids: diff --git a/docs/advanced/document-store/extend.md b/docs/advanced/document-store/extend.md index a65d5ac32bb..591d2ce8832 100644 --- a/docs/advanced/document-store/extend.md +++ b/docs/advanced/document-store/extend.md @@ -145,6 +145,9 @@ class SequenceLikeMixin(BaseSequenceLikeMixin): def __add__(self, other: Union['Document', Iterable['Document']]): ... + def __len__(self): + ... + def insert(self, index: int, value: 'Document'): # Optional. By default, this will add a new item and update offset2id # if you want to customize this, make sure to handle offset2id @@ -158,10 +161,6 @@ class SequenceLikeMixin(BaseSequenceLikeMixin): # Optional. Override this if you have better implementation than appending one by one ... - def __len__(self): - # Optional. By default, this will rely on offset2id to get the length - ... - def __iter__(self) -> Iterator['Document']: # Optional. By default, this will rely on offset2id to iterate ... From cbfc6e6725eb4ddbc81e3a682ba4d5ab7bdf7a4e Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Tue, 10 Jan 2023 13:13:28 +0100 Subject: [PATCH 3/8] ci: trigger the ci for debugging purposes Signed-off-by: Johannes Messner --- docarray/array/mixins/getattr.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docarray/array/mixins/getattr.py b/docarray/array/mixins/getattr.py index 588b03e12aa..120ff951e11 100644 --- a/docarray/array/mixins/getattr.py +++ b/docarray/array/mixins/getattr.py @@ -11,8 +11,9 @@ def _get_attributes(self, *fields: str) -> List: :return: Returns a list of the values for these fields. When `fields` has multiple values, then it returns a list of list. """ - e_index, b_index = None, None + # small change just to trigger CI tests fields = list(fields) + e_index, b_index = None, None if 'embedding' in fields: e_index = fields.index('embedding') if 'tensor' in fields: From 1e632cbcd6fa7997644ce887859eb1e0d39f4ccd Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Tue, 10 Jan 2023 15:07:07 +0100 Subject: [PATCH 4/8] ci: only run oldproto tests Signed-off-by: Johannes Messner --- .github/workflows/ci.yml | 100 +++++++++++++++++++-------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index eda1318e7b1..5f72b5ced0a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -157,55 +157,55 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} matrix-oldproto: ${{ steps.set-matrix.outputs.matrix-oldproto }} - docarray-test: - needs: prep-testbed - runs-on: ubuntu-latest - strategy: - fail-fast: false - matrix: - python-version: [3.8] - test-path: ${{fromJson(needs.prep-testbed.outputs.matrix)}} - steps: - - uses: actions/checkout@v2.5.0 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Prepare environment - run: | - python -m pip install --upgrade pip - python -m pip install wheel - # pip does not properly resolve dependency versions with syntax pip install --no-cache-dir ".[test,full]" - pip install --no-cache-dir ".[test]" - pip install --no-cache-dir ".[qdrant]" - pip install --no-cache-dir ".[annlite]" - pip install --no-cache-dir ".[weaviate]" - pip install --no-cache-dir ".[elasticsearch]" - pip install --no-cache-dir ".[redis]" - pip install --no-cache-dir ".[full]" - sudo apt-get install libsndfile1 - - name: Test - id: test - run: | - pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \ - -v -s -m "not gpu" ${{ matrix.test-path }} - echo "codecov_flag=docarray" >> $GITHUB_OUTPUT - timeout-minutes: 60 - env: - JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" - - name: Check codecov file - id: check_files - uses: andstor/file-existence-action@v1 - with: - files: "coverage.xml" - - name: Upload coverage from test to Codecov - uses: codecov/codecov-action@v3.1.1 - if: steps.check_files.outputs.files_exists == 'true' && ${{ matrix.python-version }} == '3.8' - with: - file: coverage.xml - flags: ${{ steps.test.outputs.codecov_flag }} - fail_ci_if_error: false - token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos +# docarray-test: +# needs: prep-testbed +# runs-on: ubuntu-latest +# strategy: +# fail-fast: false +# matrix: +# python-version: [3.8] +# test-path: ${{fromJson(needs.prep-testbed.outputs.matrix)}} +# steps: +# - uses: actions/checkout@v2.5.0 +# - name: Set up Python ${{ matrix.python-version }} +# uses: actions/setup-python@v4 +# with: +# python-version: ${{ matrix.python-version }} +# - name: Prepare environment +# run: | +# python -m pip install --upgrade pip +# python -m pip install wheel +# # pip does not properly resolve dependency versions with syntax pip install --no-cache-dir ".[test,full]" +# pip install --no-cache-dir ".[test]" +# pip install --no-cache-dir ".[qdrant]" +# pip install --no-cache-dir ".[annlite]" +# pip install --no-cache-dir ".[weaviate]" +# pip install --no-cache-dir ".[elasticsearch]" +# pip install --no-cache-dir ".[redis]" +# pip install --no-cache-dir ".[full]" +# sudo apt-get install libsndfile1 +# - name: Test +# id: test +# run: | +# pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \ +# -v -s -m "not gpu" ${{ matrix.test-path }} +# echo "codecov_flag=docarray" >> $GITHUB_OUTPUT +# timeout-minutes: 60 +# env: +# JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" +# - name: Check codecov file +# id: check_files +# uses: andstor/file-existence-action@v1 +# with: +# files: "coverage.xml" +# - name: Upload coverage from test to Codecov +# uses: codecov/codecov-action@v3.1.1 +# if: steps.check_files.outputs.files_exists == 'true' && ${{ matrix.python-version }} == '3.8' +# with: +# file: coverage.xml +# flags: ${{ steps.test.outputs.codecov_flag }} +# fail_ci_if_error: false +# token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos docarray-oldproto-test: needs: prep-testbed @@ -260,7 +260,7 @@ jobs: # just for blocking the merge until all parallel core-test are successful success-all-test: - needs: [commit-lint, docarray-test, docarray-oldproto-test] + needs: [commit-lint, docarray-oldproto-test] if: always() runs-on: ubuntu-latest steps: From 0654713701f15a19ada78a8e767e6d731c75b847 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Tue, 10 Jan 2023 15:59:48 +0100 Subject: [PATCH 5/8] test: add some context mngrs Signed-off-by: Johannes Messner --- .../array/mixins/oldproto/test_eval_class.py | 144 ++++++++++-------- 1 file changed, 80 insertions(+), 64 deletions(-) diff --git a/tests/unit/array/mixins/oldproto/test_eval_class.py b/tests/unit/array/mixins/oldproto/test_eval_class.py index 2645a4cab07..8d0278a0f8c 100644 --- a/tests/unit/array/mixins/oldproto/test_eval_class.py +++ b/tests/unit/array/mixins/oldproto/test_eval_class.py @@ -42,7 +42,8 @@ def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_stor da1 = DocumentArray.empty(10) da1.embeddings = np.random.random([10, 256]) da1_index = DocumentArray(da1, storage=storage, config=config) - da1.match(da1_index, exclude_self=True) + with da1_index: + da1.match(da1_index, exclude_self=True) r = da1.evaluate(ground_truth=da1, metrics=[metric_fn], strict=False, **kwargs)[ metric_fn ] @@ -80,7 +81,8 @@ def test_eval_mixin_perfect_match_multiple_metrics(storage, config, start_storag da1 = DocumentArray.empty(10) da1.embeddings = np.random.random([10, 256]) da1_index = DocumentArray(da1, storage=storage, config=config) - da1.match(da1_index, exclude_self=True) + with da1_index: + da1.match(da1_index, exclude_self=True) r = da1.evaluate(ground_truth=da1, metrics=metric_fns, strict=False, **kwargs) for metric_fn in metric_fns: assert metric_fn in r @@ -123,7 +125,8 @@ def test_eval_mixin_perfect_match_labeled( d.tags = {'label': 'A'} da1.embeddings = np.random.random([10, 256]) da1_index = DocumentArray(da1, storage=storage, config=config) - da1.match(da1_index, exclude_self=True) + with da1_index: + da1.match(da1_index, exclude_self=True) r = da1.evaluate(metrics=[metric_fn], **kwargs)[metric_fn] assert isinstance(r, float) assert r == 1.0 @@ -166,7 +169,8 @@ def test_eval_mixin_zero_labeled(storage, config, metric_fn, start_storage, kwar for d in da2: d.tags = {'label': 'B'} da1_index = DocumentArray(da2, storage=storage, config=config) - da1.match(da1_index, exclude_self=True) + with da1_index: + da1.match(da1_index, exclude_self=True) r = da1.evaluate([metric_fn], **kwargs)[metric_fn] assert isinstance(r, float) assert r == 0.0 @@ -264,9 +268,10 @@ def test_eval_mixin_zero_match(storage, config, metric_fn, start_storage, kwargs da2 = copy.deepcopy(da1) da2.embeddings = np.random.random([10, 256]) da2_index = DocumentArray(da2, storage=storage, config=config) - da2.match(da2_index, exclude_self=True) + with da2_index: + da2.match(da2_index, exclude_self=True) - r = da1.evaluate(ground_truth=da2, metrics=[metric_fn], **kwargs)[metric_fn] + r = da1.evaluate(ground_truth=da2, metrics=[metric_fn], **kwargs)[metric_fn] assert isinstance(r, float) assert r == 1.0 for d in da1: @@ -337,17 +342,20 @@ def test_same_hash_same_len_fun_should_work(storage, config, start_storage): da1 = DocumentArray.empty(10) da1.embeddings = np.random.random([10, 3]) da1_index = DocumentArray(da1, storage=storage, config=config) - da1.match(da1_index) + with da1_index: + da1.match(da1_index) da2 = DocumentArray.empty(10) da2.embeddings = np.random.random([10, 3]) da2_index = DocumentArray(da1, storage=storage, config=config) - da2.match(da2_index) - with pytest.raises(ValueError): - da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) - for d1, d2 in zip(da1, da2): - d1.id = d2.id + with da2_index: + da2.match(da2_index) + with da1_index, da2_index: + with pytest.raises(ValueError): + da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) + for d1, d2 in zip(da1, da2): + d1.id = d2.id - da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) + da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) @pytest.mark.parametrize( @@ -368,7 +376,8 @@ def test_adding_noise(storage, config, start_storage): da.embeddings = np.random.random([10, 3]) da_index = DocumentArray(da, storage=storage, config=config) - da.match(da_index, exclude_self=True) + with da_index: + da.match(da_index, exclude_self=True) da2 = copy.deepcopy(da) @@ -410,17 +419,18 @@ def test_adding_noise(storage, config, start_storage): def test_diff_match_len_in_gd(storage, config, metric_fn, start_storage, kwargs): da1 = DocumentArray.empty(10) da1.embeddings = np.random.random([10, 128]) - da1_index = DocumentArray(da1, storage=storage, config=config) + # da1_index = DocumentArray(da1, storage=storage, config=config) da1.match(da1, exclude_self=True) da2 = copy.deepcopy(da1) da2.embeddings = np.random.random([10, 128]) da2_index = DocumentArray(da2, storage=storage, config=config) - da2.match(da2_index, exclude_self=True) - # pop some matches from first document - da2[0].matches.pop(8) + with da2_index: + da2.match(da2_index, exclude_self=True) + # pop some matches from first document + da2[0].matches.pop(8) - r = da1.evaluate(ground_truth=da2, metrics=[metric_fn], **kwargs)[metric_fn] + r = da1.evaluate(ground_truth=da2, metrics=[metric_fn], **kwargs)[metric_fn] assert isinstance(r, float) np.testing.assert_allclose(r, 1.0, rtol=1e-2) # for d in da1: @@ -486,7 +496,8 @@ def test_useless_groundtruth_warning_should_raise(storage, config, start_storage d.tags = {'label': 'A'} da1.embeddings = np.random.random([10, 256]) da1_index = DocumentArray(da1, storage=storage, config=config) - da1.match(da1_index, exclude_self=True) + with da1_index: + da1.match(da1_index, exclude_self=True) da2 = DocumentArray.empty(10) with pytest.warns(UserWarning): da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) @@ -518,13 +529,14 @@ def test_embed_and_evaluate_single_da(storage, config, start_storage): dummy_embed_function(gt) gt.match(gt, limit=3) - res = queries_da.embed_and_evaluate( - ground_truth=gt, - metrics=['precision_at_k', 'reciprocal_rank'], - embed_funcs=dummy_embed_function, - match_batch_size=1, - limit=3, - ) + with queries_da: + res = queries_da.embed_and_evaluate( + ground_truth=gt, + metrics=['precision_at_k', 'reciprocal_rank'], + embed_funcs=dummy_embed_function, + match_batch_size=1, + limit=3, + ) assert all([v == 1.0 for v in res.values()]) @@ -601,15 +613,16 @@ def test_embed_and_evaluate_two_das(storage, config, sample_size, start_storage) dummy_embed_function(gt_index) gt_queries.match(gt_index, limit=3) - res = queries_da.embed_and_evaluate( - ground_truth=gt_queries, - index_data=index_da, - metrics=['precision_at_k', 'reciprocal_rank'], - embed_funcs=dummy_embed_function, - match_batch_size=1, - limit=3, - query_sample_size=sample_size, - ) + with index_da: + res = queries_da.embed_and_evaluate( + ground_truth=gt_queries, + index_data=index_da, + metrics=['precision_at_k', 'reciprocal_rank'], + embed_funcs=dummy_embed_function, + match_batch_size=1, + limit=3, + query_sample_size=sample_size, + ) assert all([v == 1.0 for v in res.values()]) @@ -681,25 +694,26 @@ def emb_func(da): da1 = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) da2 = DocumentArray(da1, storage=storage, config=config, copy=True) - if ( - use_index - ): # query and index da are distinct # (different embeddings are generated) - res = da1.embed_and_evaluate( - index_data=da2, - metrics=metric_fns, - embed_funcs=emb_func, - match_batch_size=1, - limit=3, - label_tag=label_tag, - ) - else: # query and index are the same (embeddings of both das are equal) - res = da2.embed_and_evaluate( - metrics=metric_fns, - embed_funcs=emb_func, - match_batch_size=1, - limit=3, - label_tag=label_tag, - ) + with da2: + if ( + use_index + ): # query and index da are distinct # (different embeddings are generated) + res = da1.embed_and_evaluate( + index_data=da2, + metrics=metric_fns, + embed_funcs=emb_func, + match_batch_size=1, + limit=3, + label_tag=label_tag, + ) + else: # query and index are the same (embeddings of both das are equal) + res = da2.embed_and_evaluate( + metrics=metric_fns, + embed_funcs=emb_func, + match_batch_size=1, + limit=3, + label_tag=label_tag, + ) for key in metric_fns: assert key in res assert abs(res[key] - expected[key]) < 1e-4 @@ -799,9 +813,10 @@ def test_embed_and_evaluate_with_embed_model( [Document(text=f'some text {i}', tags={'label': str(i)}) for i in range(5)] ) da = DocumentArray(da, storage=storage, config=config) - res = da.embed_and_evaluate( - metrics=['precision_at_k'], embed_models=model, collate_fns=collate_fn - ) + with da: + res = da.embed_and_evaluate( + metrics=['precision_at_k'], embed_models=model, collate_fns=collate_fn + ) assert res assert res['precision_at_k'] == 0.2 @@ -873,12 +888,13 @@ def emb_func(da): ) da2 = DocumentArray(da1, storage=storage, config=config, copy=True) - res = da1.embed_and_evaluate( - index_data=da2, - metrics=metric_fns, - embed_funcs=emb_func, - query_sample_size=sample_size, - ) + with da2: + res = da1.embed_and_evaluate( + index_data=da2, + metrics=metric_fns, + embed_funcs=emb_func, + query_sample_size=sample_size, + ) expected_size = ( sample_size if sample_size and (sample_size < len(da1)) else len(da1) ) From 02326b50cab00a74215e7a761d1741b6314c64fd Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Tue, 10 Jan 2023 17:08:59 +0100 Subject: [PATCH 6/8] test: remove some tests Signed-off-by: Johannes Messner --- .../array/mixins/oldproto/test_eval_class.py | 776 +++++++++--------- 1 file changed, 388 insertions(+), 388 deletions(-) diff --git a/tests/unit/array/mixins/oldproto/test_eval_class.py b/tests/unit/array/mixins/oldproto/test_eval_class.py index 8d0278a0f8c..1d1e9378071 100644 --- a/tests/unit/array/mixins/oldproto/test_eval_class.py +++ b/tests/unit/array/mixins/oldproto/test_eval_class.py @@ -12,45 +12,45 @@ from docarray import DocumentArray, Document -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 256}), - ('qdrant', {'n_dim': 256}), - ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256}), - ('milvus', {'n_dim': 256}), - ], -) -@pytest.mark.parametrize( - 'metric_fn, kwargs', - [ - ('r_precision', {}), - ('precision_at_k', {}), - ('hit_at_k', {}), - ('average_precision', {}), - ('reciprocal_rank', {}), - ('recall_at_k', {'max_rel': 9}), - ('f1_score_at_k', {'max_rel': 9}), - ('ndcg_at_k', {}), - ], -) -def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_storage): - da1 = DocumentArray.empty(10) - da1.embeddings = np.random.random([10, 256]) - da1_index = DocumentArray(da1, storage=storage, config=config) - with da1_index: - da1.match(da1_index, exclude_self=True) - r = da1.evaluate(ground_truth=da1, metrics=[metric_fn], strict=False, **kwargs)[ - metric_fn - ] - assert isinstance(r, float) - assert r == 1.0 - for d in da1: - assert d.evaluations[metric_fn].value == 1.0 +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 256}), +# ('qdrant', {'n_dim': 256}), +# ('elasticsearch', {'n_dim': 256}), +# ('redis', {'n_dim': 256}), +# ('milvus', {'n_dim': 256}), +# ], +# ) +# @pytest.mark.parametrize( +# 'metric_fn, kwargs', +# [ +# ('r_precision', {}), +# ('precision_at_k', {}), +# ('hit_at_k', {}), +# ('average_precision', {}), +# ('reciprocal_rank', {}), +# ('recall_at_k', {'max_rel': 9}), +# ('f1_score_at_k', {'max_rel': 9}), +# ('ndcg_at_k', {}), +# ], +# ) +# def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_storage): +# da1 = DocumentArray.empty(10) +# da1.embeddings = np.random.random([10, 256]) +# da1_index = DocumentArray(da1, storage=storage, config=config) +# with da1_index: +# da1.match(da1_index, exclude_self=True) +# r = da1.evaluate(ground_truth=da1, metrics=[metric_fn], strict=False, **kwargs)[ +# metric_fn +# ] +# assert isinstance(r, float) +# assert r == 1.0 +# for d in da1: +# assert d.evaluations[metric_fn].value == 1.0 @pytest.mark.parametrize( @@ -134,70 +134,70 @@ def test_eval_mixin_perfect_match_labeled( assert d.evaluations[metric_fn].value == 1.0 -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 256}), - ('qdrant', {'n_dim': 256}), - ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256}), - ('milvus', {'n_dim': 256}), - ], -) -@pytest.mark.parametrize( - 'metric_fn, kwargs', - [ - ('r_precision', {}), - ('precision_at_k', {}), - ('hit_at_k', {}), - ('average_precision', {}), - ('reciprocal_rank', {}), - ('recall_at_k', {'max_rel': 9}), - ('f1_score_at_k', {'max_rel': 9}), - ('ndcg_at_k', {}), - ], -) -def test_eval_mixin_zero_labeled(storage, config, metric_fn, start_storage, kwargs): - da1 = DocumentArray.empty(10) - for d in da1: - d.tags = {'label': 'A'} - da1.embeddings = np.random.random([10, 256]) - da2 = copy.deepcopy(da1) - for d in da2: - d.tags = {'label': 'B'} - da1_index = DocumentArray(da2, storage=storage, config=config) - with da1_index: - da1.match(da1_index, exclude_self=True) - r = da1.evaluate([metric_fn], **kwargs)[metric_fn] - assert isinstance(r, float) - assert r == 0.0 - for d in da1: - assert d.evaluations[metric_fn].value == 0.0 - - -@pytest.mark.parametrize('label_tag', ['label', 'custom_tag']) -@pytest.mark.parametrize( - 'metric_fn, metric_score', - [ - ('r_precision', 1.0 / 3), - ('precision_at_k', 1.0 / 3), - ('hit_at_k', 1.0), - ('average_precision', (1.0 + 0.5 + (1.0 / 3)) / 3), - ('reciprocal_rank', (1.0 + 0.5 + (1.0 / 3)) / 3), - ('recall_at_k', 1.0 / 3), - ('f1_score_at_k', 1.0 / 3), - ('dcg_at_k', (1.0 + 1.0 + 0.6309) / 3), - ], -) -def test_eval_mixin_one_of_n_labeled(metric_fn, metric_score, label_tag): - da = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) - for d in da: - d.matches = da - r = da.evaluate([metric_fn], label_tag=label_tag, max_rel=3)[metric_fn] - assert abs(r - metric_score) < 0.001 +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 256}), +# ('qdrant', {'n_dim': 256}), +# ('elasticsearch', {'n_dim': 256}), +# ('redis', {'n_dim': 256}), +# ('milvus', {'n_dim': 256}), +# ], +# ) +# @pytest.mark.parametrize( +# 'metric_fn, kwargs', +# [ +# ('r_precision', {}), +# ('precision_at_k', {}), +# ('hit_at_k', {}), +# ('average_precision', {}), +# ('reciprocal_rank', {}), +# ('recall_at_k', {'max_rel': 9}), +# ('f1_score_at_k', {'max_rel': 9}), +# ('ndcg_at_k', {}), +# ], +# ) +# def test_eval_mixin_zero_labeled(storage, config, metric_fn, start_storage, kwargs): +# da1 = DocumentArray.empty(10) +# for d in da1: +# d.tags = {'label': 'A'} +# da1.embeddings = np.random.random([10, 256]) +# da2 = copy.deepcopy(da1) +# for d in da2: +# d.tags = {'label': 'B'} +# da1_index = DocumentArray(da2, storage=storage, config=config) +# with da1_index: +# da1.match(da1_index, exclude_self=True) +# r = da1.evaluate([metric_fn], **kwargs)[metric_fn] +# assert isinstance(r, float) +# assert r == 0.0 +# for d in da1: +# assert d.evaluations[metric_fn].value == 0.0 + + +# @pytest.mark.parametrize('label_tag', ['label', 'custom_tag']) +# @pytest.mark.parametrize( +# 'metric_fn, metric_score', +# [ +# ('r_precision', 1.0 / 3), +# ('precision_at_k', 1.0 / 3), +# ('hit_at_k', 1.0), +# ('average_precision', (1.0 + 0.5 + (1.0 / 3)) / 3), +# ('reciprocal_rank', (1.0 + 0.5 + (1.0 / 3)) / 3), +# ('recall_at_k', 1.0 / 3), +# ('f1_score_at_k', 1.0 / 3), +# ('dcg_at_k', (1.0 + 1.0 + 0.6309) / 3), +# ], +# ) +# def test_eval_mixin_one_of_n_labeled(metric_fn, metric_score, label_tag): +# da = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) +# for d in da: +# d.matches = da +# r = da.evaluate([metric_fn], label_tag=label_tag, max_rel=3)[metric_fn] +# assert abs(r - metric_score) < 0.001 @pytest.mark.parametrize('label_tag', ['label', 'custom_tag']) @@ -221,16 +221,16 @@ def test_num_relevant_documents_per_label(metric_fn, metric_score, label_tag): assert abs(r - metric_score) < 0.001 -def test_missing_max_rel_should_raise(): - da = DocumentArray([Document(text=str(i), tags={'label': i}) for i in range(3)]) - num_relevant_documents_per_label = {i: 1 for i in range(2)} - for d in da: - d.matches = da - with pytest.raises(ValueError): - da.evaluate( - ['recall_at_k'], - num_relevant_documents_per_label=num_relevant_documents_per_label, - ) +# def test_missing_max_rel_should_raise(): +# da = DocumentArray([Document(text=str(i), tags={'label': i}) for i in range(3)]) +# num_relevant_documents_per_label = {i: 1 for i in range(2)} +# for d in da: +# d.matches = da +# with pytest.raises(ValueError): +# da.evaluate( +# ['recall_at_k'], +# num_relevant_documents_per_label=num_relevant_documents_per_label, +# ) @pytest.mark.parametrize( @@ -279,50 +279,50 @@ def test_eval_mixin_zero_match(storage, config, metric_fn, start_storage, kwargs assert d.evaluations[metric_fn].value == 1.0 -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 256}), - ('qdrant', {'n_dim': 256}), - ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256}), - ('milvus', {'n_dim': 256}), - ], -) -def test_diff_len_should_raise(storage, config, start_storage): - da1 = DocumentArray.empty(10) - da2 = DocumentArray.empty(5) - for d in da2: - d.matches.append(da2[0]) - da2 = DocumentArray(da2, storage=storage, config=config) - with pytest.raises(ValueError): - da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) - - -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 256}), - ('qdrant', {'n_dim': 256}), - ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256}), - ('milvus', {'n_dim': 256}), - ], -) -def test_diff_hash_fun_should_raise(storage, config, start_storage): - da1 = DocumentArray.empty(10) - da2 = DocumentArray.empty(5) - for d in da2: - d.matches.append(da2[0]) - da2 = DocumentArray(da2, storage=storage, config=config) - with pytest.raises(ValueError): - da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 256}), +# ('qdrant', {'n_dim': 256}), +# ('elasticsearch', {'n_dim': 256}), +# ('redis', {'n_dim': 256}), +# ('milvus', {'n_dim': 256}), +# ], +# ) +# def test_diff_len_should_raise(storage, config, start_storage): +# da1 = DocumentArray.empty(10) +# da2 = DocumentArray.empty(5) +# for d in da2: +# d.matches.append(da2[0]) +# da2 = DocumentArray(da2, storage=storage, config=config) +# with pytest.raises(ValueError): +# da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) + + +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 256}), +# ('qdrant', {'n_dim': 256}), +# ('elasticsearch', {'n_dim': 256}), +# ('redis', {'n_dim': 256}), +# ('milvus', {'n_dim': 256}), +# ], +# ) +# def test_diff_hash_fun_should_raise(storage, config, start_storage): +# da1 = DocumentArray.empty(10) +# da2 = DocumentArray.empty(5) +# for d in da2: +# d.matches.append(da2[0]) +# da2 = DocumentArray(da2, storage=storage, config=config) +# with pytest.raises(ValueError): +# da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) @pytest.mark.parametrize( @@ -439,68 +439,68 @@ def test_diff_match_len_in_gd(storage, config, metric_fn, start_storage, kwargs) assert d.evaluations[metric_fn].value > 0.9 -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 256}), - ('qdrant', {'n_dim': 256}), - ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256}), - ('milvus', {'n_dim': 256}), - ], -) -def test_empty_da_should_raise(storage, config, start_storage): - da = DocumentArray([], storage=storage, config=config) - with pytest.raises(ValueError): - da.evaluate(metrics=['precision_at_k']) - - -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 256}), - ('qdrant', {'n_dim': 256}), - ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256}), - ('milvus', {'n_dim': 256}), - ], -) -def test_missing_groundtruth_should_raise(storage, config, start_storage): - da = DocumentArray(DocumentArray.empty(10), storage=storage, config=config) - with pytest.raises(RuntimeError): - da.evaluate(metrics=['precision_at_k']) - - -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 256}), - ('qdrant', {'n_dim': 256}), - ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256}), - ('milvus', {'n_dim': 256}), - ], -) -def test_useless_groundtruth_warning_should_raise(storage, config, start_storage): - da1 = DocumentArray.empty(10) - for d in da1: - d.tags = {'label': 'A'} - da1.embeddings = np.random.random([10, 256]) - da1_index = DocumentArray(da1, storage=storage, config=config) - with da1_index: - da1.match(da1_index, exclude_self=True) - da2 = DocumentArray.empty(10) - with pytest.warns(UserWarning): - da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 256}), +# ('qdrant', {'n_dim': 256}), +# ('elasticsearch', {'n_dim': 256}), +# ('redis', {'n_dim': 256}), +# ('milvus', {'n_dim': 256}), +# ], +# ) +# def test_empty_da_should_raise(storage, config, start_storage): +# da = DocumentArray([], storage=storage, config=config) +# with pytest.raises(ValueError): +# da.evaluate(metrics=['precision_at_k']) + + +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 256}), +# ('qdrant', {'n_dim': 256}), +# ('elasticsearch', {'n_dim': 256}), +# ('redis', {'n_dim': 256}), +# ('milvus', {'n_dim': 256}), +# ], +# ) +# def test_missing_groundtruth_should_raise(storage, config, start_storage): +# da = DocumentArray(DocumentArray.empty(10), storage=storage, config=config) +# with pytest.raises(RuntimeError): +# da.evaluate(metrics=['precision_at_k']) + + +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 256}), +# ('qdrant', {'n_dim': 256}), +# ('elasticsearch', {'n_dim': 256}), +# ('redis', {'n_dim': 256}), +# ('milvus', {'n_dim': 256}), +# ], +# ) +# def test_useless_groundtruth_warning_should_raise(storage, config, start_storage): +# da1 = DocumentArray.empty(10) +# for d in da1: +# d.tags = {'label': 'A'} +# da1.embeddings = np.random.random([10, 256]) +# da1_index = DocumentArray(da1, storage=storage, config=config) +# with da1_index: +# da1.match(da1_index, exclude_self=True) +# da2 = DocumentArray.empty(10) +# with pytest.warns(UserWarning): +# da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) def dummy_embed_function(da): @@ -509,35 +509,35 @@ def dummy_embed_function(da): da[i, 'embedding'] = np.random.random(5) -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 5}), - ('qdrant', {'n_dim': 5}), - ('elasticsearch', {'n_dim': 5}), - ('redis', {'n_dim': 5}), - ], -) -def test_embed_and_evaluate_single_da(storage, config, start_storage): - - gt = DocumentArray([Document(text=str(i)) for i in range(10)]) - queries_da = DocumentArray(gt, copy=True) - queries_da = DocumentArray(queries_da, storage=storage, config=config) - dummy_embed_function(gt) - gt.match(gt, limit=3) - - with queries_da: - res = queries_da.embed_and_evaluate( - ground_truth=gt, - metrics=['precision_at_k', 'reciprocal_rank'], - embed_funcs=dummy_embed_function, - match_batch_size=1, - limit=3, - ) - assert all([v == 1.0 for v in res.values()]) +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 5}), +# ('qdrant', {'n_dim': 5}), +# ('elasticsearch', {'n_dim': 5}), +# ('redis', {'n_dim': 5}), +# ], +# ) +# def test_embed_and_evaluate_single_da(storage, config, start_storage): +# +# gt = DocumentArray([Document(text=str(i)) for i in range(10)]) +# queries_da = DocumentArray(gt, copy=True) +# queries_da = DocumentArray(queries_da, storage=storage, config=config) +# dummy_embed_function(gt) +# gt.match(gt, limit=3) +# +# with queries_da: +# res = queries_da.embed_and_evaluate( +# ground_truth=gt, +# metrics=['precision_at_k', 'reciprocal_rank'], +# embed_funcs=dummy_embed_function, +# match_batch_size=1, +# limit=3, +# ) +# assert all([v == 1.0 for v in res.values()]) @pytest.mark.parametrize( @@ -586,44 +586,44 @@ def test_embed_and_evaluate_with_and_without_exclude_self( assert abs(res[key] - expected_results[key]) < 1e-5 -@pytest.mark.parametrize( - 'sample_size', - [None, 10], -) -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 5}), - ('qdrant', {'n_dim': 5}), - ('elasticsearch', {'n_dim': 5}), - ('redis', {'n_dim': 5}), - ], -) -def test_embed_and_evaluate_two_das(storage, config, sample_size, start_storage): - - gt_queries = DocumentArray([Document(text=str(i)) for i in range(100)]) - gt_index = DocumentArray([Document(text=str(i)) for i in range(100, 200)]) - queries_da = DocumentArray(gt_queries, copy=True) - index_da = DocumentArray(gt_index, copy=True) - index_da = DocumentArray(index_da, storage=storage, config=config) - dummy_embed_function(gt_queries) - dummy_embed_function(gt_index) - gt_queries.match(gt_index, limit=3) - - with index_da: - res = queries_da.embed_and_evaluate( - ground_truth=gt_queries, - index_data=index_da, - metrics=['precision_at_k', 'reciprocal_rank'], - embed_funcs=dummy_embed_function, - match_batch_size=1, - limit=3, - query_sample_size=sample_size, - ) - assert all([v == 1.0 for v in res.values()]) +# @pytest.mark.parametrize( +# 'sample_size', +# [None, 10], +# ) +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 5}), +# ('qdrant', {'n_dim': 5}), +# ('elasticsearch', {'n_dim': 5}), +# ('redis', {'n_dim': 5}), +# ], +# ) +# def test_embed_and_evaluate_two_das(storage, config, sample_size, start_storage): +# +# gt_queries = DocumentArray([Document(text=str(i)) for i in range(100)]) +# gt_index = DocumentArray([Document(text=str(i)) for i in range(100, 200)]) +# queries_da = DocumentArray(gt_queries, copy=True) +# index_da = DocumentArray(gt_index, copy=True) +# index_da = DocumentArray(index_da, storage=storage, config=config) +# dummy_embed_function(gt_queries) +# dummy_embed_function(gt_index) +# gt_queries.match(gt_index, limit=3) +# +# with index_da: +# res = queries_da.embed_and_evaluate( +# ground_truth=gt_queries, +# index_data=index_da, +# metrics=['precision_at_k', 'reciprocal_rank'], +# embed_funcs=dummy_embed_function, +# match_batch_size=1, +# limit=3, +# query_sample_size=sample_size, +# ) +# assert all([v == 1.0 for v in res.values()]) def test_embed_and_evaluate_two_different_das(): @@ -655,68 +655,68 @@ def test_embed_and_evaluate_two_different_das(): assert abs(res['f1_score_at_k'] - 1.0 / 1.5) < 1e-5 -@pytest.mark.parametrize( - 'use_index, expected, label_tag', - [ - (False, {'precision_at_k': 1.0 / 3, 'reciprocal_rank': 1.0}, 'label'), - ( - True, - { - 'precision_at_k': 1.0 / 3, - 'reciprocal_rank': 11.0 / 18.0, - 'recall_at_k': 1.0, - }, - 'custom_tag', - ), - ], -) -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 5}), - ('qdrant', {'n_dim': 5}), - ('elasticsearch', {'n_dim': 5}), - ('redis', {'n_dim': 5}), - ], -) -def test_embed_and_evaluate_labeled_dataset( - storage, config, start_storage, use_index, expected, label_tag -): - metric_fns = list(expected.keys()) - - def emb_func(da): - np.random.seed(0) # makes sure that embeddings are always equal - da[:, 'embedding'] = np.random.random((len(da), 5)) - - da1 = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) - da2 = DocumentArray(da1, storage=storage, config=config, copy=True) - - with da2: - if ( - use_index - ): # query and index da are distinct # (different embeddings are generated) - res = da1.embed_and_evaluate( - index_data=da2, - metrics=metric_fns, - embed_funcs=emb_func, - match_batch_size=1, - limit=3, - label_tag=label_tag, - ) - else: # query and index are the same (embeddings of both das are equal) - res = da2.embed_and_evaluate( - metrics=metric_fns, - embed_funcs=emb_func, - match_batch_size=1, - limit=3, - label_tag=label_tag, - ) - for key in metric_fns: - assert key in res - assert abs(res[key] - expected[key]) < 1e-4 +# @pytest.mark.parametrize( +# 'use_index, expected, label_tag', +# [ +# (False, {'precision_at_k': 1.0 / 3, 'reciprocal_rank': 1.0}, 'label'), +# ( +# True, +# { +# 'precision_at_k': 1.0 / 3, +# 'reciprocal_rank': 11.0 / 18.0, +# 'recall_at_k': 1.0, +# }, +# 'custom_tag', +# ), +# ], +# ) +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 5}), +# ('qdrant', {'n_dim': 5}), +# ('elasticsearch', {'n_dim': 5}), +# ('redis', {'n_dim': 5}), +# ], +# ) +# def test_embed_and_evaluate_labeled_dataset( +# storage, config, start_storage, use_index, expected, label_tag +# ): +# metric_fns = list(expected.keys()) +# +# def emb_func(da): +# np.random.seed(0) # makes sure that embeddings are always equal +# da[:, 'embedding'] = np.random.random((len(da), 5)) +# +# da1 = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) +# da2 = DocumentArray(da1, storage=storage, config=config, copy=True) +# +# with da2: +# if ( +# use_index +# ): # query and index da are distinct # (different embeddings are generated) +# res = da1.embed_and_evaluate( +# index_data=da2, +# metrics=metric_fns, +# embed_funcs=emb_func, +# match_batch_size=1, +# limit=3, +# label_tag=label_tag, +# ) +# else: # query and index are the same (embeddings of both das are equal) +# res = da2.embed_and_evaluate( +# metrics=metric_fns, +# embed_funcs=emb_func, +# match_batch_size=1, +# limit=3, +# label_tag=label_tag, +# ) +# for key in metric_fns: +# assert key in res +# assert abs(res[key] - expected[key]) < 1e-4 @pytest.mark.parametrize( @@ -821,46 +821,46 @@ def test_embed_and_evaluate_with_embed_model( assert res['precision_at_k'] == 0.2 -@pytest.mark.parametrize( - 'queries, kwargs, exception', - [ - (DocumentArray.empty(4), {}, ValueError), - ( - DocumentArray([Document(tags={'label': 0})]), - {'index_data': DocumentArray.empty(4)}, - ValueError, - ), - (DocumentArray([Document(tags={'label': 0})]), {}, RuntimeError), - ( - DocumentArray([Document(tags={'label': 0})]), - {'index_data': DocumentArray([Document(tags={'label': 0})])}, - RuntimeError, - ), - ], -) -@pytest.mark.parametrize( - 'storage, config', - [ - ('memory', {}), - ('weaviate', {}), - ('sqlite', {}), - ('annlite', {'n_dim': 5}), - ('qdrant', {'n_dim': 5}), - ('elasticsearch', {'n_dim': 5}), - ('redis', {'n_dim': 5}), - ], -) -def test_embed_and_evaluate_invalid_input_should_raise( - storage, config, queries, kwargs, exception, start_storage -): - kwargs.update({'metrics': ['precision_at_k']}) - if 'index_data' in kwargs: - kwargs['index_data'] = DocumentArray( - kwargs['index_data'], storage=storage, config=config - ) - - with pytest.raises(exception): - queries.embed_and_evaluate(**kwargs) +# @pytest.mark.parametrize( +# 'queries, kwargs, exception', +# [ +# (DocumentArray.empty(4), {}, ValueError), +# ( +# DocumentArray([Document(tags={'label': 0})]), +# {'index_data': DocumentArray.empty(4)}, +# ValueError, +# ), +# (DocumentArray([Document(tags={'label': 0})]), {}, RuntimeError), +# ( +# DocumentArray([Document(tags={'label': 0})]), +# {'index_data': DocumentArray([Document(tags={'label': 0})])}, +# RuntimeError, +# ), +# ], +# ) +# @pytest.mark.parametrize( +# 'storage, config', +# [ +# ('memory', {}), +# ('weaviate', {}), +# ('sqlite', {}), +# ('annlite', {'n_dim': 5}), +# ('qdrant', {'n_dim': 5}), +# ('elasticsearch', {'n_dim': 5}), +# ('redis', {'n_dim': 5}), +# ], +# ) +# def test_embed_and_evaluate_invalid_input_should_raise( +# storage, config, queries, kwargs, exception, start_storage +# ): +# kwargs.update({'metrics': ['precision_at_k']}) +# if 'index_data' in kwargs: +# kwargs['index_data'] = DocumentArray( +# kwargs['index_data'], storage=storage, config=config +# ) +# +# with pytest.raises(exception): +# queries.embed_and_evaluate(**kwargs) @pytest.mark.parametrize( From 1dab1c59c2867627c5c78fe4ae4cb8e670c7199c Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 11:06:32 +0100 Subject: [PATCH 7/8] test: try to find minimal working changes Signed-off-by: Johannes Messner --- .github/workflows/ci.yml | 98 +-- docarray/array/mixins/getattr.py | 3 +- .../array/mixins/oldproto/test_eval_class.py | 776 +++++++++--------- 3 files changed, 438 insertions(+), 439 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5f72b5ced0a..3ea6ccfe944 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -157,55 +157,55 @@ jobs: matrix: ${{ steps.set-matrix.outputs.matrix }} matrix-oldproto: ${{ steps.set-matrix.outputs.matrix-oldproto }} -# docarray-test: -# needs: prep-testbed -# runs-on: ubuntu-latest -# strategy: -# fail-fast: false -# matrix: -# python-version: [3.8] -# test-path: ${{fromJson(needs.prep-testbed.outputs.matrix)}} -# steps: -# - uses: actions/checkout@v2.5.0 -# - name: Set up Python ${{ matrix.python-version }} -# uses: actions/setup-python@v4 -# with: -# python-version: ${{ matrix.python-version }} -# - name: Prepare environment -# run: | -# python -m pip install --upgrade pip -# python -m pip install wheel -# # pip does not properly resolve dependency versions with syntax pip install --no-cache-dir ".[test,full]" -# pip install --no-cache-dir ".[test]" -# pip install --no-cache-dir ".[qdrant]" -# pip install --no-cache-dir ".[annlite]" -# pip install --no-cache-dir ".[weaviate]" -# pip install --no-cache-dir ".[elasticsearch]" -# pip install --no-cache-dir ".[redis]" -# pip install --no-cache-dir ".[full]" -# sudo apt-get install libsndfile1 -# - name: Test -# id: test -# run: | -# pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \ -# -v -s -m "not gpu" ${{ matrix.test-path }} -# echo "codecov_flag=docarray" >> $GITHUB_OUTPUT -# timeout-minutes: 60 -# env: -# JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" -# - name: Check codecov file -# id: check_files -# uses: andstor/file-existence-action@v1 -# with: -# files: "coverage.xml" -# - name: Upload coverage from test to Codecov -# uses: codecov/codecov-action@v3.1.1 -# if: steps.check_files.outputs.files_exists == 'true' && ${{ matrix.python-version }} == '3.8' -# with: -# file: coverage.xml -# flags: ${{ steps.test.outputs.codecov_flag }} -# fail_ci_if_error: false -# token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos + docarray-test: + needs: prep-testbed + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: [3.8] + test-path: ${{fromJson(needs.prep-testbed.outputs.matrix)}} + steps: + - uses: actions/checkout@v2.5.0 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Prepare environment + run: | + python -m pip install --upgrade pip + python -m pip install wheel + # pip does not properly resolve dependency versions with syntax pip install --no-cache-dir ".[test,full]" + pip install --no-cache-dir ".[test]" + pip install --no-cache-dir ".[qdrant]" + pip install --no-cache-dir ".[annlite]" + pip install --no-cache-dir ".[weaviate]" + pip install --no-cache-dir ".[elasticsearch]" + pip install --no-cache-dir ".[redis]" + pip install --no-cache-dir ".[full]" + sudo apt-get install libsndfile1 + - name: Test + id: test + run: | + pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \ + -v -s -m "not gpu" ${{ matrix.test-path }} + echo "codecov_flag=docarray" >> $GITHUB_OUTPUT + timeout-minutes: 60 + env: + JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" + - name: Check codecov file + id: check_files + uses: andstor/file-existence-action@v1 + with: + files: "coverage.xml" + - name: Upload coverage from test to Codecov + uses: codecov/codecov-action@v3.1.1 + if: steps.check_files.outputs.files_exists == 'true' && ${{ matrix.python-version }} == '3.8' + with: + file: coverage.xml + flags: ${{ steps.test.outputs.codecov_flag }} + fail_ci_if_error: false + token: ${{ secrets.CODECOV_TOKEN }} # not required for public repos docarray-oldproto-test: needs: prep-testbed diff --git a/docarray/array/mixins/getattr.py b/docarray/array/mixins/getattr.py index 120ff951e11..588b03e12aa 100644 --- a/docarray/array/mixins/getattr.py +++ b/docarray/array/mixins/getattr.py @@ -11,9 +11,8 @@ def _get_attributes(self, *fields: str) -> List: :return: Returns a list of the values for these fields. When `fields` has multiple values, then it returns a list of list. """ - # small change just to trigger CI tests - fields = list(fields) e_index, b_index = None, None + fields = list(fields) if 'embedding' in fields: e_index = fields.index('embedding') if 'tensor' in fields: diff --git a/tests/unit/array/mixins/oldproto/test_eval_class.py b/tests/unit/array/mixins/oldproto/test_eval_class.py index 1d1e9378071..8d0278a0f8c 100644 --- a/tests/unit/array/mixins/oldproto/test_eval_class.py +++ b/tests/unit/array/mixins/oldproto/test_eval_class.py @@ -12,45 +12,45 @@ from docarray import DocumentArray, Document -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 256}), -# ('qdrant', {'n_dim': 256}), -# ('elasticsearch', {'n_dim': 256}), -# ('redis', {'n_dim': 256}), -# ('milvus', {'n_dim': 256}), -# ], -# ) -# @pytest.mark.parametrize( -# 'metric_fn, kwargs', -# [ -# ('r_precision', {}), -# ('precision_at_k', {}), -# ('hit_at_k', {}), -# ('average_precision', {}), -# ('reciprocal_rank', {}), -# ('recall_at_k', {'max_rel': 9}), -# ('f1_score_at_k', {'max_rel': 9}), -# ('ndcg_at_k', {}), -# ], -# ) -# def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_storage): -# da1 = DocumentArray.empty(10) -# da1.embeddings = np.random.random([10, 256]) -# da1_index = DocumentArray(da1, storage=storage, config=config) -# with da1_index: -# da1.match(da1_index, exclude_self=True) -# r = da1.evaluate(ground_truth=da1, metrics=[metric_fn], strict=False, **kwargs)[ -# metric_fn -# ] -# assert isinstance(r, float) -# assert r == 1.0 -# for d in da1: -# assert d.evaluations[metric_fn].value == 1.0 +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 256}), + ('qdrant', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256}), + ('milvus', {'n_dim': 256}), + ], +) +@pytest.mark.parametrize( + 'metric_fn, kwargs', + [ + ('r_precision', {}), + ('precision_at_k', {}), + ('hit_at_k', {}), + ('average_precision', {}), + ('reciprocal_rank', {}), + ('recall_at_k', {'max_rel': 9}), + ('f1_score_at_k', {'max_rel': 9}), + ('ndcg_at_k', {}), + ], +) +def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_storage): + da1 = DocumentArray.empty(10) + da1.embeddings = np.random.random([10, 256]) + da1_index = DocumentArray(da1, storage=storage, config=config) + with da1_index: + da1.match(da1_index, exclude_self=True) + r = da1.evaluate(ground_truth=da1, metrics=[metric_fn], strict=False, **kwargs)[ + metric_fn + ] + assert isinstance(r, float) + assert r == 1.0 + for d in da1: + assert d.evaluations[metric_fn].value == 1.0 @pytest.mark.parametrize( @@ -134,70 +134,70 @@ def test_eval_mixin_perfect_match_labeled( assert d.evaluations[metric_fn].value == 1.0 -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 256}), -# ('qdrant', {'n_dim': 256}), -# ('elasticsearch', {'n_dim': 256}), -# ('redis', {'n_dim': 256}), -# ('milvus', {'n_dim': 256}), -# ], -# ) -# @pytest.mark.parametrize( -# 'metric_fn, kwargs', -# [ -# ('r_precision', {}), -# ('precision_at_k', {}), -# ('hit_at_k', {}), -# ('average_precision', {}), -# ('reciprocal_rank', {}), -# ('recall_at_k', {'max_rel': 9}), -# ('f1_score_at_k', {'max_rel': 9}), -# ('ndcg_at_k', {}), -# ], -# ) -# def test_eval_mixin_zero_labeled(storage, config, metric_fn, start_storage, kwargs): -# da1 = DocumentArray.empty(10) -# for d in da1: -# d.tags = {'label': 'A'} -# da1.embeddings = np.random.random([10, 256]) -# da2 = copy.deepcopy(da1) -# for d in da2: -# d.tags = {'label': 'B'} -# da1_index = DocumentArray(da2, storage=storage, config=config) -# with da1_index: -# da1.match(da1_index, exclude_self=True) -# r = da1.evaluate([metric_fn], **kwargs)[metric_fn] -# assert isinstance(r, float) -# assert r == 0.0 -# for d in da1: -# assert d.evaluations[metric_fn].value == 0.0 - - -# @pytest.mark.parametrize('label_tag', ['label', 'custom_tag']) -# @pytest.mark.parametrize( -# 'metric_fn, metric_score', -# [ -# ('r_precision', 1.0 / 3), -# ('precision_at_k', 1.0 / 3), -# ('hit_at_k', 1.0), -# ('average_precision', (1.0 + 0.5 + (1.0 / 3)) / 3), -# ('reciprocal_rank', (1.0 + 0.5 + (1.0 / 3)) / 3), -# ('recall_at_k', 1.0 / 3), -# ('f1_score_at_k', 1.0 / 3), -# ('dcg_at_k', (1.0 + 1.0 + 0.6309) / 3), -# ], -# ) -# def test_eval_mixin_one_of_n_labeled(metric_fn, metric_score, label_tag): -# da = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) -# for d in da: -# d.matches = da -# r = da.evaluate([metric_fn], label_tag=label_tag, max_rel=3)[metric_fn] -# assert abs(r - metric_score) < 0.001 +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 256}), + ('qdrant', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256}), + ('milvus', {'n_dim': 256}), + ], +) +@pytest.mark.parametrize( + 'metric_fn, kwargs', + [ + ('r_precision', {}), + ('precision_at_k', {}), + ('hit_at_k', {}), + ('average_precision', {}), + ('reciprocal_rank', {}), + ('recall_at_k', {'max_rel': 9}), + ('f1_score_at_k', {'max_rel': 9}), + ('ndcg_at_k', {}), + ], +) +def test_eval_mixin_zero_labeled(storage, config, metric_fn, start_storage, kwargs): + da1 = DocumentArray.empty(10) + for d in da1: + d.tags = {'label': 'A'} + da1.embeddings = np.random.random([10, 256]) + da2 = copy.deepcopy(da1) + for d in da2: + d.tags = {'label': 'B'} + da1_index = DocumentArray(da2, storage=storage, config=config) + with da1_index: + da1.match(da1_index, exclude_self=True) + r = da1.evaluate([metric_fn], **kwargs)[metric_fn] + assert isinstance(r, float) + assert r == 0.0 + for d in da1: + assert d.evaluations[metric_fn].value == 0.0 + + +@pytest.mark.parametrize('label_tag', ['label', 'custom_tag']) +@pytest.mark.parametrize( + 'metric_fn, metric_score', + [ + ('r_precision', 1.0 / 3), + ('precision_at_k', 1.0 / 3), + ('hit_at_k', 1.0), + ('average_precision', (1.0 + 0.5 + (1.0 / 3)) / 3), + ('reciprocal_rank', (1.0 + 0.5 + (1.0 / 3)) / 3), + ('recall_at_k', 1.0 / 3), + ('f1_score_at_k', 1.0 / 3), + ('dcg_at_k', (1.0 + 1.0 + 0.6309) / 3), + ], +) +def test_eval_mixin_one_of_n_labeled(metric_fn, metric_score, label_tag): + da = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) + for d in da: + d.matches = da + r = da.evaluate([metric_fn], label_tag=label_tag, max_rel=3)[metric_fn] + assert abs(r - metric_score) < 0.001 @pytest.mark.parametrize('label_tag', ['label', 'custom_tag']) @@ -221,16 +221,16 @@ def test_num_relevant_documents_per_label(metric_fn, metric_score, label_tag): assert abs(r - metric_score) < 0.001 -# def test_missing_max_rel_should_raise(): -# da = DocumentArray([Document(text=str(i), tags={'label': i}) for i in range(3)]) -# num_relevant_documents_per_label = {i: 1 for i in range(2)} -# for d in da: -# d.matches = da -# with pytest.raises(ValueError): -# da.evaluate( -# ['recall_at_k'], -# num_relevant_documents_per_label=num_relevant_documents_per_label, -# ) +def test_missing_max_rel_should_raise(): + da = DocumentArray([Document(text=str(i), tags={'label': i}) for i in range(3)]) + num_relevant_documents_per_label = {i: 1 for i in range(2)} + for d in da: + d.matches = da + with pytest.raises(ValueError): + da.evaluate( + ['recall_at_k'], + num_relevant_documents_per_label=num_relevant_documents_per_label, + ) @pytest.mark.parametrize( @@ -279,50 +279,50 @@ def test_eval_mixin_zero_match(storage, config, metric_fn, start_storage, kwargs assert d.evaluations[metric_fn].value == 1.0 -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 256}), -# ('qdrant', {'n_dim': 256}), -# ('elasticsearch', {'n_dim': 256}), -# ('redis', {'n_dim': 256}), -# ('milvus', {'n_dim': 256}), -# ], -# ) -# def test_diff_len_should_raise(storage, config, start_storage): -# da1 = DocumentArray.empty(10) -# da2 = DocumentArray.empty(5) -# for d in da2: -# d.matches.append(da2[0]) -# da2 = DocumentArray(da2, storage=storage, config=config) -# with pytest.raises(ValueError): -# da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) - - -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 256}), -# ('qdrant', {'n_dim': 256}), -# ('elasticsearch', {'n_dim': 256}), -# ('redis', {'n_dim': 256}), -# ('milvus', {'n_dim': 256}), -# ], -# ) -# def test_diff_hash_fun_should_raise(storage, config, start_storage): -# da1 = DocumentArray.empty(10) -# da2 = DocumentArray.empty(5) -# for d in da2: -# d.matches.append(da2[0]) -# da2 = DocumentArray(da2, storage=storage, config=config) -# with pytest.raises(ValueError): -# da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 256}), + ('qdrant', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256}), + ('milvus', {'n_dim': 256}), + ], +) +def test_diff_len_should_raise(storage, config, start_storage): + da1 = DocumentArray.empty(10) + da2 = DocumentArray.empty(5) + for d in da2: + d.matches.append(da2[0]) + da2 = DocumentArray(da2, storage=storage, config=config) + with pytest.raises(ValueError): + da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) + + +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 256}), + ('qdrant', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256}), + ('milvus', {'n_dim': 256}), + ], +) +def test_diff_hash_fun_should_raise(storage, config, start_storage): + da1 = DocumentArray.empty(10) + da2 = DocumentArray.empty(5) + for d in da2: + d.matches.append(da2[0]) + da2 = DocumentArray(da2, storage=storage, config=config) + with pytest.raises(ValueError): + da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) @pytest.mark.parametrize( @@ -439,68 +439,68 @@ def test_diff_match_len_in_gd(storage, config, metric_fn, start_storage, kwargs) assert d.evaluations[metric_fn].value > 0.9 -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 256}), -# ('qdrant', {'n_dim': 256}), -# ('elasticsearch', {'n_dim': 256}), -# ('redis', {'n_dim': 256}), -# ('milvus', {'n_dim': 256}), -# ], -# ) -# def test_empty_da_should_raise(storage, config, start_storage): -# da = DocumentArray([], storage=storage, config=config) -# with pytest.raises(ValueError): -# da.evaluate(metrics=['precision_at_k']) - - -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 256}), -# ('qdrant', {'n_dim': 256}), -# ('elasticsearch', {'n_dim': 256}), -# ('redis', {'n_dim': 256}), -# ('milvus', {'n_dim': 256}), -# ], -# ) -# def test_missing_groundtruth_should_raise(storage, config, start_storage): -# da = DocumentArray(DocumentArray.empty(10), storage=storage, config=config) -# with pytest.raises(RuntimeError): -# da.evaluate(metrics=['precision_at_k']) - - -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 256}), -# ('qdrant', {'n_dim': 256}), -# ('elasticsearch', {'n_dim': 256}), -# ('redis', {'n_dim': 256}), -# ('milvus', {'n_dim': 256}), -# ], -# ) -# def test_useless_groundtruth_warning_should_raise(storage, config, start_storage): -# da1 = DocumentArray.empty(10) -# for d in da1: -# d.tags = {'label': 'A'} -# da1.embeddings = np.random.random([10, 256]) -# da1_index = DocumentArray(da1, storage=storage, config=config) -# with da1_index: -# da1.match(da1_index, exclude_self=True) -# da2 = DocumentArray.empty(10) -# with pytest.warns(UserWarning): -# da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 256}), + ('qdrant', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256}), + ('milvus', {'n_dim': 256}), + ], +) +def test_empty_da_should_raise(storage, config, start_storage): + da = DocumentArray([], storage=storage, config=config) + with pytest.raises(ValueError): + da.evaluate(metrics=['precision_at_k']) + + +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 256}), + ('qdrant', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256}), + ('milvus', {'n_dim': 256}), + ], +) +def test_missing_groundtruth_should_raise(storage, config, start_storage): + da = DocumentArray(DocumentArray.empty(10), storage=storage, config=config) + with pytest.raises(RuntimeError): + da.evaluate(metrics=['precision_at_k']) + + +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 256}), + ('qdrant', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256}), + ('milvus', {'n_dim': 256}), + ], +) +def test_useless_groundtruth_warning_should_raise(storage, config, start_storage): + da1 = DocumentArray.empty(10) + for d in da1: + d.tags = {'label': 'A'} + da1.embeddings = np.random.random([10, 256]) + da1_index = DocumentArray(da1, storage=storage, config=config) + with da1_index: + da1.match(da1_index, exclude_self=True) + da2 = DocumentArray.empty(10) + with pytest.warns(UserWarning): + da1.evaluate(ground_truth=da2, metrics=['precision_at_k']) def dummy_embed_function(da): @@ -509,35 +509,35 @@ def dummy_embed_function(da): da[i, 'embedding'] = np.random.random(5) -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 5}), -# ('qdrant', {'n_dim': 5}), -# ('elasticsearch', {'n_dim': 5}), -# ('redis', {'n_dim': 5}), -# ], -# ) -# def test_embed_and_evaluate_single_da(storage, config, start_storage): -# -# gt = DocumentArray([Document(text=str(i)) for i in range(10)]) -# queries_da = DocumentArray(gt, copy=True) -# queries_da = DocumentArray(queries_da, storage=storage, config=config) -# dummy_embed_function(gt) -# gt.match(gt, limit=3) -# -# with queries_da: -# res = queries_da.embed_and_evaluate( -# ground_truth=gt, -# metrics=['precision_at_k', 'reciprocal_rank'], -# embed_funcs=dummy_embed_function, -# match_batch_size=1, -# limit=3, -# ) -# assert all([v == 1.0 for v in res.values()]) +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 5}), + ('qdrant', {'n_dim': 5}), + ('elasticsearch', {'n_dim': 5}), + ('redis', {'n_dim': 5}), + ], +) +def test_embed_and_evaluate_single_da(storage, config, start_storage): + + gt = DocumentArray([Document(text=str(i)) for i in range(10)]) + queries_da = DocumentArray(gt, copy=True) + queries_da = DocumentArray(queries_da, storage=storage, config=config) + dummy_embed_function(gt) + gt.match(gt, limit=3) + + with queries_da: + res = queries_da.embed_and_evaluate( + ground_truth=gt, + metrics=['precision_at_k', 'reciprocal_rank'], + embed_funcs=dummy_embed_function, + match_batch_size=1, + limit=3, + ) + assert all([v == 1.0 for v in res.values()]) @pytest.mark.parametrize( @@ -586,44 +586,44 @@ def test_embed_and_evaluate_with_and_without_exclude_self( assert abs(res[key] - expected_results[key]) < 1e-5 -# @pytest.mark.parametrize( -# 'sample_size', -# [None, 10], -# ) -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 5}), -# ('qdrant', {'n_dim': 5}), -# ('elasticsearch', {'n_dim': 5}), -# ('redis', {'n_dim': 5}), -# ], -# ) -# def test_embed_and_evaluate_two_das(storage, config, sample_size, start_storage): -# -# gt_queries = DocumentArray([Document(text=str(i)) for i in range(100)]) -# gt_index = DocumentArray([Document(text=str(i)) for i in range(100, 200)]) -# queries_da = DocumentArray(gt_queries, copy=True) -# index_da = DocumentArray(gt_index, copy=True) -# index_da = DocumentArray(index_da, storage=storage, config=config) -# dummy_embed_function(gt_queries) -# dummy_embed_function(gt_index) -# gt_queries.match(gt_index, limit=3) -# -# with index_da: -# res = queries_da.embed_and_evaluate( -# ground_truth=gt_queries, -# index_data=index_da, -# metrics=['precision_at_k', 'reciprocal_rank'], -# embed_funcs=dummy_embed_function, -# match_batch_size=1, -# limit=3, -# query_sample_size=sample_size, -# ) -# assert all([v == 1.0 for v in res.values()]) +@pytest.mark.parametrize( + 'sample_size', + [None, 10], +) +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 5}), + ('qdrant', {'n_dim': 5}), + ('elasticsearch', {'n_dim': 5}), + ('redis', {'n_dim': 5}), + ], +) +def test_embed_and_evaluate_two_das(storage, config, sample_size, start_storage): + + gt_queries = DocumentArray([Document(text=str(i)) for i in range(100)]) + gt_index = DocumentArray([Document(text=str(i)) for i in range(100, 200)]) + queries_da = DocumentArray(gt_queries, copy=True) + index_da = DocumentArray(gt_index, copy=True) + index_da = DocumentArray(index_da, storage=storage, config=config) + dummy_embed_function(gt_queries) + dummy_embed_function(gt_index) + gt_queries.match(gt_index, limit=3) + + with index_da: + res = queries_da.embed_and_evaluate( + ground_truth=gt_queries, + index_data=index_da, + metrics=['precision_at_k', 'reciprocal_rank'], + embed_funcs=dummy_embed_function, + match_batch_size=1, + limit=3, + query_sample_size=sample_size, + ) + assert all([v == 1.0 for v in res.values()]) def test_embed_and_evaluate_two_different_das(): @@ -655,68 +655,68 @@ def test_embed_and_evaluate_two_different_das(): assert abs(res['f1_score_at_k'] - 1.0 / 1.5) < 1e-5 -# @pytest.mark.parametrize( -# 'use_index, expected, label_tag', -# [ -# (False, {'precision_at_k': 1.0 / 3, 'reciprocal_rank': 1.0}, 'label'), -# ( -# True, -# { -# 'precision_at_k': 1.0 / 3, -# 'reciprocal_rank': 11.0 / 18.0, -# 'recall_at_k': 1.0, -# }, -# 'custom_tag', -# ), -# ], -# ) -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 5}), -# ('qdrant', {'n_dim': 5}), -# ('elasticsearch', {'n_dim': 5}), -# ('redis', {'n_dim': 5}), -# ], -# ) -# def test_embed_and_evaluate_labeled_dataset( -# storage, config, start_storage, use_index, expected, label_tag -# ): -# metric_fns = list(expected.keys()) -# -# def emb_func(da): -# np.random.seed(0) # makes sure that embeddings are always equal -# da[:, 'embedding'] = np.random.random((len(da), 5)) -# -# da1 = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) -# da2 = DocumentArray(da1, storage=storage, config=config, copy=True) -# -# with da2: -# if ( -# use_index -# ): # query and index da are distinct # (different embeddings are generated) -# res = da1.embed_and_evaluate( -# index_data=da2, -# metrics=metric_fns, -# embed_funcs=emb_func, -# match_batch_size=1, -# limit=3, -# label_tag=label_tag, -# ) -# else: # query and index are the same (embeddings of both das are equal) -# res = da2.embed_and_evaluate( -# metrics=metric_fns, -# embed_funcs=emb_func, -# match_batch_size=1, -# limit=3, -# label_tag=label_tag, -# ) -# for key in metric_fns: -# assert key in res -# assert abs(res[key] - expected[key]) < 1e-4 +@pytest.mark.parametrize( + 'use_index, expected, label_tag', + [ + (False, {'precision_at_k': 1.0 / 3, 'reciprocal_rank': 1.0}, 'label'), + ( + True, + { + 'precision_at_k': 1.0 / 3, + 'reciprocal_rank': 11.0 / 18.0, + 'recall_at_k': 1.0, + }, + 'custom_tag', + ), + ], +) +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 5}), + ('qdrant', {'n_dim': 5}), + ('elasticsearch', {'n_dim': 5}), + ('redis', {'n_dim': 5}), + ], +) +def test_embed_and_evaluate_labeled_dataset( + storage, config, start_storage, use_index, expected, label_tag +): + metric_fns = list(expected.keys()) + + def emb_func(da): + np.random.seed(0) # makes sure that embeddings are always equal + da[:, 'embedding'] = np.random.random((len(da), 5)) + + da1 = DocumentArray([Document(text=str(i), tags={label_tag: i}) for i in range(3)]) + da2 = DocumentArray(da1, storage=storage, config=config, copy=True) + + with da2: + if ( + use_index + ): # query and index da are distinct # (different embeddings are generated) + res = da1.embed_and_evaluate( + index_data=da2, + metrics=metric_fns, + embed_funcs=emb_func, + match_batch_size=1, + limit=3, + label_tag=label_tag, + ) + else: # query and index are the same (embeddings of both das are equal) + res = da2.embed_and_evaluate( + metrics=metric_fns, + embed_funcs=emb_func, + match_batch_size=1, + limit=3, + label_tag=label_tag, + ) + for key in metric_fns: + assert key in res + assert abs(res[key] - expected[key]) < 1e-4 @pytest.mark.parametrize( @@ -821,46 +821,46 @@ def test_embed_and_evaluate_with_embed_model( assert res['precision_at_k'] == 0.2 -# @pytest.mark.parametrize( -# 'queries, kwargs, exception', -# [ -# (DocumentArray.empty(4), {}, ValueError), -# ( -# DocumentArray([Document(tags={'label': 0})]), -# {'index_data': DocumentArray.empty(4)}, -# ValueError, -# ), -# (DocumentArray([Document(tags={'label': 0})]), {}, RuntimeError), -# ( -# DocumentArray([Document(tags={'label': 0})]), -# {'index_data': DocumentArray([Document(tags={'label': 0})])}, -# RuntimeError, -# ), -# ], -# ) -# @pytest.mark.parametrize( -# 'storage, config', -# [ -# ('memory', {}), -# ('weaviate', {}), -# ('sqlite', {}), -# ('annlite', {'n_dim': 5}), -# ('qdrant', {'n_dim': 5}), -# ('elasticsearch', {'n_dim': 5}), -# ('redis', {'n_dim': 5}), -# ], -# ) -# def test_embed_and_evaluate_invalid_input_should_raise( -# storage, config, queries, kwargs, exception, start_storage -# ): -# kwargs.update({'metrics': ['precision_at_k']}) -# if 'index_data' in kwargs: -# kwargs['index_data'] = DocumentArray( -# kwargs['index_data'], storage=storage, config=config -# ) -# -# with pytest.raises(exception): -# queries.embed_and_evaluate(**kwargs) +@pytest.mark.parametrize( + 'queries, kwargs, exception', + [ + (DocumentArray.empty(4), {}, ValueError), + ( + DocumentArray([Document(tags={'label': 0})]), + {'index_data': DocumentArray.empty(4)}, + ValueError, + ), + (DocumentArray([Document(tags={'label': 0})]), {}, RuntimeError), + ( + DocumentArray([Document(tags={'label': 0})]), + {'index_data': DocumentArray([Document(tags={'label': 0})])}, + RuntimeError, + ), + ], +) +@pytest.mark.parametrize( + 'storage, config', + [ + ('memory', {}), + ('weaviate', {}), + ('sqlite', {}), + ('annlite', {'n_dim': 5}), + ('qdrant', {'n_dim': 5}), + ('elasticsearch', {'n_dim': 5}), + ('redis', {'n_dim': 5}), + ], +) +def test_embed_and_evaluate_invalid_input_should_raise( + storage, config, queries, kwargs, exception, start_storage +): + kwargs.update({'metrics': ['precision_at_k']}) + if 'index_data' in kwargs: + kwargs['index_data'] = DocumentArray( + kwargs['index_data'], storage=storage, config=config + ) + + with pytest.raises(exception): + queries.embed_and_evaluate(**kwargs) @pytest.mark.parametrize( From b3387a45a6e0cfa57d6cd4e8214466e8f72d3d97 Mon Sep 17 00:00:00 2001 From: Johannes Messner Date: Wed, 11 Jan 2023 11:08:34 +0100 Subject: [PATCH 8/8] ci: fix success all tests Signed-off-by: Johannes Messner --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ea6ccfe944..eda1318e7b1 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -260,7 +260,7 @@ jobs: # just for blocking the merge until all parallel core-test are successful success-all-test: - needs: [commit-lint, docarray-oldproto-test] + needs: [commit-lint, docarray-test, docarray-oldproto-test] if: always() runs-on: ubuntu-latest steps: