From b94945920851b0d3a69b4649cbcbf841b9f76f90 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 14 Oct 2022 17:43:23 +0800 Subject: [PATCH 01/24] fix: fix annlite unit test --- .github/workflows/ci.yml | 2 +- docarray/array/storage/annlite/backend.py | 3 +++ tests/unit/array/mixins/test_io.py | 20 +++++++++++++++----- 3 files changed, 19 insertions(+), 6 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 256cfe224e1..f0c32c84f06 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -133,7 +133,7 @@ jobs: pip install --no-cache-dir ".[test]" pip install --no-cache-dir ".[full]" pip install --no-cache-dir ".[qdrant]" - pip install --no-cache-dir ".[annlite]" + pip install --pre annlite pip install --no-cache-dir ".[weaviate]" pip install --no-cache-dir ".[elasticsearch]" pip install --no-cache-dir ".[redis]" diff --git a/docarray/array/storage/annlite/backend.py b/docarray/array/storage/annlite/backend.py index e2cfde9cb5c..f85145701b5 100644 --- a/docarray/array/storage/annlite/backend.py +++ b/docarray/array/storage/annlite/backend.py @@ -133,3 +133,6 @@ def __setstate__(self, state): def __len__(self): return self._annlite.index_size + + def close(self): + self._annlite.close() diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index e51b16f628c..5f6fe20670f 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -200,13 +200,23 @@ def test_from_to_pd_dataframe(da_cls, config, start_storage): ) def test_from_to_bytes(da_cls, config, start_storage): # simple - assert len(da_cls.load_binary(bytes(da_cls.empty(2, config=config)))) == 2 + if da_cls == DocumentArrayAnnlite: + b = da_cls.empty(2, config=config) + b.close() + + d = da_cls.from_bytes(b.to_bytes(), config=config) + assert len(d) == 2 + d.close() + else: + assert len(da_cls.load_binary(bytes(da_cls.empty(2, config=config)))) == 2 da = da_cls.empty(2, config=config) da[:, 'embedding'] = [[1, 2, 3], [4, 5, 6]] da[:, 'tensor'] = [[1, 2], [2, 1]] da[0, 'tags'] = {'hello': 'world'} + if da_cls == DocumentArrayAnnlite: + da.close() da2 = da_cls.load_binary(bytes(da)) assert da2.tensors == [[1, 2], [2, 1]] import numpy as np @@ -267,10 +277,10 @@ def test_push_pull_io(da_cls, config, show_progress, start_storage): (DocumentArrayInMemory, None), (DocumentArraySqlite, None), (DocumentArrayWeaviate, WeaviateConfig(n_dim=3)), - # (DocumentArrayAnnlite, PqliteConfig(n_dim=3)), # TODO: enable this - # (DocumentArrayQdrant, QdrantConfig(n_dim=3)), - # (DocumentArrayElastic, ElasticConfig(n_dim=3)), # Elastic needs config - # (DocumentArrayRedis, RedisConfig(n_dim=3)), # Redis needs config + (DocumentArrayAnnlite, PqliteConfig(n_dim=3)), # TODO: enable this + (DocumentArrayQdrant, QdrantConfig(n_dim=3)), + (DocumentArrayElastic, ElasticConfig(n_dim=3)), # Elastic needs config + (DocumentArrayRedis, RedisConfig(n_dim=3)), # Redis needs config ], ) def test_from_to_base64(protocol, compress, da_cls, config): From 0e4b33529cb9b36999b02c6f59c088ac6c54d107 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 14 Oct 2022 17:46:34 +0800 Subject: [PATCH 02/24] fix: fix annlite unit test --- tests/unit/array/mixins/test_io.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 5f6fe20670f..c7583822058 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -277,10 +277,10 @@ def test_push_pull_io(da_cls, config, show_progress, start_storage): (DocumentArrayInMemory, None), (DocumentArraySqlite, None), (DocumentArrayWeaviate, WeaviateConfig(n_dim=3)), - (DocumentArrayAnnlite, PqliteConfig(n_dim=3)), # TODO: enable this - (DocumentArrayQdrant, QdrantConfig(n_dim=3)), - (DocumentArrayElastic, ElasticConfig(n_dim=3)), # Elastic needs config - (DocumentArrayRedis, RedisConfig(n_dim=3)), # Redis needs config + # (DocumentArrayAnnlite, PqliteConfig(n_dim=3)), # TODO: enable this + # (DocumentArrayQdrant, QdrantConfig(n_dim=3)), + # (DocumentArrayElastic, ElasticConfig(n_dim=3)), # Elastic needs config + # (DocumentArrayRedis, RedisConfig(n_dim=3)), # Redis needs config ], ) def test_from_to_base64(protocol, compress, da_cls, config): From 1e0e3cb1f6d0b7c86a1de048b94ab85ac7db45c9 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 14 Oct 2022 17:47:12 +0800 Subject: [PATCH 03/24] fix: fix annlite unit test --- tests/unit/array/mixins/test_io.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index c7583822058..0ade8861178 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -277,10 +277,10 @@ def test_push_pull_io(da_cls, config, show_progress, start_storage): (DocumentArrayInMemory, None), (DocumentArraySqlite, None), (DocumentArrayWeaviate, WeaviateConfig(n_dim=3)), - # (DocumentArrayAnnlite, PqliteConfig(n_dim=3)), # TODO: enable this + # (DocumentArrayAnnlite, PqliteConfig(n_dim=3)), # TODO: enable this # (DocumentArrayQdrant, QdrantConfig(n_dim=3)), - # (DocumentArrayElastic, ElasticConfig(n_dim=3)), # Elastic needs config - # (DocumentArrayRedis, RedisConfig(n_dim=3)), # Redis needs config + # (DocumentArrayElastic, ElasticConfig(n_dim=3)), # Elastic needs config + # (DocumentArrayRedis, RedisConfig(n_dim=3)), # Redis needs config ], ) def test_from_to_base64(protocol, compress, da_cls, config): From 9616ce74b5681b3bd601b9e302cec4e7187254a9 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Mon, 17 Oct 2022 14:07:37 +0800 Subject: [PATCH 04/24] fix: resolve file lock in rocksdb --- tests/unit/array/mixins/test_empty.py | 3 ++- tests/unit/array/mixins/test_io.py | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/tests/unit/array/mixins/test_empty.py b/tests/unit/array/mixins/test_empty.py index 0ba3da06e93..0dcac2ebf34 100644 --- a/tests/unit/array/mixins/test_empty.py +++ b/tests/unit/array/mixins/test_empty.py @@ -24,7 +24,6 @@ ], ) def test_empty_non_zero(da_cls, config, start_storage): - # Assert .empty provides a da with 0 docs if config: da = da_cls.empty(config=config) @@ -32,6 +31,8 @@ def test_empty_non_zero(da_cls, config, start_storage): da = da_cls.empty() assert len(da) == 0 + if da_cls == DocumentArrayAnnlite: + da.close() # Assert .empty provides a da of the correct length if config: diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 0ade8861178..9bcde17618a 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -45,6 +45,13 @@ def test_document_save_load( da = da_cls(docs, config=config()) da.insert(2, Document(id='new')) da.save(tmp_file, file_format=method, encoding=encoding) + if da_cls == DocumentArrayAnnlite: + da_info = [ + [d.id for d in da], + [d.embedding for d in da], + [d.content for d in da], + ] + da.close() da_r = type(da).load( tmp_file, file_format=method, encoding=encoding, config=config() @@ -53,10 +60,17 @@ def test_document_save_load( assert type(da) is type(da_r) assert len(da) == len(da_r) assert da_r[2].id == 'new' - for d, d_r in zip(da, da_r): - assert d.id == d_r.id - np.testing.assert_equal(d.embedding, d_r.embedding) - assert d.content == d_r.content + + if da_cls == DocumentArrayAnnlite: + for idx, d_r in enumerate(da_r): + assert da_info[0][idx] == d_r.id + np.testing.assert_equal(da_info[1][idx], d_r.embedding) + assert da_info[2][idx] == d_r.content + else: + for d, d_r in zip(da, da_r): + assert d.id == d_r.id + np.testing.assert_equal(d.embedding, d_r.embedding) + assert d.content == d_r.content @pytest.mark.parametrize('flatten_tags', [True, False]) From 370df2d423772a623ef51ad725e6bba43dcf4c52 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Mon, 17 Oct 2022 14:41:20 +0800 Subject: [PATCH 05/24] fix: close da in plot test --- tests/unit/array/mixins/test_plot.py | 332 ++++++++++++++------------- 1 file changed, 168 insertions(+), 164 deletions(-) diff --git a/tests/unit/array/mixins/test_plot.py b/tests/unit/array/mixins/test_plot.py index 858cd082630..eaf2a139589 100644 --- a/tests/unit/array/mixins/test_plot.py +++ b/tests/unit/array/mixins/test_plot.py @@ -57,167 +57,171 @@ def test_sprite_fail_tensor_success_uri( da.save_gif(tmpdir / 'sprint_da.gif', show_index=show_index, channel_axis=0) assert os.path.exists(tmpdir / 'sprint_da.png') - -@pytest.mark.parametrize('image_source', ['tensor', 'uri']) -@pytest.mark.parametrize( - 'da_cls,config_gen', - [ - (DocumentArray, None), - (DocumentArraySqlite, None), - (DocumentArrayAnnlite, lambda: AnnliteConfig(n_dim=128)), - (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=128)), - (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=128, scroll_batch_size=8)), - (DocumentArrayElastic, lambda: ElasticConfig(n_dim=128)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=128)), - ], -) -@pytest.mark.parametrize('canvas_size', [50, 512]) -@pytest.mark.parametrize('min_size', [16, 64]) -def test_sprite_image_generator( - pytestconfig, - tmpdir, - image_source, - da_cls, - config_gen, - canvas_size, - min_size, - start_storage, -): - files = [ - f'{pytestconfig.rootdir}/tests/image-data/*.jpg', - f'{pytestconfig.rootdir}/tests/image-data/*.png', - ] - if config_gen: - da = da_cls.from_files(files, config=config_gen()) - else: - da = da_cls.from_files(files) - da.apply(lambda d: d.load_uri_to_image_tensor()) - da.plot_image_sprites( - tmpdir / 'sprint_da.png', - image_source=image_source, - canvas_size=canvas_size, - min_size=min_size, - ) - assert os.path.exists(tmpdir / 'sprint_da.png') - - -@pytest.fixture -def da_and_dam(start_storage): - embeddings = np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]]) - return [ - cls( - [ - Document(embedding=x, tags={'label': random.randint(0, 5)}) - for x in embeddings - ], - **kwargs, - ) - for cls, kwargs in [ - (DocumentArray, {}), - (DocumentArraySqlite, {}), - (DocumentArrayWeaviate, {'config': {'n_dim': 3}}), - (DocumentArrayAnnlite, {'config': {'n_dim': 3}}), - (DocumentArrayQdrant, {'config': {'n_dim': 3}}), - (DocumentArrayRedis, {'config': {'n_dim': 3}}), - ] - ] - - -def test_plot_embeddings(da_and_dam): - for da in da_and_dam: - _test_plot_embeddings(da) - - -def test_plot_sprites(tmpdir): - da = DocumentArray.empty(5) - da.tensors = np.random.random([5, 3, 226, 226]) - da.plot_image_sprites(tmpdir / 'a.png', channel_axis=0, show_index=True) - assert os.path.exists(tmpdir / 'a.png') - - -def _test_plot_embeddings(da): - p = da.plot_embeddings(start_server=False) - assert os.path.exists(p) - assert os.path.exists(os.path.join(p, 'config.json')) - with open(os.path.join(p, 'config.json')) as fp: - config = json.load(fp) - assert len(config['embeddings']) == 1 - assert config['embeddings'][0]['tensorShape'] == list(da.embeddings.shape) - - -@pytest.mark.parametrize( - 'da_cls,config_gen', - [ - (DocumentArray, None), - (DocumentArraySqlite, None), - (DocumentArrayAnnlite, lambda: AnnliteConfig(n_dim=5)), - (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=5)), - (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=5)), - (DocumentArrayElastic, lambda: ElasticConfig(n_dim=5)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=5)), - ], -) -def test_plot_embeddings_same_path(tmpdir, da_cls, config_gen, start_storage): - if config_gen: - da1 = da_cls.empty(100, config=config_gen()) - da2 = da_cls.empty(768, config=config_gen()) - else: - da1 = da_cls.empty(100) - da2 = da_cls.empty(768) - da1.embeddings = np.random.random([100, 5]) - p1 = da1.plot_embeddings(start_server=False, path=tmpdir) - da2.embeddings = np.random.random([768, 5]) - p2 = da2.plot_embeddings(start_server=False, path=tmpdir) - assert p1 == p2 - assert os.path.exists(p1) - with open(os.path.join(p1, 'config.json')) as fp: - config = json.load(fp) - assert len(config['embeddings']) == 2 - - -@pytest.mark.parametrize( - 'da_cls,config', - [ - (DocumentArray, None), - (DocumentArraySqlite, None), - (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), - (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), - (DocumentArrayQdrant, QdrantConfig(n_dim=128)), - (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128)), - ], -) -def test_summary_homo_hetero(da_cls, config, start_storage): - if config: - da = da_cls.empty(100, config=config) - else: - da = da_cls.empty(100) - da._get_attributes() - da.summary() - da._get_raw_summary() - - da[0].pop('id') - da.summary() - - da._get_raw_summary() - - -@pytest.mark.parametrize( - 'da_cls,config', - [ - (DocumentArray, None), - (DocumentArraySqlite, None), - (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), - (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), - (DocumentArrayQdrant, QdrantConfig(n_dim=128)), - (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128)), - ], -) -def test_empty_get_attributes(da_cls, config, start_storage): - if config: - da = da_cls.empty(10, config=config) - else: - da = da_cls.empty(10) - da[0].pop('id') - print(da[:, 'id']) + if da_cls == DocumentArrayAnnlite: + da.close() + + +# +# @pytest.mark.parametrize('image_source', ['tensor', 'uri']) +# @pytest.mark.parametrize( +# 'da_cls,config_gen', +# [ +# (DocumentArray, None), +# (DocumentArraySqlite, None), +# (DocumentArrayAnnlite, lambda: AnnliteConfig(n_dim=128)), +# (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=128)), +# (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=128, scroll_batch_size=8)), +# (DocumentArrayElastic, lambda: ElasticConfig(n_dim=128)), +# (DocumentArrayRedis, lambda: RedisConfig(n_dim=128)), +# ], +# ) +# @pytest.mark.parametrize('canvas_size', [50, 512]) +# @pytest.mark.parametrize('min_size', [16, 64]) +# def test_sprite_image_generator( +# pytestconfig, +# tmpdir, +# image_source, +# da_cls, +# config_gen, +# canvas_size, +# min_size, +# start_storage, +# ): +# files = [ +# f'{pytestconfig.rootdir}/tests/image-data/*.jpg', +# f'{pytestconfig.rootdir}/tests/image-data/*.png', +# ] +# if config_gen: +# da = da_cls.from_files(files, config=config_gen()) +# else: +# da = da_cls.from_files(files) +# da.apply(lambda d: d.load_uri_to_image_tensor()) +# da.plot_image_sprites( +# tmpdir / 'sprint_da.png', +# image_source=image_source, +# canvas_size=canvas_size, +# min_size=min_size, +# ) +# assert os.path.exists(tmpdir / 'sprint_da.png') +# +# +# @pytest.fixture +# def da_and_dam(start_storage): +# embeddings = np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]]) +# return [ +# cls( +# [ +# Document(embedding=x, tags={'label': random.randint(0, 5)}) +# for x in embeddings +# ], +# **kwargs, +# ) +# for cls, kwargs in [ +# (DocumentArray, {}), +# (DocumentArraySqlite, {}), +# (DocumentArrayWeaviate, {'config': {'n_dim': 3}}), +# (DocumentArrayAnnlite, {'config': {'n_dim': 3}}), +# (DocumentArrayQdrant, {'config': {'n_dim': 3}}), +# (DocumentArrayRedis, {'config': {'n_dim': 3}}), +# ] +# ] +# +# +# def test_plot_embeddings(da_and_dam): +# for da in da_and_dam: +# _test_plot_embeddings(da) +# +# +# def test_plot_sprites(tmpdir): +# da = DocumentArray.empty(5) +# da.tensors = np.random.random([5, 3, 226, 226]) +# da.plot_image_sprites(tmpdir / 'a.png', channel_axis=0, show_index=True) +# assert os.path.exists(tmpdir / 'a.png') +# +# +# def _test_plot_embeddings(da): +# p = da.plot_embeddings(start_server=False) +# assert os.path.exists(p) +# assert os.path.exists(os.path.join(p, 'config.json')) +# with open(os.path.join(p, 'config.json')) as fp: +# config = json.load(fp) +# assert len(config['embeddings']) == 1 +# assert config['embeddings'][0]['tensorShape'] == list(da.embeddings.shape) +# +# +# @pytest.mark.parametrize( +# 'da_cls,config_gen', +# [ +# (DocumentArray, None), +# (DocumentArraySqlite, None), +# (DocumentArrayAnnlite, lambda: AnnliteConfig(n_dim=5)), +# (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=5)), +# (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=5)), +# (DocumentArrayElastic, lambda: ElasticConfig(n_dim=5)), +# (DocumentArrayRedis, lambda: RedisConfig(n_dim=5)), +# ], +# ) +# def test_plot_embeddings_same_path(tmpdir, da_cls, config_gen, start_storage): +# if config_gen: +# da1 = da_cls.empty(100, config=config_gen()) +# da2 = da_cls.empty(768, config=config_gen()) +# else: +# da1 = da_cls.empty(100) +# da2 = da_cls.empty(768) +# da1.embeddings = np.random.random([100, 5]) +# p1 = da1.plot_embeddings(start_server=False, path=tmpdir) +# da2.embeddings = np.random.random([768, 5]) +# p2 = da2.plot_embeddings(start_server=False, path=tmpdir) +# assert p1 == p2 +# assert os.path.exists(p1) +# with open(os.path.join(p1, 'config.json')) as fp: +# config = json.load(fp) +# assert len(config['embeddings']) == 2 +# +# +# @pytest.mark.parametrize( +# 'da_cls,config', +# [ +# (DocumentArray, None), +# (DocumentArraySqlite, None), +# (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), +# (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), +# (DocumentArrayQdrant, QdrantConfig(n_dim=128)), +# (DocumentArrayElastic, ElasticConfig(n_dim=128)), +# (DocumentArrayRedis, RedisConfig(n_dim=128)), +# ], +# ) +# def test_summary_homo_hetero(da_cls, config, start_storage): +# if config: +# da = da_cls.empty(100, config=config) +# else: +# da = da_cls.empty(100) +# da._get_attributes() +# da.summary() +# da._get_raw_summary() +# +# da[0].pop('id') +# da.summary() +# +# da._get_raw_summary() +# +# +# @pytest.mark.parametrize( +# 'da_cls,config', +# [ +# (DocumentArray, None), +# (DocumentArraySqlite, None), +# (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), +# (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), +# (DocumentArrayQdrant, QdrantConfig(n_dim=128)), +# (DocumentArrayElastic, ElasticConfig(n_dim=128)), +# (DocumentArrayRedis, RedisConfig(n_dim=128)), +# ], +# ) +# def test_empty_get_attributes(da_cls, config, start_storage): +# if config: +# da = da_cls.empty(10, config=config) +# else: +# da = da_cls.empty(10) +# da[0].pop('id') +# print(da[:, 'id']) From 468888c575623e254b6ed2bfd1041587c4f62fb1 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Mon, 17 Oct 2022 14:44:58 +0800 Subject: [PATCH 06/24] fix: close da in plot test --- tests/unit/array/mixins/test_plot.py | 327 +++++++++++++-------------- 1 file changed, 163 insertions(+), 164 deletions(-) diff --git a/tests/unit/array/mixins/test_plot.py b/tests/unit/array/mixins/test_plot.py index eaf2a139589..91d46f0acae 100644 --- a/tests/unit/array/mixins/test_plot.py +++ b/tests/unit/array/mixins/test_plot.py @@ -61,167 +61,166 @@ def test_sprite_fail_tensor_success_uri( da.close() -# -# @pytest.mark.parametrize('image_source', ['tensor', 'uri']) -# @pytest.mark.parametrize( -# 'da_cls,config_gen', -# [ -# (DocumentArray, None), -# (DocumentArraySqlite, None), -# (DocumentArrayAnnlite, lambda: AnnliteConfig(n_dim=128)), -# (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=128)), -# (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=128, scroll_batch_size=8)), -# (DocumentArrayElastic, lambda: ElasticConfig(n_dim=128)), -# (DocumentArrayRedis, lambda: RedisConfig(n_dim=128)), -# ], -# ) -# @pytest.mark.parametrize('canvas_size', [50, 512]) -# @pytest.mark.parametrize('min_size', [16, 64]) -# def test_sprite_image_generator( -# pytestconfig, -# tmpdir, -# image_source, -# da_cls, -# config_gen, -# canvas_size, -# min_size, -# start_storage, -# ): -# files = [ -# f'{pytestconfig.rootdir}/tests/image-data/*.jpg', -# f'{pytestconfig.rootdir}/tests/image-data/*.png', -# ] -# if config_gen: -# da = da_cls.from_files(files, config=config_gen()) -# else: -# da = da_cls.from_files(files) -# da.apply(lambda d: d.load_uri_to_image_tensor()) -# da.plot_image_sprites( -# tmpdir / 'sprint_da.png', -# image_source=image_source, -# canvas_size=canvas_size, -# min_size=min_size, -# ) -# assert os.path.exists(tmpdir / 'sprint_da.png') -# -# -# @pytest.fixture -# def da_and_dam(start_storage): -# embeddings = np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]]) -# return [ -# cls( -# [ -# Document(embedding=x, tags={'label': random.randint(0, 5)}) -# for x in embeddings -# ], -# **kwargs, -# ) -# for cls, kwargs in [ -# (DocumentArray, {}), -# (DocumentArraySqlite, {}), -# (DocumentArrayWeaviate, {'config': {'n_dim': 3}}), -# (DocumentArrayAnnlite, {'config': {'n_dim': 3}}), -# (DocumentArrayQdrant, {'config': {'n_dim': 3}}), -# (DocumentArrayRedis, {'config': {'n_dim': 3}}), -# ] -# ] -# -# -# def test_plot_embeddings(da_and_dam): -# for da in da_and_dam: -# _test_plot_embeddings(da) -# -# -# def test_plot_sprites(tmpdir): -# da = DocumentArray.empty(5) -# da.tensors = np.random.random([5, 3, 226, 226]) -# da.plot_image_sprites(tmpdir / 'a.png', channel_axis=0, show_index=True) -# assert os.path.exists(tmpdir / 'a.png') -# -# -# def _test_plot_embeddings(da): -# p = da.plot_embeddings(start_server=False) -# assert os.path.exists(p) -# assert os.path.exists(os.path.join(p, 'config.json')) -# with open(os.path.join(p, 'config.json')) as fp: -# config = json.load(fp) -# assert len(config['embeddings']) == 1 -# assert config['embeddings'][0]['tensorShape'] == list(da.embeddings.shape) -# -# -# @pytest.mark.parametrize( -# 'da_cls,config_gen', -# [ -# (DocumentArray, None), -# (DocumentArraySqlite, None), -# (DocumentArrayAnnlite, lambda: AnnliteConfig(n_dim=5)), -# (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=5)), -# (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=5)), -# (DocumentArrayElastic, lambda: ElasticConfig(n_dim=5)), -# (DocumentArrayRedis, lambda: RedisConfig(n_dim=5)), -# ], -# ) -# def test_plot_embeddings_same_path(tmpdir, da_cls, config_gen, start_storage): -# if config_gen: -# da1 = da_cls.empty(100, config=config_gen()) -# da2 = da_cls.empty(768, config=config_gen()) -# else: -# da1 = da_cls.empty(100) -# da2 = da_cls.empty(768) -# da1.embeddings = np.random.random([100, 5]) -# p1 = da1.plot_embeddings(start_server=False, path=tmpdir) -# da2.embeddings = np.random.random([768, 5]) -# p2 = da2.plot_embeddings(start_server=False, path=tmpdir) -# assert p1 == p2 -# assert os.path.exists(p1) -# with open(os.path.join(p1, 'config.json')) as fp: -# config = json.load(fp) -# assert len(config['embeddings']) == 2 -# -# -# @pytest.mark.parametrize( -# 'da_cls,config', -# [ -# (DocumentArray, None), -# (DocumentArraySqlite, None), -# (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), -# (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), -# (DocumentArrayQdrant, QdrantConfig(n_dim=128)), -# (DocumentArrayElastic, ElasticConfig(n_dim=128)), -# (DocumentArrayRedis, RedisConfig(n_dim=128)), -# ], -# ) -# def test_summary_homo_hetero(da_cls, config, start_storage): -# if config: -# da = da_cls.empty(100, config=config) -# else: -# da = da_cls.empty(100) -# da._get_attributes() -# da.summary() -# da._get_raw_summary() -# -# da[0].pop('id') -# da.summary() -# -# da._get_raw_summary() -# -# -# @pytest.mark.parametrize( -# 'da_cls,config', -# [ -# (DocumentArray, None), -# (DocumentArraySqlite, None), -# (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), -# (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), -# (DocumentArrayQdrant, QdrantConfig(n_dim=128)), -# (DocumentArrayElastic, ElasticConfig(n_dim=128)), -# (DocumentArrayRedis, RedisConfig(n_dim=128)), -# ], -# ) -# def test_empty_get_attributes(da_cls, config, start_storage): -# if config: -# da = da_cls.empty(10, config=config) -# else: -# da = da_cls.empty(10) -# da[0].pop('id') -# print(da[:, 'id']) +@pytest.mark.parametrize('image_source', ['tensor', 'uri']) +@pytest.mark.parametrize( + 'da_cls,config_gen', + [ + (DocumentArray, None), + (DocumentArraySqlite, None), + (DocumentArrayAnnlite, lambda: AnnliteConfig(n_dim=128)), + (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=128)), + (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=128, scroll_batch_size=8)), + (DocumentArrayElastic, lambda: ElasticConfig(n_dim=128)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=128)), + ], +) +@pytest.mark.parametrize('canvas_size', [50, 512]) +@pytest.mark.parametrize('min_size', [16, 64]) +def test_sprite_image_generator( + pytestconfig, + tmpdir, + image_source, + da_cls, + config_gen, + canvas_size, + min_size, + start_storage, +): + files = [ + f'{pytestconfig.rootdir}/tests/image-data/*.jpg', + f'{pytestconfig.rootdir}/tests/image-data/*.png', + ] + if config_gen: + da = da_cls.from_files(files, config=config_gen()) + else: + da = da_cls.from_files(files) + da.apply(lambda d: d.load_uri_to_image_tensor()) + da.plot_image_sprites( + tmpdir / 'sprint_da.png', + image_source=image_source, + canvas_size=canvas_size, + min_size=min_size, + ) + assert os.path.exists(tmpdir / 'sprint_da.png') + + +@pytest.fixture +def da_and_dam(start_storage): + embeddings = np.array([[1, 0, 0], [2, 0, 0], [3, 0, 0]]) + return [ + cls( + [ + Document(embedding=x, tags={'label': random.randint(0, 5)}) + for x in embeddings + ], + **kwargs, + ) + for cls, kwargs in [ + (DocumentArray, {}), + (DocumentArraySqlite, {}), + (DocumentArrayWeaviate, {'config': {'n_dim': 3}}), + (DocumentArrayAnnlite, {'config': {'n_dim': 3}}), + (DocumentArrayQdrant, {'config': {'n_dim': 3}}), + (DocumentArrayRedis, {'config': {'n_dim': 3}}), + ] + ] + + +def test_plot_embeddings(da_and_dam): + for da in da_and_dam: + _test_plot_embeddings(da) + + +def test_plot_sprites(tmpdir): + da = DocumentArray.empty(5) + da.tensors = np.random.random([5, 3, 226, 226]) + da.plot_image_sprites(tmpdir / 'a.png', channel_axis=0, show_index=True) + assert os.path.exists(tmpdir / 'a.png') + + +def _test_plot_embeddings(da): + p = da.plot_embeddings(start_server=False) + assert os.path.exists(p) + assert os.path.exists(os.path.join(p, 'config.json')) + with open(os.path.join(p, 'config.json')) as fp: + config = json.load(fp) + assert len(config['embeddings']) == 1 + assert config['embeddings'][0]['tensorShape'] == list(da.embeddings.shape) + + +@pytest.mark.parametrize( + 'da_cls,config_gen', + [ + (DocumentArray, None), + (DocumentArraySqlite, None), + (DocumentArrayAnnlite, lambda: AnnliteConfig(n_dim=5)), + (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=5)), + (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=5)), + (DocumentArrayElastic, lambda: ElasticConfig(n_dim=5)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=5)), + ], +) +def test_plot_embeddings_same_path(tmpdir, da_cls, config_gen, start_storage): + if config_gen: + da1 = da_cls.empty(100, config=config_gen()) + da2 = da_cls.empty(768, config=config_gen()) + else: + da1 = da_cls.empty(100) + da2 = da_cls.empty(768) + da1.embeddings = np.random.random([100, 5]) + p1 = da1.plot_embeddings(start_server=False, path=tmpdir) + da2.embeddings = np.random.random([768, 5]) + p2 = da2.plot_embeddings(start_server=False, path=tmpdir) + assert p1 == p2 + assert os.path.exists(p1) + with open(os.path.join(p1, 'config.json')) as fp: + config = json.load(fp) + assert len(config['embeddings']) == 2 + + +@pytest.mark.parametrize( + 'da_cls,config', + [ + (DocumentArray, None), + (DocumentArraySqlite, None), + (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), + (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), + (DocumentArrayQdrant, QdrantConfig(n_dim=128)), + (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), + ], +) +def test_summary_homo_hetero(da_cls, config, start_storage): + if config: + da = da_cls.empty(100, config=config) + else: + da = da_cls.empty(100) + da._get_attributes() + da.summary() + da._get_raw_summary() + + da[0].pop('id') + da.summary() + + da._get_raw_summary() + + +@pytest.mark.parametrize( + 'da_cls,config', + [ + (DocumentArray, None), + (DocumentArraySqlite, None), + (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), + (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), + (DocumentArrayQdrant, QdrantConfig(n_dim=128)), + (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), + ], +) +def test_empty_get_attributes(da_cls, config, start_storage): + if config: + da = da_cls.empty(10, config=config) + else: + da = da_cls.empty(10) + da[0].pop('id') + print(da[:, 'id']) From 71fdcdccff6aa468bae673eadbec0167ad2b5ff1 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Wed, 19 Oct 2022 10:30:32 +0800 Subject: [PATCH 07/24] fix: remove close in backend --- docarray/array/storage/annlite/backend.py | 3 --- tests/unit/array/mixins/test_empty.py | 2 +- tests/unit/array/mixins/test_io.py | 8 ++++---- tests/unit/array/mixins/test_plot.py | 2 +- 4 files changed, 6 insertions(+), 9 deletions(-) diff --git a/docarray/array/storage/annlite/backend.py b/docarray/array/storage/annlite/backend.py index f85145701b5..e2cfde9cb5c 100644 --- a/docarray/array/storage/annlite/backend.py +++ b/docarray/array/storage/annlite/backend.py @@ -133,6 +133,3 @@ def __setstate__(self, state): def __len__(self): return self._annlite.index_size - - def close(self): - self._annlite.close() diff --git a/tests/unit/array/mixins/test_empty.py b/tests/unit/array/mixins/test_empty.py index 0dcac2ebf34..e8b4cf605d8 100644 --- a/tests/unit/array/mixins/test_empty.py +++ b/tests/unit/array/mixins/test_empty.py @@ -32,7 +32,7 @@ def test_empty_non_zero(da_cls, config, start_storage): assert len(da) == 0 if da_cls == DocumentArrayAnnlite: - da.close() + da._annlite.close() # Assert .empty provides a da of the correct length if config: diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 9bcde17618a..eee786c12de 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -51,7 +51,7 @@ def test_document_save_load( [d.embedding for d in da], [d.content for d in da], ] - da.close() + da._annlite.close() da_r = type(da).load( tmp_file, file_format=method, encoding=encoding, config=config() @@ -216,11 +216,11 @@ def test_from_to_bytes(da_cls, config, start_storage): # simple if da_cls == DocumentArrayAnnlite: b = da_cls.empty(2, config=config) - b.close() + b._annlite.close() d = da_cls.from_bytes(b.to_bytes(), config=config) assert len(d) == 2 - d.close() + d._annlite.close() else: assert len(da_cls.load_binary(bytes(da_cls.empty(2, config=config)))) == 2 @@ -230,7 +230,7 @@ def test_from_to_bytes(da_cls, config, start_storage): da[:, 'tensor'] = [[1, 2], [2, 1]] da[0, 'tags'] = {'hello': 'world'} if da_cls == DocumentArrayAnnlite: - da.close() + da._annlite.close() da2 = da_cls.load_binary(bytes(da)) assert da2.tensors == [[1, 2], [2, 1]] import numpy as np diff --git a/tests/unit/array/mixins/test_plot.py b/tests/unit/array/mixins/test_plot.py index 91d46f0acae..79c6017d64c 100644 --- a/tests/unit/array/mixins/test_plot.py +++ b/tests/unit/array/mixins/test_plot.py @@ -58,7 +58,7 @@ def test_sprite_fail_tensor_success_uri( assert os.path.exists(tmpdir / 'sprint_da.png') if da_cls == DocumentArrayAnnlite: - da.close() + da._annlite.close() @pytest.mark.parametrize('image_source', ['tensor', 'uri']) From 7a02127aa5aeb7faca0321dd5b14cde2f7afe986 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Thu, 20 Oct 2022 10:56:29 +0800 Subject: [PATCH 08/24] feat: add annlite ctx mngr --- docarray/array/document.py | 9 --------- docarray/array/storage/annlite/backend.py | 4 ++++ docarray/array/storage/base/backend.py | 13 +++++++++++++ docarray/array/storage/base/getsetdel.py | 4 ---- tests/unit/array/mixins/test_io.py | 5 +++-- 5 files changed, 20 insertions(+), 15 deletions(-) diff --git a/docarray/array/document.py b/docarray/array/document.py index 89e513060d9..8d5d8516ea9 100644 --- a/docarray/array/document.py +++ b/docarray/array/document.py @@ -139,15 +139,6 @@ def __new__( """Create a Redis-powered DocumentArray object.""" ... - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - """ - Ensures that we sync the data to the storage backend when exiting the context manager - """ - self.sync() - def __new__(cls, *args, storage: str = 'memory', **kwargs): if cls is DocumentArray: if storage == 'memory': diff --git a/docarray/array/storage/annlite/backend.py b/docarray/array/storage/annlite/backend.py index e2cfde9cb5c..b1b13cec8d0 100644 --- a/docarray/array/storage/annlite/backend.py +++ b/docarray/array/storage/annlite/backend.py @@ -133,3 +133,7 @@ def __setstate__(self, state): def __len__(self): return self._annlite.index_size + + def __exit__(self, *args, **kwargs): + super().__exit__(*args, **kwargs) + self._annlite.close() diff --git a/docarray/array/storage/base/backend.py b/docarray/array/storage/base/backend.py index 8caace91e54..52315716195 100644 --- a/docarray/array/storage/base/backend.py +++ b/docarray/array/storage/base/backend.py @@ -96,3 +96,16 @@ def _normalize_columns( ) columns = {col_desc[0]: col_desc[1] for col_desc in columns} return columns + + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + """ + Ensures that we sync the data to the storage backend when exiting the context manager + """ + self.sync() + + def sync(self): + if hasattr(self, '_offset2ids'): + self._save_offset2ids() diff --git a/docarray/array/storage/base/getsetdel.py b/docarray/array/storage/base/getsetdel.py index 682b6964cb2..42dbb8e0705 100644 --- a/docarray/array/storage/base/getsetdel.py +++ b/docarray/array/storage/base/getsetdel.py @@ -324,7 +324,3 @@ def _load_offset2ids(self): @abstractmethod def _save_offset2ids(self): ... - - def sync(self): - if hasattr(self, '_offset2ids'): - self._save_offset2ids() diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index eee786c12de..78494d2c0df 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -13,7 +13,8 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig -from docarray.array.redis import DocumentArrayRedis, RedisConfig + +# from docarray.array.redis import DocumentArrayRedis, RedisConfig from docarray.helper import random_identity from tests import random_docs @@ -35,7 +36,7 @@ def docs(): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=10)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=10)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=10)), + # (DocumentArrayRedis, lambda: RedisConfig(n_dim=10)), ], ) def test_document_save_load( From 8ca4027514b970038ab60336d4983bfd390c3a84 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Thu, 20 Oct 2022 10:59:25 +0800 Subject: [PATCH 09/24] feat: add annlite ctx mngr --- tests/unit/array/mixins/test_empty.py | 9 ++++--- tests/unit/array/mixins/test_io.py | 35 ++++++++++++--------------- 2 files changed, 20 insertions(+), 24 deletions(-) diff --git a/tests/unit/array/mixins/test_empty.py b/tests/unit/array/mixins/test_empty.py index e8b4cf605d8..a038e561a6e 100644 --- a/tests/unit/array/mixins/test_empty.py +++ b/tests/unit/array/mixins/test_empty.py @@ -24,15 +24,15 @@ ], ) def test_empty_non_zero(da_cls, config, start_storage): + # Assert .empty provides a da with 0 docs if config: da = da_cls.empty(config=config) else: da = da_cls.empty() - assert len(da) == 0 - if da_cls == DocumentArrayAnnlite: - da._annlite.close() + with da: + assert len(da) == 0 # Assert .empty provides a da of the correct length if config: @@ -40,4 +40,5 @@ def test_empty_non_zero(da_cls, config, start_storage): else: da = da_cls.empty(10) - assert len(da) == 10 + with da: + assert len(da) == 10 diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 78494d2c0df..c6873abddf6 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -14,7 +14,7 @@ from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig -# from docarray.array.redis import DocumentArrayRedis, RedisConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig from docarray.helper import random_identity from tests import random_docs @@ -36,7 +36,7 @@ def docs(): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=10)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=10)), - # (DocumentArrayRedis, lambda: RedisConfig(n_dim=10)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=10)), ], ) def test_document_save_load( @@ -44,34 +44,29 @@ def test_document_save_load( ): tmp_file = os.path.join(tmp_path, 'test') da = da_cls(docs, config=config()) + da.insert(2, Document(id='new')) da.save(tmp_file, file_format=method, encoding=encoding) - if da_cls == DocumentArrayAnnlite: - da_info = [ - [d.id for d in da], - [d.embedding for d in da], - [d.content for d in da], - ] - da._annlite.close() + + with da: + da_info = { + 'id': [d.id for d in da], + 'embedding': [d.embedding for d in da], + 'content': [d.content for d in da], + } da_r = type(da).load( tmp_file, file_format=method, encoding=encoding, config=config() ) assert type(da) is type(da_r) - assert len(da) == len(da_r) + assert len(da) == len(da_info['id']) assert da_r[2].id == 'new' - if da_cls == DocumentArrayAnnlite: - for idx, d_r in enumerate(da_r): - assert da_info[0][idx] == d_r.id - np.testing.assert_equal(da_info[1][idx], d_r.embedding) - assert da_info[2][idx] == d_r.content - else: - for d, d_r in zip(da, da_r): - assert d.id == d_r.id - np.testing.assert_equal(d.embedding, d_r.embedding) - assert d.content == d_r.content + for idx, d_r in enumerate(da_r): + assert da_info['id'][idx] == d_r.id + np.testing.assert_equal(da_info['embedding'][idx], d_r.embedding) + assert da_info['content'][idx] == d_r.content @pytest.mark.parametrize('flatten_tags', [True, False]) From ab3846ffe106e4d3c2eeecb7f4f74c048a5bc020 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Thu, 20 Oct 2022 11:01:18 +0800 Subject: [PATCH 10/24] fix: fix plot test --- tests/unit/array/mixins/test_plot.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/unit/array/mixins/test_plot.py b/tests/unit/array/mixins/test_plot.py index 79c6017d64c..858cd082630 100644 --- a/tests/unit/array/mixins/test_plot.py +++ b/tests/unit/array/mixins/test_plot.py @@ -57,9 +57,6 @@ def test_sprite_fail_tensor_success_uri( da.save_gif(tmpdir / 'sprint_da.gif', show_index=show_index, channel_axis=0) assert os.path.exists(tmpdir / 'sprint_da.png') - if da_cls == DocumentArrayAnnlite: - da._annlite.close() - @pytest.mark.parametrize('image_source', ['tensor', 'uri']) @pytest.mark.parametrize( From 8026e878b1fb3bdfa3d3cd527cb7dc4c6054d5b2 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Thu, 20 Oct 2022 11:17:55 +0800 Subject: [PATCH 11/24] fix: fix plot test --- docarray/array/document.py | 9 +++++++++ docarray/array/storage/base/backend.py | 13 ------------- docarray/array/storage/base/getsetdel.py | 4 ++++ 3 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docarray/array/document.py b/docarray/array/document.py index 8d5d8516ea9..89e513060d9 100644 --- a/docarray/array/document.py +++ b/docarray/array/document.py @@ -139,6 +139,15 @@ def __new__( """Create a Redis-powered DocumentArray object.""" ... + def __enter__(self): + return self + + def __exit__(self, *args, **kwargs): + """ + Ensures that we sync the data to the storage backend when exiting the context manager + """ + self.sync() + def __new__(cls, *args, storage: str = 'memory', **kwargs): if cls is DocumentArray: if storage == 'memory': diff --git a/docarray/array/storage/base/backend.py b/docarray/array/storage/base/backend.py index 52315716195..8caace91e54 100644 --- a/docarray/array/storage/base/backend.py +++ b/docarray/array/storage/base/backend.py @@ -96,16 +96,3 @@ def _normalize_columns( ) columns = {col_desc[0]: col_desc[1] for col_desc in columns} return columns - - def __enter__(self): - return self - - def __exit__(self, *args, **kwargs): - """ - Ensures that we sync the data to the storage backend when exiting the context manager - """ - self.sync() - - def sync(self): - if hasattr(self, '_offset2ids'): - self._save_offset2ids() diff --git a/docarray/array/storage/base/getsetdel.py b/docarray/array/storage/base/getsetdel.py index 42dbb8e0705..682b6964cb2 100644 --- a/docarray/array/storage/base/getsetdel.py +++ b/docarray/array/storage/base/getsetdel.py @@ -324,3 +324,7 @@ def _load_offset2ids(self): @abstractmethod def _save_offset2ids(self): ... + + def sync(self): + if hasattr(self, '_offset2ids'): + self._save_offset2ids() From ad5c2f16ea07036150b41902379b330b62462b36 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Thu, 20 Oct 2022 14:10:16 +0800 Subject: [PATCH 12/24] fix: fix annlite based unit test --- tests/unit/array/mixins/test_del.py | 12 +++---- tests/unit/array/mixins/test_getset.py | 3 +- tests/unit/array/test_advance_indexing.py | 38 ++++++++++++----------- tests/unit/array/test_sequence.py | 3 +- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/tests/unit/array/mixins/test_del.py b/tests/unit/array/mixins/test_del.py index 610ca99140b..3e47ae9b68c 100644 --- a/tests/unit/array/mixins/test_del.py +++ b/tests/unit/array/mixins/test_del.py @@ -148,13 +148,13 @@ def test_del_subindex(storage, config): ] ) - del da['0'] - assert len(da) == 9 - assert len(da._subindices['@c']) == 18 + del da['0'] + assert len(da) == 9 + assert len(da._subindices['@c']) == 18 - del da[-2:] - assert len(da) == 7 - assert len(da._subindices['@c']) == 14 + del da[-2:] + assert len(da) == 7 + assert len(da._subindices['@c']) == 14 def test_del_subindex_annlite_multimodal(): diff --git a/tests/unit/array/mixins/test_getset.py b/tests/unit/array/mixins/test_getset.py index 5cc8ef9cbc5..89b32523513 100644 --- a/tests/unit/array/mixins/test_getset.py +++ b/tests/unit/array/mixins/test_getset.py @@ -454,7 +454,7 @@ def test_getset_subindex(storage, config): for i in range(3) ] ) - with da: + da[0] = Document( embedding=-1 * np.ones(n_dim), chunks=[ @@ -463,7 +463,6 @@ def test_getset_subindex(storage, config): ], ) - with da: da[1:] = [ Document( embedding=-1 * np.ones(n_dim), diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index 666ae8596b9..fb01e2c9093 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -678,15 +678,15 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): @pytest.mark.parametrize( 'storage,config', [ - ('sqlite', None), - ('weaviate', WeaviateConfig(n_dim=123)), + # ('sqlite', None), + # ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), - ('qdrant', QdrantConfig(n_dim=123)), - ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123)), + # ('qdrant', QdrantConfig(n_dim=123)), + # ('elasticsearch', ElasticConfig(n_dim=123)), + # ('redis', RedisConfig(n_dim=123)), ], ) -def test_offset2ids_persistence(storage, config, start_storage): +def test_offset2ids_persistence(storage, config): da = DocumentArray(storage=storage, config=config) with da: @@ -700,21 +700,23 @@ def test_offset2ids_persistence(storage, config, start_storage): da.insert(1, Document(id='1')) da.insert(3, Document(id='3')) - config = da._config - da_ids = da[:, 'id'] - assert da_ids == [str(i) for i in range(5)] - da.sync() - - da1 = DocumentArray(storage=storage, config=config) - - assert da1[:, 'id'] == da_ids - - with da1: + config = da._config + da_ids = da[:, 'id'] + assert da_ids == [str(i) for i in range(5)] + # da.sync() + # + # da1 = DocumentArray(storage=storage, config=config) + # + # assert da1[:, 'id'] == da_ids + + with DocumentArray(storage=storage, config=config) as da1: + assert da1[:, 'id'] == da_ids da1.extend([Document(id=i) for i in 'abc']) + da1_ids = da1[:, 'id'] assert len(da1) == 8 - da2 = DocumentArray(storage=storage, config=config) - assert da2[:, 'id'] == da1[:, 'id'] + with DocumentArray(storage=storage, config=config) as da2: + assert da2[:, 'id'] == da1_ids def test_dam_conflicting_ids(): diff --git a/tests/unit/array/test_sequence.py b/tests/unit/array/test_sequence.py index 92b04995357..a872374d9ce 100644 --- a/tests/unit/array/test_sequence.py +++ b/tests/unit/array/test_sequence.py @@ -226,11 +226,10 @@ def test_del_and_append(index, storage, config): with da: da.extend([Document(id=str(i)) for i in range(5)]) - with da: del da[index] da.append(Document(id='new')) - assert da[:, 'id'] == ['0', '2', '3', '4', 'new'] + assert da[:, 'id'] == ['0', '2', '3', '4', 'new'] @pytest.mark.parametrize( From 1abdaa2c25bda3eeb22bdd44a271bcbe3826714c Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Thu, 20 Oct 2022 14:12:32 +0800 Subject: [PATCH 13/24] fix: ensure ci use main branch of annlite --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index fedfcc5a4d1..32ae2915940 100644 --- a/setup.py +++ b/setup.py @@ -103,7 +103,7 @@ 'jupyterlab', 'transformers>=4.16.2', 'weaviate-client~=3.3.0', - 'annlite>=0.3.12', + # 'annlite>=0.3.12', 'elasticsearch>=8.2.0', 'redis>=4.3.0', 'jina', From 7c7c648bd254d88feb0a3c9e2518b3082ffb5661 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Thu, 20 Oct 2022 14:21:06 +0800 Subject: [PATCH 14/24] fix: ensure ci use main branch of annlite --- tests/unit/array/mixins/test_io.py | 14 ++++++-------- tests/unit/array/test_advance_indexing.py | 10 +++++----- 2 files changed, 11 insertions(+), 13 deletions(-) diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index c6873abddf6..6e85cbaf0f4 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -45,10 +45,9 @@ def test_document_save_load( tmp_file = os.path.join(tmp_path, 'test') da = da_cls(docs, config=config()) - da.insert(2, Document(id='new')) - da.save(tmp_file, file_format=method, encoding=encoding) - with da: + da.insert(2, Document(id='new')) + da.save(tmp_file, file_format=method, encoding=encoding) da_info = { 'id': [d.id for d in da], 'embedding': [d.embedding for d in da], @@ -211,12 +210,11 @@ def test_from_to_pd_dataframe(da_cls, config, start_storage): def test_from_to_bytes(da_cls, config, start_storage): # simple if da_cls == DocumentArrayAnnlite: - b = da_cls.empty(2, config=config) - b._annlite.close() + with da_cls.empty(2, config=config) as da: + da_bytes = da.to_bytes() - d = da_cls.from_bytes(b.to_bytes(), config=config) - assert len(d) == 2 - d._annlite.close() + with da_cls.from_bytes(da_bytes, config=config) as db: + assert len(db) == 2 else: assert len(da_cls.load_binary(bytes(da_cls.empty(2, config=config)))) == 2 diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index fb01e2c9093..f1aa0c6e24a 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -678,12 +678,12 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): @pytest.mark.parametrize( 'storage,config', [ - # ('sqlite', None), - # ('weaviate', WeaviateConfig(n_dim=123)), + ('sqlite', None), + ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), - # ('qdrant', QdrantConfig(n_dim=123)), - # ('elasticsearch', ElasticConfig(n_dim=123)), - # ('redis', RedisConfig(n_dim=123)), + ('qdrant', QdrantConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_offset2ids_persistence(storage, config): From b99d438ff376af81202d304e22754a4d97b96194 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Thu, 20 Oct 2022 14:50:21 +0800 Subject: [PATCH 15/24] fix: ensure ci use main branch of annlite --- .github/workflows/ci.yml | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f0c32c84f06..bf608068e6d 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -130,10 +130,10 @@ jobs: python -m pip install --upgrade pip python -m pip install wheel # pip does not properly resolve dependency versions with syntax pip install --no-cache-dir ".[test,full]" + pip install --pre annlite pip install --no-cache-dir ".[test]" pip install --no-cache-dir ".[full]" pip install --no-cache-dir ".[qdrant]" - pip install --pre annlite pip install --no-cache-dir ".[weaviate]" pip install --no-cache-dir ".[elasticsearch]" pip install --no-cache-dir ".[redis]" diff --git a/setup.py b/setup.py index 32ae2915940..fedfcc5a4d1 100644 --- a/setup.py +++ b/setup.py @@ -103,7 +103,7 @@ 'jupyterlab', 'transformers>=4.16.2', 'weaviate-client~=3.3.0', - # 'annlite>=0.3.12', + 'annlite>=0.3.12', 'elasticsearch>=8.2.0', 'redis>=4.3.0', 'jina', From 0665f4b1f6e9c52feda46ae68a8b1d299a042897 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 13:36:10 +0800 Subject: [PATCH 16/24] fix: remove annlite cntx mngr --- docarray/array/storage/annlite/backend.py | 4 --- tests/unit/array/mixins/test_del.py | 18 ++++++------- tests/unit/array/mixins/test_empty.py | 21 ++++++++------- tests/unit/array/mixins/test_getset.py | 14 +++++----- tests/unit/array/mixins/test_io.py | 22 ++++++++------- tests/unit/array/test_advance_indexing.py | 33 +++++++++++++---------- tests/unit/array/test_construct.py | 20 ++++++++++++++ tests/unit/array/test_sequence.py | 4 +-- 8 files changed, 81 insertions(+), 55 deletions(-) diff --git a/docarray/array/storage/annlite/backend.py b/docarray/array/storage/annlite/backend.py index b1b13cec8d0..e2cfde9cb5c 100644 --- a/docarray/array/storage/annlite/backend.py +++ b/docarray/array/storage/annlite/backend.py @@ -133,7 +133,3 @@ def __setstate__(self, state): def __len__(self): return self._annlite.index_size - - def __exit__(self, *args, **kwargs): - super().__exit__(*args, **kwargs) - self._annlite.close() diff --git a/tests/unit/array/mixins/test_del.py b/tests/unit/array/mixins/test_del.py index 3e47ae9b68c..1b9d3c71684 100644 --- a/tests/unit/array/mixins/test_del.py +++ b/tests/unit/array/mixins/test_del.py @@ -148,13 +148,13 @@ def test_del_subindex(storage, config): ] ) - del da['0'] - assert len(da) == 9 - assert len(da._subindices['@c']) == 18 + del da['0'] + assert len(da) == 9 + assert len(da._subindices['@c']) == 18 - del da[-2:] - assert len(da) == 7 - assert len(da._subindices['@c']) == 14 + del da[-2:] + assert len(da) == 7 + assert len(da._subindices['@c']) == 14 def test_del_subindex_annlite_multimodal(): @@ -191,6 +191,6 @@ class MMDoc: with da: da.extend(docs_to_add) - del da['0'] - assert len(da) == 9 - assert len(da._subindices['@.[my_text, my_other_text]']) == 18 + del da['0'] + assert len(da) == 9 + assert len(da._subindices['@.[my_text, my_other_text]']) == 18 diff --git a/tests/unit/array/mixins/test_empty.py b/tests/unit/array/mixins/test_empty.py index a038e561a6e..c73017efb5a 100644 --- a/tests/unit/array/mixins/test_empty.py +++ b/tests/unit/array/mixins/test_empty.py @@ -14,13 +14,13 @@ @pytest.mark.parametrize( 'da_cls,config', [ - (DocumentArray, None), - (DocumentArraySqlite, None), + # (DocumentArray, None), + # (DocumentArraySqlite, None), (DocumentArrayAnnlite, AnnliteConfig(n_dim=5)), - (DocumentArrayWeaviate, WeaviateConfig(n_dim=5)), - (DocumentArrayQdrant, QdrantConfig(n_dim=5)), - (DocumentArrayElastic, ElasticConfig(n_dim=5)), - (DocumentArrayRedis, RedisConfig(n_dim=5)), + # (DocumentArrayWeaviate, WeaviateConfig(n_dim=5)), + # (DocumentArrayQdrant, QdrantConfig(n_dim=5)), + # (DocumentArrayElastic, ElasticConfig(n_dim=5)), + # (DocumentArrayRedis, RedisConfig(n_dim=5)), ], ) def test_empty_non_zero(da_cls, config, start_storage): @@ -31,8 +31,10 @@ def test_empty_non_zero(da_cls, config, start_storage): else: da = da_cls.empty() - with da: - assert len(da) == 0 + assert len(da) == 0 + + if da_cls == DocumentArrayAnnlite: + da._annlite.close() # Assert .empty provides a da of the correct length if config: @@ -40,5 +42,4 @@ def test_empty_non_zero(da_cls, config, start_storage): else: da = da_cls.empty(10) - with da: - assert len(da) == 10 + assert len(da) == 10 diff --git a/tests/unit/array/mixins/test_getset.py b/tests/unit/array/mixins/test_getset.py index 89b32523513..dfc3b61bc08 100644 --- a/tests/unit/array/mixins/test_getset.py +++ b/tests/unit/array/mixins/test_getset.py @@ -419,13 +419,13 @@ def embeddings_eq(emb1, emb2): @pytest.mark.parametrize( 'storage, config', [ - ('memory', None), - ('weaviate', {'n_dim': 3, 'distance': 'l2-squared'}), + # ('memory', None), + # ('weaviate', {'n_dim': 3, 'distance': 'l2-squared'}), ('annlite', {'n_dim': 3, 'metric': 'Euclidean'}), - ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), - ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), - ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2'}), + # ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), + # ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), + # ('sqlite', dict()), + # ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_getset_subindex(storage, config): @@ -455,6 +455,7 @@ def test_getset_subindex(storage, config): ] ) + with da: da[0] = Document( embedding=-1 * np.ones(n_dim), chunks=[ @@ -463,6 +464,7 @@ def test_getset_subindex(storage, config): ], ) + with da: da[1:] = [ Document( embedding=-1 * np.ones(n_dim), diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 6e85cbaf0f4..7202675ed62 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -199,22 +199,24 @@ def test_from_to_pd_dataframe(da_cls, config, start_storage): @pytest.mark.parametrize( 'da_cls, config', [ - (DocumentArrayInMemory, None), - (DocumentArraySqlite, None), + # (DocumentArrayInMemory, None), + # (DocumentArraySqlite, None), (DocumentArrayAnnlite, AnnliteConfig(n_dim=3)), - (DocumentArrayQdrant, QdrantConfig(n_dim=3)), - (DocumentArrayElastic, ElasticConfig(n_dim=3)), - (DocumentArrayRedis, RedisConfig(n_dim=3)), + # (DocumentArrayQdrant, QdrantConfig(n_dim=3)), + # (DocumentArrayElastic, ElasticConfig(n_dim=3)), + # (DocumentArrayRedis, RedisConfig(n_dim=3)), ], ) -def test_from_to_bytes(da_cls, config, start_storage): +def test_from_to_bytes(da_cls, config): # simple if da_cls == DocumentArrayAnnlite: - with da_cls.empty(2, config=config) as da: - da_bytes = da.to_bytes() + da = da_cls.empty(2, config=config) + da_bytes = da.to_bytes() + da._annlite.close() - with da_cls.from_bytes(da_bytes, config=config) as db: - assert len(db) == 2 + db = da_cls.from_bytes(da_bytes, config=config) + assert len(db) == 2 + db._annlite.close() else: assert len(da_cls.load_binary(bytes(da_cls.empty(2, config=config)))) == 2 diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index f1aa0c6e24a..ebf3a06c2eb 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -686,7 +686,7 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): ('redis', RedisConfig(n_dim=123)), ], ) -def test_offset2ids_persistence(storage, config): +def test_offset2ids_persistence(storage, config, start_storage): da = DocumentArray(storage=storage, config=config) with da: @@ -700,23 +700,28 @@ def test_offset2ids_persistence(storage, config): da.insert(1, Document(id='1')) da.insert(3, Document(id='3')) - config = da._config - da_ids = da[:, 'id'] - assert da_ids == [str(i) for i in range(5)] - # da.sync() - # - # da1 = DocumentArray(storage=storage, config=config) - # - # assert da1[:, 'id'] == da_ids - - with DocumentArray(storage=storage, config=config) as da1: - assert da1[:, 'id'] == da_ids + config = da._config + da_ids = da[:, 'id'] + assert da_ids == [str(i) for i in range(5)] + da.sync() + + if storage == 'annlite': + da._annlite.close() + + da1 = DocumentArray(storage=storage, config=config) + + assert da1[:, 'id'] == da_ids + + with da1: da1.extend([Document(id=i) for i in 'abc']) da1_ids = da1[:, 'id'] assert len(da1) == 8 - with DocumentArray(storage=storage, config=config) as da2: - assert da2[:, 'id'] == da1_ids + if storage == 'annlite': + da1._annlite.close() + + da2 = DocumentArray(storage=storage, config=config) + assert da2[:, 'id'] == da1_ids def test_dam_conflicting_ids(): diff --git a/tests/unit/array/test_construct.py b/tests/unit/array/test_construct.py index 251e8459b16..592975db5c3 100644 --- a/tests/unit/array/test_construct.py +++ b/tests/unit/array/test_construct.py @@ -28,33 +28,53 @@ def test_construct_docarray(da_cls, config, start_storage): if config: da = da_cls(config=config) assert len(da) == 0 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() da = da_cls(Document(), config=config) assert len(da) == 1 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() da = da_cls([Document(), Document()], config=config) assert len(da) == 2 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() da = da_cls((Document(), Document()), config=config) assert len(da) == 2 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() da = da_cls((Document() for _ in range(10)), config=config) assert len(da) == 10 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() else: da = da_cls() assert len(da) == 0 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() da = da_cls(Document()) assert len(da) == 1 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() da = da_cls([Document(), Document()]) assert len(da) == 2 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() da = da_cls((Document(), Document())) assert len(da) == 2 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() da = da_cls((Document() for _ in range(10))) assert len(da) == 10 + if da_cls == DocumentArrayAnnlite: + da._annlite.close() if da_cls is DocumentArrayInMemory: da1 = da_cls(da) diff --git a/tests/unit/array/test_sequence.py b/tests/unit/array/test_sequence.py index a872374d9ce..03ad6116157 100644 --- a/tests/unit/array/test_sequence.py +++ b/tests/unit/array/test_sequence.py @@ -226,10 +226,11 @@ def test_del_and_append(index, storage, config): with da: da.extend([Document(id=str(i)) for i in range(5)]) + with da: del da[index] da.append(Document(id='new')) - assert da[:, 'id'] == ['0', '2', '3', '4', 'new'] + assert da[:, 'id'] == ['0', '2', '3', '4', 'new'] @pytest.mark.parametrize( @@ -252,7 +253,6 @@ def test_set_and_append(index, storage, config): with da: da.extend([Document(id=str(i)) for i in range(5)]) - with da: da[index] = ( Document(id='new') if isinstance(index, int) or isinstance(index, str) From 21e6d1d73b5e3472682ba78dd1c0d55b95458b91 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 13:37:06 +0800 Subject: [PATCH 17/24] fix: recover annlite ci --- .github/workflows/ci.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index bf608068e6d..156d52f4a6e 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -134,6 +134,7 @@ jobs: pip install --no-cache-dir ".[test]" pip install --no-cache-dir ".[full]" pip install --no-cache-dir ".[qdrant]" + pip install --no-cache-dir ".[annlite]" pip install --no-cache-dir ".[weaviate]" pip install --no-cache-dir ".[elasticsearch]" pip install --no-cache-dir ".[redis]" From d8bc5174831222fbcd46a4c5f7ee26b7f00732b6 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 13:39:59 +0800 Subject: [PATCH 18/24] fix: revert changes in tests --- tests/unit/array/mixins/test_del.py | 6 +++--- tests/unit/array/mixins/test_empty.py | 12 ++++++------ tests/unit/array/mixins/test_getset.py | 12 ++++++------ tests/unit/array/mixins/test_io.py | 12 ++++++------ tests/unit/array/test_sequence.py | 1 + 5 files changed, 22 insertions(+), 21 deletions(-) diff --git a/tests/unit/array/mixins/test_del.py b/tests/unit/array/mixins/test_del.py index 1b9d3c71684..610ca99140b 100644 --- a/tests/unit/array/mixins/test_del.py +++ b/tests/unit/array/mixins/test_del.py @@ -191,6 +191,6 @@ class MMDoc: with da: da.extend(docs_to_add) - del da['0'] - assert len(da) == 9 - assert len(da._subindices['@.[my_text, my_other_text]']) == 18 + del da['0'] + assert len(da) == 9 + assert len(da._subindices['@.[my_text, my_other_text]']) == 18 diff --git a/tests/unit/array/mixins/test_empty.py b/tests/unit/array/mixins/test_empty.py index c73017efb5a..ea58b0769be 100644 --- a/tests/unit/array/mixins/test_empty.py +++ b/tests/unit/array/mixins/test_empty.py @@ -14,13 +14,13 @@ @pytest.mark.parametrize( 'da_cls,config', [ - # (DocumentArray, None), - # (DocumentArraySqlite, None), + (DocumentArray, None), + (DocumentArraySqlite, None), (DocumentArrayAnnlite, AnnliteConfig(n_dim=5)), - # (DocumentArrayWeaviate, WeaviateConfig(n_dim=5)), - # (DocumentArrayQdrant, QdrantConfig(n_dim=5)), - # (DocumentArrayElastic, ElasticConfig(n_dim=5)), - # (DocumentArrayRedis, RedisConfig(n_dim=5)), + (DocumentArrayWeaviate, WeaviateConfig(n_dim=5)), + (DocumentArrayQdrant, QdrantConfig(n_dim=5)), + (DocumentArrayElastic, ElasticConfig(n_dim=5)), + (DocumentArrayRedis, RedisConfig(n_dim=5)), ], ) def test_empty_non_zero(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_getset.py b/tests/unit/array/mixins/test_getset.py index dfc3b61bc08..764c92a5f88 100644 --- a/tests/unit/array/mixins/test_getset.py +++ b/tests/unit/array/mixins/test_getset.py @@ -419,13 +419,13 @@ def embeddings_eq(emb1, emb2): @pytest.mark.parametrize( 'storage, config', [ - # ('memory', None), - # ('weaviate', {'n_dim': 3, 'distance': 'l2-squared'}), + ('memory', None), + ('weaviate', {'n_dim': 3, 'distance': 'l2-squared'}), ('annlite', {'n_dim': 3, 'metric': 'Euclidean'}), - # ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), - # ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), - # ('sqlite', dict()), - # ('redis', {'n_dim': 3, 'distance': 'L2'}), + ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), + ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), + ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_getset_subindex(storage, config): diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 7202675ed62..782ddca3463 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -199,15 +199,15 @@ def test_from_to_pd_dataframe(da_cls, config, start_storage): @pytest.mark.parametrize( 'da_cls, config', [ - # (DocumentArrayInMemory, None), - # (DocumentArraySqlite, None), + (DocumentArrayInMemory, None), + (DocumentArraySqlite, None), (DocumentArrayAnnlite, AnnliteConfig(n_dim=3)), - # (DocumentArrayQdrant, QdrantConfig(n_dim=3)), - # (DocumentArrayElastic, ElasticConfig(n_dim=3)), - # (DocumentArrayRedis, RedisConfig(n_dim=3)), + (DocumentArrayQdrant, QdrantConfig(n_dim=3)), + (DocumentArrayElastic, ElasticConfig(n_dim=3)), + (DocumentArrayRedis, RedisConfig(n_dim=3)), ], ) -def test_from_to_bytes(da_cls, config): +def test_from_to_bytes(da_cls, config, start_storage): # simple if da_cls == DocumentArrayAnnlite: da = da_cls.empty(2, config=config) diff --git a/tests/unit/array/test_sequence.py b/tests/unit/array/test_sequence.py index 03ad6116157..92b04995357 100644 --- a/tests/unit/array/test_sequence.py +++ b/tests/unit/array/test_sequence.py @@ -253,6 +253,7 @@ def test_set_and_append(index, storage, config): with da: da.extend([Document(id=str(i)) for i in range(5)]) + with da: da[index] = ( Document(id='new') if isinstance(index, int) or isinstance(index, str) From 99cfb70ef998170bc5ccd3c0543bfc1a3639478c Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 13:41:46 +0800 Subject: [PATCH 19/24] fix: revert changes in tests --- tests/unit/array/mixins/test_getset.py | 1 - tests/unit/array/mixins/test_io.py | 1 - 2 files changed, 2 deletions(-) diff --git a/tests/unit/array/mixins/test_getset.py b/tests/unit/array/mixins/test_getset.py index 764c92a5f88..5cc8ef9cbc5 100644 --- a/tests/unit/array/mixins/test_getset.py +++ b/tests/unit/array/mixins/test_getset.py @@ -454,7 +454,6 @@ def test_getset_subindex(storage, config): for i in range(3) ] ) - with da: da[0] = Document( embedding=-1 * np.ones(n_dim), diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 782ddca3463..625c9b7487a 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -13,7 +13,6 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig - from docarray.array.redis import DocumentArrayRedis, RedisConfig from docarray.helper import random_identity from tests import random_docs From cde4a16f96185f0449409965d670fd783bd26690 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 14:08:17 +0800 Subject: [PATCH 20/24] fix: close annlite after iteration --- tests/unit/array/mixins/test_content.py | 6 ++++++ tests/unit/array/mixins/test_io.py | 3 +++ tests/unit/array/mixins/test_text.py | 3 +++ tests/unit/array/test_advance_indexing.py | 3 +++ tests/unit/document/test_plot.py | 3 +++ 5 files changed, 18 insertions(+) diff --git a/tests/unit/array/mixins/test_content.py b/tests/unit/array/mixins/test_content.py index ea4535c9d00..7172b348348 100644 --- a/tests/unit/array/mixins/test_content.py +++ b/tests/unit/array/mixins/test_content.py @@ -149,6 +149,9 @@ def test_content_empty(da_len, da_cls, config, start_storage): assert not da.tensors assert da.blobs == [b''] * da_len + if da_cls == DocumentArrayAnnlite: + da._annlite.close() + @pytest.mark.parametrize('da_len', [0, 1, 2]) @pytest.mark.parametrize( @@ -171,3 +174,6 @@ def test_embeddings_setter(da_len, da_cls, config, start_storage): da.embeddings = np.random.rand(da_len, 5) for doc in da: assert doc.embedding.shape == (5,) + + if da_cls == DocumentArrayAnnlite: + da._annlite.close() diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 625c9b7487a..e5628df80da 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -53,6 +53,9 @@ def test_document_save_load( 'content': [d.content for d in da], } + if da_cls == DocumentArrayAnnlite: + da._annlite.close() + da_r = type(da).load( tmp_file, file_format=method, encoding=encoding, config=config() ) diff --git a/tests/unit/array/mixins/test_text.py b/tests/unit/array/mixins/test_text.py index 0f7481a7e0d..312bd715c71 100644 --- a/tests/unit/array/mixins/test_text.py +++ b/tests/unit/array/mixins/test_text.py @@ -50,6 +50,9 @@ def test_da_vocabulary(da_cls, config, docs, min_freq, start_storage): assert not vocab.values() assert not vocab.keys() + if da_cls == DocumentArrayAnnlite: + da._annlite.close() + @pytest.mark.parametrize( 'da_cls,config', diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index ebf3a06c2eb..7b0038621cf 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -239,6 +239,9 @@ def test_sequence_int(docs, nparray, storage, config, start_storage): assert docs[5].text == 'new' assert docs[9].text == 'new' + if storage == 'annlite': + docs._annlite.close() + @pytest.mark.parametrize( 'storage,config', diff --git a/tests/unit/document/test_plot.py b/tests/unit/document/test_plot.py index c14d7bbc51c..be0aa719053 100644 --- a/tests/unit/document/test_plot.py +++ b/tests/unit/document/test_plot.py @@ -72,6 +72,9 @@ def test_matches_sprites( da[0].plot_matches_sprites(top_k, output=tmpdir / 'sprint_da.png') assert os.path.exists(tmpdir / 'sprint_da.png') + if da_cls == DocumentArrayAnnlite: + das._annlite.close() + @pytest.mark.parametrize('image_source', ['tensor', 'uri']) @pytest.mark.parametrize( From e1274aa092b374758fe302f8032af564464771fa Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 14:31:07 +0800 Subject: [PATCH 21/24] fix: close annlite after iteration --- tests/unit/array/mixins/test_content.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/tests/unit/array/mixins/test_content.py b/tests/unit/array/mixins/test_content.py index 7172b348348..3758df40f5e 100644 --- a/tests/unit/array/mixins/test_content.py +++ b/tests/unit/array/mixins/test_content.py @@ -40,6 +40,9 @@ def test_content_empty_getter_return_none(cls, content_attr, start_storage): da = cls() assert getattr(da, content_attr) is None + if cls == DocumentArrayAnnlite: + da._annlite.close() + @pytest.mark.parametrize( 'cls', @@ -77,6 +80,9 @@ def test_content_empty_setter(cls, content_attr, start_storage): setattr(da, content_attr[0], content_attr[1]) assert getattr(da, content_attr[0]) is None + if cls == DocumentArrayAnnlite: + da._annlite.close() + @pytest.mark.parametrize( 'cls,config', @@ -111,6 +117,9 @@ def test_content_getter_setter(cls, content_attr, config, start_storage): da.contents = None assert da.contents is None + if cls == DocumentArrayAnnlite: + da._annlite.close() + @pytest.mark.parametrize('da_len', [0, 1, 2]) @pytest.mark.parametrize( From 089e3165e12132413730b67cb992bd8b5a935997 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 15:03:55 +0800 Subject: [PATCH 22/24] fix: remove redundant cntx mngr --- tests/unit/array/mixins/test_io.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index e5628df80da..514791b49c6 100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -44,14 +44,14 @@ def test_document_save_load( tmp_file = os.path.join(tmp_path, 'test') da = da_cls(docs, config=config()) - with da: - da.insert(2, Document(id='new')) - da.save(tmp_file, file_format=method, encoding=encoding) - da_info = { - 'id': [d.id for d in da], - 'embedding': [d.embedding for d in da], - 'content': [d.content for d in da], - } + da.insert(2, Document(id='new')) + da.save(tmp_file, file_format=method, encoding=encoding) + + da_info = { + 'id': [d.id for d in da], + 'embedding': [d.embedding for d in da], + 'content': [d.content for d in da], + } if da_cls == DocumentArrayAnnlite: da._annlite.close() From 95d054b76f4663c9b9c5bafde66767e1c1b30eb8 Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 15:46:45 +0800 Subject: [PATCH 23/24] fix: fix and enable the annlite test --- tests/unit/array/mixins/test_plot.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/unit/array/mixins/test_plot.py b/tests/unit/array/mixins/test_plot.py index 858cd082630..a6020810d79 100644 --- a/tests/unit/array/mixins/test_plot.py +++ b/tests/unit/array/mixins/test_plot.py @@ -24,8 +24,7 @@ [ (DocumentArray, None), (DocumentArraySqlite, None), - # TODO: restore this after annlite issue is fixed in #622 - # (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), + (DocumentArrayAnnlite, AnnliteConfig(n_dim=128)), # (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128, scroll_batch_size=8)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), @@ -57,6 +56,9 @@ def test_sprite_fail_tensor_success_uri( da.save_gif(tmpdir / 'sprint_da.gif', show_index=show_index, channel_axis=0) assert os.path.exists(tmpdir / 'sprint_da.png') + if da_cls == DocumentArrayAnnlite: + da._annlite.close() + @pytest.mark.parametrize('image_source', ['tensor', 'uri']) @pytest.mark.parametrize( From 9ea6b5014667eefb6f553c008b8bc4ccc1f95a9b Mon Sep 17 00:00:00 2001 From: jemmyshin Date: Fri, 21 Oct 2022 16:27:11 +0800 Subject: [PATCH 24/24] ci: use the released annlite --- .github/workflows/ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 40c986e2c51..4c2cb48583f 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -131,7 +131,6 @@ jobs: python -m pip install --upgrade pip python -m pip install wheel # pip does not properly resolve dependency versions with syntax pip install --no-cache-dir ".[test,full]" - pip install --pre annlite pip install --no-cache-dir ".[test]" pip install --no-cache-dir ".[full]" pip install --no-cache-dir ".[qdrant]"