From b2ccb52455a946b3ec019073214cb143246229aa Mon Sep 17 00:00:00 2001 From: AnneY Date: Thu, 15 Sep 2022 13:52:09 +0800 Subject: [PATCH 01/10] feat: redis supports storing multiple DAs --- docarray/array/storage/redis/backend.py | 15 ++--- docarray/array/storage/redis/getsetdel.py | 8 +-- tests/unit/array/mixins/test_content.py | 12 ++-- tests/unit/array/mixins/test_del.py | 2 +- tests/unit/array/mixins/test_embed.py | 5 +- tests/unit/array/mixins/test_empty.py | 2 +- tests/unit/array/mixins/test_eval_class.py | 14 ++--- tests/unit/array/mixins/test_find.py | 27 +++------ tests/unit/array/mixins/test_getset.py | 32 +++++------ tests/unit/array/mixins/test_io.py | 30 ++++------ tests/unit/array/mixins/test_magic.py | 8 +-- tests/unit/array/mixins/test_match.py | 12 +--- tests/unit/array/mixins/test_parallel.py | 10 ++-- tests/unit/array/mixins/test_plot.py | 11 ++-- tests/unit/array/mixins/test_sample.py | 8 +-- tests/unit/array/mixins/test_text.py | 10 ++-- tests/unit/array/mixins/test_traverse.py | 56 +++++++++---------- .../unit/array/storage/redis/test_backend.py | 53 ++++++++++++------ .../array/storage/redis/test_getsetdel.py | 22 +++----- tests/unit/array/test_advance_indexing.py | 42 ++++++-------- tests/unit/array/test_construct.py | 8 +-- tests/unit/array/test_pull_out.py | 16 +++--- tests/unit/array/test_sequence.py | 17 +++--- 23 files changed, 195 insertions(+), 225 deletions(-) diff --git a/docarray/array/storage/redis/backend.py b/docarray/array/storage/redis/backend.py index 8bf8d560949..c684d59e461 100644 --- a/docarray/array/storage/redis/backend.py +++ b/docarray/array/storage/redis/backend.py @@ -1,3 +1,5 @@ +import copy +import uuid from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union @@ -19,8 +21,7 @@ class RedisConfig: n_dim: int host: str = field(default='localhost') port: int = field(default=6379) - index_name: str = field(default='idx') - flush: bool = 
field(default=False) + index_name: Optional[str] = None update_schema: bool = field(default=True) distance: str = field(default='COSINE') redis_config: Dict[str, Any] = field(default_factory=dict) @@ -55,6 +56,7 @@ def _init_storage( config: Optional[Union[RedisConfig, Dict]] = None, **kwargs, ): + config = copy.deepcopy(config) if not config: raise ValueError('Empty config is not allowed for Redis storage') elif isinstance(config, dict): @@ -72,6 +74,10 @@ def _init_storage( if config.redis_config.get('decode_responses'): config.redis_config['decode_responses'] = False + if config.index_name is None: + id = uuid.uuid4().hex + config.index_name = 'index_name__' + id + self._offset2id_key = config.index_name + '__offset2id' self._config = config self.n_dim = self._config.n_dim @@ -95,14 +101,10 @@ def _build_client(self): **self._config.redis_config, ) - if self._config.flush: - client.flushdb() - if self._config.update_schema: if self._config.index_name.encode() in client.execute_command('FT._LIST'): client.ft(index_name=self._config.index_name).dropindex() - if self._config.flush or self._config.update_schema: schema = self._build_schema_from_redis_config() idef = IndexDefinition(prefix=[self._doc_prefix]) client.ft(index_name=self._config.index_name).create_index( @@ -122,7 +124,6 @@ def _ensure_unique_config( config_joined['index_name'] = ( config_joined['index_name'] + '_subindex_' + subindex_name ) - config_joined['flush'] = False return config_joined def _build_schema_from_redis_config(self): diff --git a/docarray/array/storage/redis/getsetdel.py b/docarray/array/storage/redis/getsetdel.py index 53c9ef543aa..bbbde17f5f1 100644 --- a/docarray/array/storage/redis/getsetdel.py +++ b/docarray/array/storage/redis/getsetdel.py @@ -1,10 +1,8 @@ -from codecs import unicode_escape_decode -from typing import Dict +from typing import Dict, Iterable, Sequence from docarray import Document from docarray.array.storage.base.getsetdel import BaseGetSetDelMixin from 
docarray.array.storage.base.helper import Offset2ID -from typing import Sequence, Iterable class GetSetDelMixin(BaseGetSetDelMixin): @@ -120,4 +118,6 @@ def _save_offset2ids(self): self._update_offset2ids_meta() def _clear_storage(self): - self._client.flushdb() + for _id in self._offset2ids.ids: + self._del_doc_by_id(_id) + self._client.delete(self._offset2id_key) diff --git a/tests/unit/array/mixins/test_content.py b/tests/unit/array/mixins/test_content.py index 6d2d5896e1c..ceb7cf36deb 100644 --- a/tests/unit/array/mixins/test_content.py +++ b/tests/unit/array/mixins/test_content.py @@ -33,10 +33,9 @@ def test_content_empty_getter_return_none(cls, content_attr, start_storage): DocumentArrayWeaviate, DocumentArrayQdrant, DocumentArrayElastic, + DocumentArrayRedis, ]: da = cls(config={'n_dim': 3}) - elif cls == DocumentArrayRedis: - da = cls(config={'n_dim': 3, 'flush': True}) else: da = cls() assert getattr(da, content_attr) is None @@ -70,10 +69,9 @@ def test_content_empty_setter(cls, content_attr, start_storage): DocumentArrayWeaviate, DocumentArrayQdrant, DocumentArrayElastic, + DocumentArrayRedis, ]: da = cls(config={'n_dim': 3}) - elif cls == DocumentArrayRedis: - da = cls(config={'n_dim': 3, 'flush': True}) else: da = cls() setattr(da, content_attr[0], content_attr[1]) @@ -89,7 +87,7 @@ def test_content_empty_setter(cls, content_attr, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=1)), ], ) @pytest.mark.parametrize( @@ -124,7 +122,7 @@ def test_content_getter_setter(cls, content_attr, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, 
RedisConfig(n_dim=128)), ], ) def test_content_empty(da_len, da_cls, config, start_storage): @@ -162,7 +160,7 @@ def test_content_empty(da_len, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=5)), (DocumentArrayQdrant, QdrantConfig(n_dim=5)), (DocumentArrayElastic, ElasticConfig(n_dim=5)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_embeddings_setter(da_len, da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_del.py b/tests/unit/array/mixins/test_del.py index 52f132624fa..610ca99140b 100644 --- a/tests/unit/array/mixins/test_del.py +++ b/tests/unit/array/mixins/test_del.py @@ -118,7 +118,7 @@ def test_del_da_attribute(): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_del_subindex(storage, config): diff --git a/tests/unit/array/mixins/test_embed.py b/tests/unit/array/mixins/test_embed.py index dc1a20c5d98..e5c1762e925 100644 --- a/tests/unit/array/mixins/test_embed.py +++ b/tests/unit/array/mixins/test_embed.py @@ -96,12 +96,9 @@ def test_embedding_on_random_network( DocumentArrayAnnlite, DocumentArrayQdrant, DocumentArrayElastic, - ]: - da = da_cls.empty(N, config={'n_dim': embedding_shape}) - elif da_cls in [ DocumentArrayRedis, ]: - da = da_cls.empty(N, config={'n_dim': embedding_shape, 'flush': True}) + da = da_cls.empty(N, config={'n_dim': embedding_shape}) else: da = da_cls.empty(N, config=None) da.tensors = np.random.random([N, *input_shape]).astype(np.float32) diff --git a/tests/unit/array/mixins/test_empty.py b/tests/unit/array/mixins/test_empty.py index 7de86e9a5a8..0ba3da06e93 100644 --- a/tests/unit/array/mixins/test_empty.py +++ b/tests/unit/array/mixins/test_empty.py @@ -20,7 +20,7 @@ (DocumentArrayWeaviate, 
WeaviateConfig(n_dim=5)), (DocumentArrayQdrant, QdrantConfig(n_dim=5)), (DocumentArrayElastic, ElasticConfig(n_dim=5)), - (DocumentArrayRedis, RedisConfig(n_dim=5, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=5)), ], ) def test_empty_non_zero(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_eval_class.py b/tests/unit/array/mixins/test_eval_class.py index 8467f6d4ede..c8431dcf6db 100644 --- a/tests/unit/array/mixins/test_eval_class.py +++ b/tests/unit/array/mixins/test_eval_class.py @@ -15,7 +15,7 @@ ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256, 'flush': True}), + ('redis', {'n_dim': 256}), ], ) @pytest.mark.parametrize( @@ -52,7 +52,7 @@ def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_stor ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256, 'flush': True}), + ('redis', {'n_dim': 256}), ], ) @pytest.mark.parametrize( @@ -96,7 +96,7 @@ def test_eval_mixin_zero_match(storage, config, metric_fn, start_storage, kwargs ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256, 'flush': True}), + ('redis', {'n_dim': 256}), ], ) def test_diff_len_should_raise(storage, config, start_storage): @@ -115,7 +115,7 @@ def test_diff_len_should_raise(storage, config, start_storage): ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), ('elasticsearch', {'n_dim': 256}), - ('redis', {'n_dim': 256, 'flush': True}), + ('redis', {'n_dim': 256}), ], ) def test_diff_hash_fun_should_raise(storage, config, start_storage): @@ -134,7 +134,7 @@ def test_diff_hash_fun_should_raise(storage, config, start_storage): ('annlite', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), ('elasticsearch', {'n_dim': 3}), - ('redis', {'n_dim': 3, 'flush': True}), + ('redis', {'n_dim': 3}), ], ) def test_same_hash_same_len_fun_should_work(storage, config, 
start_storage): @@ -163,7 +163,7 @@ def test_same_hash_same_len_fun_should_work(storage, config, start_storage): ('annlite', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), ('elasticsearch', {'n_dim': 3}), - ('redis', {'n_dim': 3, 'flush': True}), + ('redis', {'n_dim': 3}), ], ) def test_adding_noise(storage, config, start_storage): @@ -194,7 +194,7 @@ def test_adding_noise(storage, config, start_storage): ('annlite', {'n_dim': 128}), ('qdrant', {'n_dim': 128}), ('elasticsearch', {'n_dim': 128}), - ('redis', {'n_dim': 128, 'flush': True}), + ('redis', {'n_dim': 128}), ], ) @pytest.mark.parametrize( diff --git a/tests/unit/array/mixins/test_find.py b/tests/unit/array/mixins/test_find.py index 3bfec1b988c..bc023765eed 100644 --- a/tests/unit/array/mixins/test_find.py +++ b/tests/unit/array/mixins/test_find.py @@ -31,7 +31,7 @@ def inv_cosine(*args): ('annlite', {'n_dim': 32}), ('qdrant', {'n_dim': 32}), ('elasticsearch', {'n_dim': 32}), - ('redis', {'n_dim': 32, 'flush': True}), + ('redis', {'n_dim': 32}), ], ) @pytest.mark.parametrize('limit', [1, 5, 10]) @@ -99,7 +99,7 @@ def test_find(storage, config, limit, query, start_storage): 'storage, config', [ ('elasticsearch', {'n_dim': 32, 'index_text': True}), - ('redis', {'n_dim': 32, 'flush': True, 'index_text': True}), + ('redis', {'n_dim': 32, 'index_text': True}), ], ) def test_find_by_text(storage, config, start_storage): @@ -146,7 +146,7 @@ def test_find_by_text(storage, config, start_storage): ('elasticsearch', {'n_dim': 32, 'tag_indices': ['attr1', 'attr2', 'attr3']}), ( 'redis', - {'n_dim': 32, 'flush': True, 'tag_indices': ['attr1', 'attr2', 'attr3']}, + {'n_dim': 32, 'tag_indices': ['attr1', 'attr2', 'attr3']}, ), ], ) @@ -373,13 +373,7 @@ def test_search_pre_filtering( np.random.seed(0) n_dim = 128 - if storage == 'redis': - da = DocumentArray( - storage=storage, - config={'n_dim': n_dim, 'columns': columns, 'flush': True}, - ) - else: - da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': 
columns}) + da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) da.extend( [ @@ -478,13 +472,7 @@ def test_filtering( ): n_dim = 128 - if storage == 'redis': - da = DocumentArray( - storage=storage, - config={'n_dim': n_dim, 'columns': columns, 'flush': True}, - ) - else: - da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) + da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) da.extend([Document(id=f'r{i}', tags={'price': i}) for i in range(50)]) thresholds = [10, 20, 30] @@ -583,7 +571,6 @@ def test_redis_category_filter(filter, checker, start_storage, columns): config={ 'n_dim': n_dim, 'columns': columns, - 'flush': True, }, ) @@ -668,7 +655,7 @@ def test_elastic_id_filter(storage, config, limit): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_find_subindex(storage, config): @@ -723,7 +710,7 @@ def test_find_subindex(storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_find_subindex_multimodal(storage, config): diff --git a/tests/unit/array/mixins/test_getset.py b/tests/unit/array/mixins/test_getset.py index 3a32ddbc8b4..cae3442b3fe 100644 --- a/tests/unit/array/mixins/test_getset.py +++ b/tests/unit/array/mixins/test_getset.py @@ -45,7 +45,7 @@ def nested_docs(): ('weaviate', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), ('elasticsearch', {'n_dim': 3}), - ('redis', {'n_dim': 3, 'flush': True}), + ('redis', {'n_dim': 3}), ], ) @pytest.mark.parametrize( @@ -71,7 +71,7 @@ def test_set_embeddings_multi_kind(array, storage, config, start_storage): (DocumentArrayWeaviate, 
WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_da_get_embeddings(docs, config, da_cls, start_storage): @@ -93,7 +93,7 @@ def test_da_get_embeddings(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_embeddings_setter_da(docs, config, da_cls, start_storage): @@ -124,7 +124,7 @@ def test_embeddings_setter_da(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_embeddings_wrong_len(docs, config, da_cls, start_storage): @@ -148,7 +148,7 @@ def test_embeddings_wrong_len(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_tensors_getter_da(docs, config, da_cls, start_storage): @@ -175,7 +175,7 @@ def test_tensors_getter_da(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_texts_getter_da(docs, config, da_cls, start_storage): @@ -211,7 +211,7 @@ def test_texts_getter_da(docs, config, 
da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_setter_by_sequences_in_selected_docs_da(docs, config, da_cls, start_storage): @@ -249,7 +249,7 @@ def test_setter_by_sequences_in_selected_docs_da(docs, config, da_cls, start_sto (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_texts_wrong_len(docs, config, da_cls, start_storage): @@ -273,7 +273,7 @@ def test_texts_wrong_len(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_tensors_wrong_len(docs, config, da_cls, start_storage): @@ -297,7 +297,7 @@ def test_tensors_wrong_len(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_blobs_getter_setter(docs, da_cls, config, start_storage): @@ -330,7 +330,7 @@ def test_blobs_getter_setter(docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_ellipsis_getter(nested_docs, da_cls, 
config, start_storage): @@ -354,7 +354,7 @@ def test_ellipsis_getter(nested_docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_ellipsis_attribute_setter(nested_docs, da_cls, config, start_storage): @@ -375,7 +375,7 @@ def test_ellipsis_attribute_setter(nested_docs, da_cls, config, start_storage): (DocumentArrayAnnlite, AnnliteConfig(n_dim=6)), (DocumentArrayWeaviate, WeaviateConfig(n_dim=6)), (DocumentArrayElastic, ElasticConfig(n_dim=6)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_zero_embeddings(da_cls, config, start_storage): @@ -427,7 +427,7 @@ def embeddings_eq(emb1, emb2): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_getset_subindex(storage, config): @@ -510,7 +510,7 @@ def test_getset_subindex(storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_init_subindex(storage, config): @@ -553,7 +553,7 @@ def test_init_subindex(storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_set_on_subindex(storage, config): diff --git a/tests/unit/array/mixins/test_io.py b/tests/unit/array/mixins/test_io.py index 56dcf746de3..82f8c75cccd 
100644 --- a/tests/unit/array/mixins/test_io.py +++ b/tests/unit/array/mixins/test_io.py @@ -17,8 +17,6 @@ from docarray.helper import random_identity from tests import random_docs -import gc - @pytest.fixture def docs(): @@ -37,7 +35,7 @@ def docs(): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=10)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=10)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=10)), ], ) def test_document_save_load( @@ -71,7 +69,7 @@ def test_document_save_load( (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=10)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=10)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=10)), ], ) def test_da_csv_write(docs, flatten_tags, tmp_path, da_cls, config, start_storage): @@ -91,7 +89,7 @@ def test_da_csv_write(docs, flatten_tags, tmp_path, da_cls, config, start_storag (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=256)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=256)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=256)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=256, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=256)), ], ) def test_from_ndarray(da_cls, config, start_storage): @@ -109,7 +107,7 @@ def test_from_ndarray(da_cls, config, start_storage): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=256)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=256)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=256)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=256, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=256)), ], ) def test_from_files(da_cls, config, start_storage): @@ -150,7 +148,7 @@ def test_from_files_exclude(): (DocumentArrayWeaviate, lambda: 
WeaviateConfig(n_dim=256)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=256)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=256)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=256, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=256)), ], ) def test_from_ndjson(da_cls, config, start_storage): @@ -168,13 +166,10 @@ def test_from_ndjson(da_cls, config, start_storage): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=3)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=3)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=3)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=3, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=3)), ], ) def test_from_to_pd_dataframe(da_cls, config, start_storage): - if da_cls == DocumentArrayRedis: - gc.collect() - df = da_cls.empty(2, config=config()).to_dataframe() assert len(da_cls.from_dataframe(df, config=config())) == 2 @@ -200,7 +195,7 @@ def test_from_to_pd_dataframe(da_cls, config, start_storage): (DocumentArrayAnnlite, AnnliteConfig(n_dim=3)), (DocumentArrayQdrant, QdrantConfig(n_dim=3)), (DocumentArrayElastic, ElasticConfig(n_dim=3)), - (DocumentArrayRedis, RedisConfig(n_dim=3, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=3)), ], ) def test_from_to_bytes(da_cls, config, start_storage): @@ -232,7 +227,7 @@ def test_from_to_bytes(da_cls, config, start_storage): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=256)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=256)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=256)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=256, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=256)), ], ) def test_push_pull_io(da_cls, config, show_progress, start_storage): @@ -246,12 +241,7 @@ def test_push_pull_io(da_cls, config, show_progress, start_storage): da1.push(name, show_progress=show_progress) - if da_cls == DocumentArrayRedis: - config = config() - config.flush = False - da2 = da_cls.pull(name, 
show_progress=show_progress, config=config) - else: - da2 = da_cls.pull(name, show_progress=show_progress, config=config()) + da2 = da_cls.pull(name, show_progress=show_progress, config=config()) assert len(da1) == len(da2) == 10 assert da1.texts == da2.texts == random_texts @@ -270,7 +260,7 @@ def test_push_pull_io(da_cls, config, show_progress, start_storage): # (DocumentArrayAnnlite, PqliteConfig(n_dim=3)), # TODO: enable this # (DocumentArrayQdrant, QdrantConfig(n_dim=3)), # (DocumentArrayElastic, ElasticConfig(n_dim=3)), # Elastic needs config - # (DocumentArrayRedis, RedisConfig(n_dim=3, flush=True)), # Redis needs config + # (DocumentArrayRedis, RedisConfig(n_dim=3)), # Redis needs config ], ) def test_from_to_base64(protocol, compress, da_cls, config): diff --git a/tests/unit/array/mixins/test_magic.py b/tests/unit/array/mixins/test_magic.py index 66edce5b152..104c3139b27 100644 --- a/tests/unit/array/mixins/test_magic.py +++ b/tests/unit/array/mixins/test_magic.py @@ -33,7 +33,7 @@ def docs(): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=1, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=1)), ], ) def test_iter_len_bool(da_cls, config, start_storage): @@ -60,7 +60,7 @@ def test_iter_len_bool(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_repr(da_cls, config, start_storage): @@ -80,7 +80,7 @@ def test_repr(da_cls, config, start_storage): ('weaviate', WeaviateConfig(n_dim=128)), ('qdrant', QdrantConfig(n_dim=128)), ('elasticsearch', ElasticConfig(n_dim=128)), - ('redis', RedisConfig(n_dim=128, flush=True)), + ('redis', RedisConfig(n_dim=128)), ], ) def 
test_repr_str(docs, storage, config, start_storage): @@ -104,7 +104,7 @@ def test_repr_str(docs, storage, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_iadd(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_match.py b/tests/unit/array/mixins/test_match.py index 70cd79902bb..fbe863d1106 100644 --- a/tests/unit/array/mixins/test_match.py +++ b/tests/unit/array/mixins/test_match.py @@ -75,7 +75,7 @@ def doc_lists_to_doc_arrays(doc_lists, *args, **kwargs): ('annlite', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), ('weaviate', {'n_dim': 3}), - ('redis', {'n_dim': 3, 'flush': True}), + ('redis', {'n_dim': 3}), ], ) @pytest.mark.parametrize('limit', [1, 2, 3]) @@ -704,13 +704,7 @@ def test_match_pre_filtering( ): n_dim = 128 - if storage == 'redis': - da = DocumentArray( - storage=storage, - config={'n_dim': n_dim, 'columns': columns, 'flush': True}, - ) - else: - da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) + da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) da.extend( [ @@ -754,7 +748,7 @@ def embeddings_eq(emb1, emb2): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_match_subindex(storage, config): diff --git a/tests/unit/array/mixins/test_parallel.py b/tests/unit/array/mixins/test_parallel.py index 52901c977d7..22ce0a78e3a 100644 --- a/tests/unit/array/mixins/test_parallel.py +++ b/tests/unit/array/mixins/test_parallel.py @@ -53,7 +53,7 @@ def test_parallel_map_apply_external_pool(pytestconfig, pool): (DocumentArrayWeaviate, 
WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) @pytest.mark.parametrize('backend', ['process', 'thread']) @@ -110,7 +110,7 @@ def test_parallel_map( (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) @pytest.mark.parametrize('backend', ['thread']) @@ -182,7 +182,7 @@ def test_parallel_map_batch( (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_map_lambda(pytestconfig, da_cls, config, start_storage): @@ -211,7 +211,7 @@ def test_map_lambda(pytestconfig, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), - (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=10)), ], ) def test_apply_partial(pytestconfig, da_cls, config, start_storage): @@ -241,7 +241,7 @@ def test_apply_partial(pytestconfig, da_cls, config, start_storage): ('weaviate', WeaviateConfig(n_dim=256)), ('qdrant', QdrantConfig(n_dim=256)), ('elasticsearch', ElasticConfig(n_dim=256)), - ('redis', RedisConfig(n_dim=256, flush=True)), + ('redis', RedisConfig(n_dim=256)), ], ) @pytest.mark.parametrize('backend', ['thread', 'process']) diff --git a/tests/unit/array/mixins/test_plot.py b/tests/unit/array/mixins/test_plot.py index e9d76fa97e9..2e706daeb92 100644 --- a/tests/unit/array/mixins/test_plot.py +++ 
b/tests/unit/array/mixins/test_plot.py @@ -28,7 +28,7 @@ # (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128, scroll_batch_size=8)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_sprite_fail_tensor_success_uri( @@ -67,7 +67,7 @@ def test_sprite_fail_tensor_success_uri( (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=128, scroll_batch_size=8)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=128)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=128)), ], ) @pytest.mark.parametrize('canvas_size', [50, 512]) @@ -117,7 +117,7 @@ def da_and_dam(start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 3}}), (DocumentArrayAnnlite, {'config': {'n_dim': 3}}), (DocumentArrayQdrant, {'config': {'n_dim': 3}}), - (DocumentArrayRedis, {'config': {'n_dim': 3, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 3}}), ] ] @@ -153,6 +153,7 @@ def _test_plot_embeddings(da): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=5)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=5)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=5)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=5)), ], ) def test_plot_embeddings_same_path(tmpdir, da_cls, config_gen, start_storage): @@ -182,7 +183,7 @@ def test_plot_embeddings_same_path(tmpdir, da_cls, config_gen, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_summary_homo_hetero(da_cls, config, start_storage): @@ -206,7 +207,7 @@ def test_summary_homo_hetero(da_cls, config, start_storage): 
(DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_empty_get_attributes(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_sample.py b/tests/unit/array/mixins/test_sample.py index 5844db56afc..b4d1b6b4d17 100644 --- a/tests/unit/array/mixins/test_sample.py +++ b/tests/unit/array/mixins/test_sample.py @@ -20,7 +20,7 @@ (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_sample(da_cls, config, start_storage): @@ -46,7 +46,7 @@ def test_sample(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_sample_with_seed(da_cls, config, start_storage): @@ -71,7 +71,7 @@ def test_sample_with_seed(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_shuffle(da_cls, config, start_storage): @@ -97,7 +97,7 @@ def test_shuffle(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def 
test_shuffle_with_seed(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_text.py b/tests/unit/array/mixins/test_text.py index 5c1eb163cb7..0f7481a7e0d 100644 --- a/tests/unit/array/mixins/test_text.py +++ b/tests/unit/array/mixins/test_text.py @@ -31,7 +31,7 @@ def docs(): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_da_vocabulary(da_cls, config, docs, min_freq, start_storage): @@ -60,7 +60,7 @@ def test_da_vocabulary(da_cls, config, docs, min_freq, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_da_text_to_tensor_non_max_len(docs, da_cls, config, start_storage): @@ -89,7 +89,7 @@ def test_da_text_to_tensor_non_max_len(docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_da_text_to_tensor_max_len_3(docs, da_cls, config, start_storage): @@ -120,7 +120,7 @@ def test_da_text_to_tensor_max_len_3(docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_da_text_to_tensor_max_len_1(docs, da_cls, config, start_storage): @@ -151,7 +151,7 @@ def test_da_text_to_tensor_max_len_1(docs, da_cls, 
config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_convert_text_tensor_random_text(da_cls, docs, config, start_storage): diff --git a/tests/unit/array/mixins/test_traverse.py b/tests/unit/array/mixins/test_traverse.py index af8a75c296e..9dad5475bcc 100644 --- a/tests/unit/array/mixins/test_traverse.py +++ b/tests/unit/array/mixins/test_traverse.py @@ -43,7 +43,7 @@ def doc_req(): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_type(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -63,7 +63,7 @@ def test_traverse_type(doc_req, filter_fn, da_cls, kwargs, start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_root(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -83,7 +83,7 @@ def test_traverse_root(doc_req, filter_fn, da_cls, kwargs, start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -103,7 +103,7 @@ def test_traverse_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): (DocumentArrayWeaviate, {'config': 
{'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_root_plus_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -124,7 +124,7 @@ def test_traverse_root_plus_chunk(doc_req, filter_fn, da_cls, kwargs, start_stor (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_chunk_plus_root(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -145,7 +145,7 @@ def test_traverse_chunk_plus_root(doc_req, filter_fn, da_cls, kwargs, start_stor (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_match(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -165,7 +165,7 @@ def test_traverse_match(doc_req, filter_fn, da_cls, kwargs, start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_match_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -185,7 +185,7 @@ def test_traverse_match_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage) (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 
'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_root_match_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -204,7 +204,7 @@ def test_traverse_root_match_chunk(doc_req, filter_fn, da_cls, kwargs, start_sto (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flatten_embedding(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -224,7 +224,7 @@ def test_traverse_flatten_embedding(doc_req, filter_fn, da_cls, kwargs, start_st (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flatten_root(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -243,7 +243,7 @@ def test_traverse_flatten_root(doc_req, filter_fn, da_cls, kwargs, start_storage (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flatten_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -262,7 +262,7 @@ def test_traverse_flatten_chunk(doc_req, filter_fn, da_cls, kwargs, start_storag (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flatten_root_plus_chunk( @@ -283,7 +283,7 @@ def 
test_traverse_flatten_root_plus_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flatten_match(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -302,7 +302,7 @@ def test_traverse_flatten_match(doc_req, filter_fn, da_cls, kwargs, start_storag (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flatten_match_chunk( @@ -323,7 +323,7 @@ def test_traverse_flatten_match_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flatten_root_match_chunk( @@ -350,7 +350,7 @@ def test_traverse_flatten_root_match_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flattened_per_path_embedding( @@ -375,7 +375,7 @@ def test_traverse_flattened_per_path_embedding( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flattened_per_path_root( @@ -396,7 +396,7 @@ def 
test_traverse_flattened_per_path_root( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flattened_per_path_chunk( @@ -417,7 +417,7 @@ def test_traverse_flattened_per_path_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flattened_per_path_root_plus_chunk( @@ -439,7 +439,7 @@ def test_traverse_flattened_per_path_root_plus_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flattened_per_path_match( @@ -460,7 +460,7 @@ def test_traverse_flattened_per_path_match( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flattened_per_path_root_match_chunk( @@ -484,7 +484,7 @@ def test_traverse_flattened_per_path_root_match_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_docuset_traverse_over_iterator_HACKY(da_cls, kwargs, filter_fn): @@ -512,7 +512,7 @@ def 
test_docuset_traverse_over_iterator_HACKY(da_cls, kwargs, filter_fn): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_docuset_traverse_over_iterator_CAVEAT(da_cls, kwargs, filter_fn): @@ -579,7 +579,7 @@ def test_traverse_chunkarray(filter_fn): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) @pytest.mark.parametrize( @@ -625,7 +625,7 @@ def test_filter_fn_traverse_flat( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) @pytest.mark.parametrize( @@ -677,7 +677,7 @@ def test_filter_fn_traverse_flat_per_path( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traversal_path(da_cls, kwargs): @@ -696,7 +696,7 @@ def test_traversal_path(da_cls, kwargs): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_traverse_flat_root_itself(da_cls, kwargs): @@ -719,7 +719,7 @@ def da_and_dam(N): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), 
(DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), - (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), + (DocumentArrayRedis, {'config': {'n_dim': 10}}), ], ) def test_flatten(da_cls, kwargs): diff --git a/tests/unit/array/storage/redis/test_backend.py b/tests/unit/array/storage/redis/test_backend.py index ca57f99915b..5f9afd5ff8c 100644 --- a/tests/unit/array/storage/redis/test_backend.py +++ b/tests/unit/array/storage/redis/test_backend.py @@ -32,13 +32,6 @@ def _save_offset2ids(self): } -@pytest.fixture(scope='function') -def da_redis(): - cfg = RedisConfig(n_dim=128, flush=True) - da_redis = DocumentArrayDummy(storage='redis', config=cfg) - return da_redis - - @pytest.mark.parametrize('distance', ['L2', 'IP', 'COSINE']) @pytest.mark.parametrize( 'method,initial_cap,ef_construction,block_size', @@ -81,7 +74,6 @@ def test_init_storage( cfg = RedisConfig( n_dim=128, distance=distance, - flush=True, columns=columns, method=method, initial_cap=initial_cap, @@ -92,19 +84,48 @@ def test_init_storage( redis_da = DocumentArrayDummy(storage='redis', config=cfg) assert redis_da._client.info()['tcp_port'] == redis_da._config.port - assert redis_da._client.ft().info()['attributes'][0][1] == b'embedding' - assert redis_da._client.ft().info()['attributes'][0][5] == b'VECTOR' + assert ( + redis_da._client.ft(index_name=redis_da._config.index_name).info()[ + 'attributes' + ][0][1] + == b'embedding' + ) + assert ( + redis_da._client.ft(index_name=redis_da._config.index_name).info()[ + 'attributes' + ][0][5] + == b'VECTOR' + ) def test_init_storage_update_schema(start_storage): - cfg = RedisConfig(n_dim=128, columns={'attr1': 'str'}, flush=True) + cfg = RedisConfig(n_dim=128, columns={'attr1': 'str'}, index_name="idx") redis_da = DocumentArrayDummy(storage='redis', config=cfg) - assert redis_da._client.ft().info()['attributes'][1][1] == b'attr1' + assert ( + 
redis_da._client.ft(index_name=redis_da._config.index_name).info()[ + 'attributes' + ][1][1] + == b'attr1' + ) - cfg = RedisConfig(n_dim=128, columns={'attr2': 'str'}, update_schema=False) + cfg = RedisConfig( + n_dim=128, columns={'attr2': 'str'}, index_name="idx", update_schema=False + ) redis_da = DocumentArrayDummy(storage='redis', config=cfg) - assert redis_da._client.ft().info()['attributes'][1][1] == b'attr1' + assert ( + redis_da._client.ft(index_name=redis_da._config.index_name).info()[ + 'attributes' + ][1][1] + == b'attr1' + ) - cfg = RedisConfig(n_dim=128, columns={'attr2': 'str'}, update_schema=True) + cfg = RedisConfig( + n_dim=128, columns={'attr2': 'str'}, index_name="idx", update_schema=True + ) redis_da = DocumentArrayDummy(storage='redis', config=cfg) - assert redis_da._client.ft().info()['attributes'][1][1] == b'attr2' + assert ( + redis_da._client.ft(index_name=redis_da._config.index_name).info()[ + 'attributes' + ][1][1] + == b'attr2' + ) diff --git a/tests/unit/array/storage/redis/test_getsetdel.py b/tests/unit/array/storage/redis/test_getsetdel.py index dfe7d9b0cdc..3825f052ae0 100644 --- a/tests/unit/array/storage/redis/test_getsetdel.py +++ b/tests/unit/array/storage/redis/test_getsetdel.py @@ -40,7 +40,7 @@ def columns(): @pytest.fixture(scope='function') def da_redis(columns): - cfg = RedisConfig(n_dim=3, flush=True, columns=columns) + cfg = RedisConfig(n_dim=3, columns=columns) da_redis = DocumentArrayDummy(storage='redis', config=cfg) return da_redis @@ -129,19 +129,7 @@ def test_setgetdel_doc_by_id(doc, da_redis, start_storage): da_redis._get_doc_by_id(doc.id) -def test_clear_storage(da_redis, start_storage): - for i in range(3): - doc = Document(id=str(i)) - da_redis._set_doc_by_id(str(i), doc) - - da_redis._clear_storage() - - for i in range(3): - with pytest.raises(KeyError): - da_redis._get_doc_by_id(i) - - -def test_offset2ids(da_redis, start_storage): +def test_offset2ids_and_clear_storage(da_redis, start_storage): ids = [str(i) 
for i in range(3)] for id in ids: doc = Document(id=id) @@ -150,3 +138,9 @@ def test_offset2ids(da_redis, start_storage): da_redis._save_offset2ids() da_redis._load_offset2ids() assert da_redis._offset2ids.ids == ids + + da_redis._clear_storage() + + for i in range(3): + with pytest.raises(KeyError): + da_redis._get_doc_by_id(i) diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index 0e1ff6884e1..8cf7780c3ba 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -7,7 +7,6 @@ from docarray.array.qdrant import QdrantConfig from docarray.array.elastic import ElasticConfig from docarray.array.redis import RedisConfig -import gc @pytest.fixture @@ -29,7 +28,7 @@ def indices(): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_getter_int_str(docs, storage, config, start_storage): @@ -62,7 +61,7 @@ def test_getter_int_str(docs, storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_setter_int_str(docs, storage, config, start_storage): @@ -92,7 +91,7 @@ def test_setter_int_str(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_del_int_str(docs, storage, config, start_storage, indices): @@ -127,7 +126,7 @@ def test_del_int_str(docs, storage, config, start_storage, indices): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', 
RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_slice(docs, storage, config, start_storage): @@ -166,7 +165,7 @@ def test_slice(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_sequence_bool_index(docs, storage, config, start_storage): @@ -213,7 +212,7 @@ def test_sequence_bool_index(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_sequence_int(docs, nparray, storage, config, start_storage): @@ -250,7 +249,7 @@ def test_sequence_int(docs, nparray, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_sequence_str(docs, storage, config, start_storage): @@ -285,7 +284,7 @@ def test_sequence_str(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_docarray_list_tuple(docs, storage, config, start_storage): @@ -306,7 +305,7 @@ def test_docarray_list_tuple(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_path_syntax_indexing(storage, config, start_storage): @@ -346,7 +345,7 @@ def 
test_path_syntax_indexing(storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) @pytest.mark.parametrize('use_subindex', [False, True]) @@ -443,6 +442,7 @@ def test_path_syntax_indexing_set(storage, config, use_subindex, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_getset_subindex(storage, config, start_storage): @@ -488,7 +488,7 @@ def test_getset_subindex(storage, config, start_storage): ('annlite', lambda: AnnliteConfig(n_dim=123)), ('qdrant', lambda: QdrantConfig(n_dim=123)), ('elasticsearch', lambda: ElasticConfig(n_dim=123)), - ('redis', lambda: RedisConfig(n_dim=123, flush=True)), + ('redis', lambda: RedisConfig(n_dim=123)), ], ) def test_attribute_indexing(storage, config_gen, start_storage, size): @@ -529,10 +529,8 @@ def test_tensor_attribute_selector(storage, start_storage): sp_embed[sp_embed > 0.1] = 0 sp_embed = scipy.sparse.coo_matrix(sp_embed) - if storage in ('annlite', 'weaviate', 'qdrant', 'elasticsearch'): + if storage in ('annlite', 'weaviate', 'qdrant', 'elasticsearch', 'redis'): da = DocumentArray(storage=storage, config={'n_dim': 10}) - elif storage == 'redis': - da = DocumentArray(storage=storage, config={'n_dim': 10, 'flush': True}) else: da = DocumentArray(storage=storage) @@ -578,10 +576,8 @@ def test_advance_selector_mixed(storage): ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elasticsearch', 'redis'], ) def test_single_boolean_and_padding(storage, start_storage): - if storage in ('annlite', 'weaviate', 'qdrant', 'elasticsearch'): + if storage in ('annlite', 'weaviate', 'qdrant', 'elasticsearch', 'redis'): da = DocumentArray(storage=storage, config={'n_dim': 10}) - elif storage == 'redis': - da = 
DocumentArray(storage=storage, config={'n_dim': 10, 'flush': True}) else: da = DocumentArray(storage=storage) da.extend(DocumentArray.empty(3)) @@ -609,7 +605,7 @@ def test_single_boolean_and_padding(storage, start_storage): ('annlite', lambda: AnnliteConfig(n_dim=123)), ('qdrant', lambda: QdrantConfig(n_dim=123)), ('elasticsearch', lambda: ElasticConfig(n_dim=123)), - ('redis', lambda: RedisConfig(n_dim=123, flush=True)), + ('redis', lambda: RedisConfig(n_dim=123)), ], ) def test_edge_case_two_strings(storage, config_gen, start_storage): @@ -678,9 +674,6 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): with pytest.raises(IndexError): da['1', 'hellohello'] = 'hello' - if storage == 'redis': - gc.collect() - @pytest.mark.parametrize( 'storage,config', @@ -690,7 +683,7 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), - ('redis', RedisConfig(n_dim=123, flush=True)), + ('redis', RedisConfig(n_dim=123)), ], ) def test_offset2ids_persistence(storage, config, start_storage): @@ -712,9 +705,6 @@ def test_offset2ids_persistence(storage, config, start_storage): da._persist = True da.__del__() - if storage == 'redis': - config.flush = False - config.update_schema = False da = DocumentArray(storage=storage, config=config) assert da[:, 'id'] == da_ids diff --git a/tests/unit/array/test_construct.py b/tests/unit/array/test_construct.py index e25bb7576c2..251e8459b16 100644 --- a/tests/unit/array/test_construct.py +++ b/tests/unit/array/test_construct.py @@ -21,7 +21,7 @@ (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) def test_construct_docarray(da_cls, config, start_storage): @@ -70,7 +70,7 @@ def 
test_construct_docarray(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) @pytest.mark.parametrize('is_copy', [True, False]) @@ -100,7 +100,7 @@ def test_docarray_copy_singleton(da_cls, config, is_copy, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) @pytest.mark.parametrize('is_copy', [True, False]) @@ -129,7 +129,7 @@ def test_docarray_copy_da(da_cls, config, is_copy, start_storage): (DocumentArrayAnnlite, AnnliteConfig(n_dim=1)), (DocumentArrayQdrant, QdrantConfig(n_dim=1)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) @pytest.mark.parametrize('is_copy', [True, False]) diff --git a/tests/unit/array/test_pull_out.py b/tests/unit/array/test_pull_out.py index 76a3b5b1c69..e487c94214e 100644 --- a/tests/unit/array/test_pull_out.py +++ b/tests/unit/array/test_pull_out.py @@ -22,7 +22,7 @@ def docs(): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), - ('redis', {'n_dim': 2, 'flush': True}), + ('redis', {'n_dim': 2}), ], ) def test_update_embedding(docs, storage, config, start_storage): @@ -57,7 +57,7 @@ def test_update_embedding(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), - ('redis', {'n_dim': 2, 'flush': True}), + ('redis', {'n_dim': 2}), ], ) def test_update_doc_embedding(docs, storage, config, start_storage): @@ -92,7 +92,7 @@ def test_update_doc_embedding(docs, 
storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), - ('redis', {'n_dim': 2, 'flush': True}), + ('redis', {'n_dim': 2}), ], ) def test_batch_update_embedding(docs, storage, config, start_storage): @@ -125,7 +125,7 @@ def test_batch_update_embedding(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), - ('redis', {'n_dim': 2, 'flush': True}), + ('redis', {'n_dim': 2}), ], ) def test_batch_update_doc_embedding(docs, storage, config, start_storage): @@ -160,7 +160,7 @@ def test_batch_update_doc_embedding(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), - ('redis', {'n_dim': 2, 'flush': True}), + ('redis', {'n_dim': 2}), ], ) def test_update_id(docs, storage, config, start_storage): @@ -182,7 +182,7 @@ def test_update_id(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), - ('redis', {'n_dim': 2, 'flush': True}), + ('redis', {'n_dim': 2}), ], ) def test_update_doc_id(docs, storage, config, start_storage): @@ -203,7 +203,7 @@ def test_update_doc_id(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), - ('redis', {'n_dim': 2, 'flush': True}), + ('redis', {'n_dim': 2}), ], ) def test_batch_update_id(docs, storage, config, start_storage): @@ -227,7 +227,7 @@ def test_batch_update_id(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), - ('redis', {'n_dim': 2, 'flush': True}), + ('redis', {'n_dim': 2}), ], ) def test_batch_update_doc_id(docs, storage, config, start_storage): diff --git a/tests/unit/array/test_sequence.py b/tests/unit/array/test_sequence.py index 33b25b8db4d..83aed2e8376 100644 --- a/tests/unit/array/test_sequence.py +++ 
b/tests/unit/array/test_sequence.py @@ -27,7 +27,7 @@ (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=1)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=1)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=1)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=1, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=1)), ], ) def test_insert(da_cls, config, start_storage): @@ -50,7 +50,7 @@ def test_insert(da_cls, config, start_storage): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=1)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=1)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=1)), - (DocumentArrayRedis, lambda: RedisConfig(n_dim=1, flush=True)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=1)), ], ) def test_append_extend(da_cls, config, start_storage): @@ -84,7 +84,7 @@ def update_config_inplace(config, tmpdir, tmpfile): ('weaviate', {'n_dim': 3, 'name': 'Weaviate'}), ('qdrant', {'n_dim': 3, 'collection_name': 'qdrant'}), ('elasticsearch', {'n_dim': 3, 'index_name': 'elasticsearch'}), - ('redis', {'n_dim': 3, 'flush': True}), + ('redis', {'n_dim': 3, 'index_name': 'redis'}), ], ) def test_context_manager_from_disk(storage, config, start_storage, tmpdir, tmpfile): @@ -103,9 +103,6 @@ def test_context_manager_from_disk(storage, config, start_storage, tmpdir, tmpfi assert len(da) == 2 assert len(da._offset2ids.ids) == 2 - if storage == 'redis': - config['flush'] = False - config['update_schema'] = False da2 = DocumentArray(storage=storage, config=config) assert len(da2) == 2 @@ -124,7 +121,7 @@ def test_context_manager_from_disk(storage, config, start_storage, tmpdir, tmpfi ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_extend_subindex(storage, config): @@ -170,7 +167,7 @@ def test_extend_subindex(storage, config): ('qdrant', 
{'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) def test_append_subindex(storage, config): @@ -220,7 +217,7 @@ def embeddings_eq(emb1, emb2): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) @pytest.mark.parametrize( @@ -247,7 +244,7 @@ def test_del_and_append(index, storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), - ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), + ('redis', {'n_dim': 3, 'distance': 'L2'}), ], ) @pytest.mark.parametrize( From f40ef7caa92829cabb1892df3f010795f692d2f1 Mon Sep 17 00:00:00 2001 From: AnneY Date: Thu, 15 Sep 2022 14:33:40 +0800 Subject: [PATCH 02/10] docs: modify redis doc for mutiple DAs --- docs/advanced/document-store/redis.md | 236 +++++++++++++------------- 1 file changed, 115 insertions(+), 121 deletions(-) diff --git a/docs/advanced/document-store/redis.md b/docs/advanced/document-store/redis.md index 190255a4ea9..aaee31c0faf 100644 --- a/docs/advanced/document-store/redis.md +++ b/docs/advanced/document-store/redis.md @@ -43,19 +43,7 @@ da = DocumentArray( The usage will be the same as the ordinary DocumentArray, but the dimension of an embedding for a Document must be provided at creation time. -```{caution} -Currently, one Redis server instance can only store a single DocumentArray. 
-``` - -To store a new DocumentArray on the current Redis server, you can set `flush` to `True` so that the previous DocumentArray will be cleared: - -```python -from docarray import DocumentArray - -da = DocumentArray(storage='redis', config={'n_dim': 128, 'flush': True}) -``` - -To access a previously stored DocumentArray, you can set `host` and `port` to match with the previuosly stored DocumentArray and make sure `flush` is `False`. +To access a previously stored DocumentArray, you can specify `index_name` and set `host` and `port` to match with the previously stored DocumentArray. The following example builds a DocumentArray from previously stored data on `localhost:6379`: @@ -64,13 +52,19 @@ from docarray import DocumentArray, Document with DocumentArray( storage='redis', - config={'n_dim': 128, 'flush': True}, + config={ + 'n_dim': 128, + 'index_name': 'idx', + }, ) as da: da.extend([Document() for _ in range(1000)]) da2 = DocumentArray( storage='redis', - config={'n_dim': 128, 'flush': False}, + config={ + 'n_dim': 128, + 'index_name': 'idx', + }, ) da2.summary() @@ -100,10 +94,11 @@ da2.summary() │ host localhost │ │ port 6379 │ │ index_name idx │ -│ flush False │ │ update_schema True │ │ distance COSINE │ │ redis_config {} │ +│ index_text False │ +│ tag_indices [] │ │ batch_size 64 │ │ method HNSW │ │ ef_construction 200 │ @@ -116,8 +111,6 @@ da2.summary() ╰─────────────────────────────────╯ ``` - - Other functions behave the same as in-memory DocumentArray. @@ -147,7 +140,6 @@ da = DocumentArray( config={ 'n_dim': n_dim, 'columns': {'price': 'int', 'color': 'str', 'stock': 'bool'}, - 'flush': True, 'distance': 'L2', }, ) @@ -236,7 +228,7 @@ for embedding, price, color, stock, score in zip( This will print: ```console -Embeddings Approximate Nearest Neighbours with "price" at most 7, "color" blue and "stock" False: +Embeddings Approximate Nearest Neighbours with "price" at most 7, "color" blue and "stock" True: score=12, embedding=[6. 6. 
6.], price=6, color=blue, stock=True score=48, embedding=[4. 4. 4.], price=4, color=blue, stock=True @@ -281,6 +273,102 @@ More example filter expresses } ``` + +(vector-search-index)= +### Update Vector Search Indexing Schema + +Redis vector similarity supports two indexing methods: + +- **FLAT**: Brute-force search. +- **HNSW**: Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs. + +Both methods have some mandatory parameters and optional parameters. + +```{tip} +Read more about HNSW or FLAT parameters and their default values [here](https://redis.io/docs/stack/search/reference/vectors/#querying-vector-fields). +``` + +You can update the search indexing schema on an existing DocumentArray by setting `update_schema` to `True` and changing your configuratoin parameters. + +Consider you store Documents with default indexing method `'HNSW'` and distance `'L2'`, and want to find the nearest vectors to the embedding `[8. 8. 8.]`. + +```python +import numpy as np +from docarray import Document, DocumentArray + +n_dim = 3 + +da = DocumentArray( + storage='redis', + config={ + 'n_dim': n_dim, + 'index_name': 'idx', + 'distance': 'L2', + }, +) + +da.extend([Document(id=f'{i}', embedding=i * np.ones(n_dim)) for i in range(10)]) + +np_query = np.ones(n_dim) * 8 +n_limit = 5 + +results = da.find(np_query, limit=n_limit) + +print('\nEmbeddings Approximate Nearest Neighbours:\n') +for embedding, score in zip( + results.embeddings, + results[:, 'scores'], +): + print(f' embedding={embedding},\t score={score["score"].value}') +``` + +This will print: + +```console +Embeddings Approximate Nearest Neighbours: + + embedding=[8. 8. 8.], score=0 + embedding=[7. 7. 7.], score=3 + embedding=[9. 9. 9.], score=3 + embedding=[6. 6. 6.], score=12 + embedding=[5. 5. 
5.], score=27 +``` + +Then you can use a different search indexing schema on the current DocumentArray as follows: +```python +da2 = DocumentArray( + storage='redis', + config={ + 'n_dim': n_dim, + 'index_name': 'idx', + 'update_schema': True, + 'distance': 'COSINE', + }, +) + +results = da.find(np_query, limit=n_limit) + +print('\nEmbeddings Approximate Nearest Neighbours:\n') +for embedding, score in zip( + results.embeddings, + results[:, 'scores'], +): + print(f' embedding={embedding},\t score={score["score"].value}') +``` + +This will print: + +```console +Embeddings Approximate Nearest Neighbours: + + embedding=[3. 3. 3.], score=0 + embedding=[6. 6. 6.], score=0 + embedding=[4. 4. 4.], score=5.96046447754e-08 + embedding=[1. 1. 1.], score=5.96046447754e-08 + embedding=[8. 8. 8.], score=5.96046447754e-08 +``` + + ### Search by `.text` field You can perform full-text search in a `DocumentArray` with `storage='redis'`. @@ -290,9 +378,7 @@ The following example builds a `DocumentArray` with several documents containing ```python from docarray import Document, DocumentArray -da = DocumentArray( - storage='redis', config={'n_dim': 2, 'index_text': True, 'flush': True} -) +da = DocumentArray(storage='redis', config={'n_dim': 2, 'index_text': True}) da.extend( [ Document(id='1', text='token1 token2 token3'), @@ -344,7 +430,7 @@ from docarray import Document, DocumentArray da = DocumentArray( storage='redis', - config={'n_dim': 32, 'flush': True, 'tag_indices': ['food_type', 'price']}, + config={'n_dim': 32, 'tag_indices': ['food_type', 'price']}, ) da.extend( [ @@ -382,7 +468,7 @@ This will print: searching "cheap" in : ['cheap but not that cheap', 'quite cheap for what you get!'] searching "italian" in : - ['French and Italian food', 'Italian and Spanish food'] + ['Italian and Spanish food', 'French and Italian food'] ``` ```{note} @@ -392,99 +478,6 @@ results = da.find('cheap', index='price') ``` -(vector-search-index)= -### Update Vector Search Indexing Schema - 
-Redis vector similarity supports two indexing methods: - -- **FLAT**: Brute-force search. -- **HNSW**: Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs. - -Both methods have some mandatory parameters and optional parameters. - -```{tip} -Read more about HNSW or FLAT parameters and their default values [here](https://redis.io/docs/stack/search/reference/vectors/#querying-vector-fields). -``` - -You can update the search indexing schema on an existing DocumentArray by setting `update_schema` to `True` and changing your configuratoin parameters. - -Consider you store Documents with default indexing method `'HNSW'` and distance `'L2'`, and want to find the nearest vectors to the embedding `[8. 8. 8.]`. - -```python -import numpy as np -from docarray import Document, DocumentArray - -n_dim = 3 - -da = DocumentArray( - storage='redis', - config={ - 'n_dim': n_dim, - 'flush': True, - 'distance': 'L2', - }, -) - -da.extend([Document(id=f'{i}', embedding=i * np.ones(n_dim)) for i in range(10)]) - -np_query = np.ones(n_dim) * 8 -n_limit = 5 - -results = da.find(np_query, limit=n_limit) - -print('\nEmbeddings Approximate Nearest Neighbours:\n') -for embedding, score in zip( - results.embeddings, - results[:, 'scores'], -): - print(f' embedding={embedding},\t score={score["score"].value}') -``` - -This will print: - -```console -Embeddings Approximate Nearest Neighbours: - - embedding=[8. 8. 8.], score=0 - embedding=[7. 7. 7.], score=3 - embedding=[9. 9. 9.], score=3 - embedding=[6. 6. 6.], score=12 - embedding=[5. 5. 
5.], score=27 -``` - -Then you can use a different search indexing schema on the current DocumentArray as follows: -```python -da2 = DocumentArray( - storage='redis', - config={ - 'n_dim': n_dim, - 'update_schema': True, - 'distance': 'COSINE', - }, -) - -results = da.find(np_query, limit=n_limit) - -print('\nEmbeddings Approximate Nearest Neighbours:\n') -for embedding, score in zip( - results.embeddings, - results[:, 'scores'], -): - print(f' embedding={embedding},\t score={score["score"].value}') -``` - -This will print: - -```console -Embeddings Approximate Nearest Neighbours: - - embedding=[3. 3. 3.], score=0 - embedding=[6. 6. 6.], score=0 - embedding=[9. 9. 9.], score=5.96046447754e-08 - embedding=[8. 8. 8.], score=5.96046447754e-08 - embedding=[5. 5. 5.], score=5.96046447754e-08 -``` - ## Configuration @@ -495,13 +488,14 @@ The following configs can be set: | `host` | Host address of the Redis server | `'localhost'` | | `port` | Port of the Redis Server | `6379` | | `redis_config` | Other Redis configs in a Dict and pass to `Redis` client constructor, e.g. 
`socket_timeout`, `ssl`| `{}` | -| `index_name` | Redis index name; the name of RedisSearch index to set this DocumentArray | `'idx'` | +| `index_name` | Redis index name; the name of RedisSearch index to set this DocumentArray | `None` | | `n_dim` | Dimensionality of the embeddings | `None` | -| `flush` | Boolean flag indicating whether to clear previous DocumentArray in Redis | `False` | | `update_schema` | Boolean flag indicating whether to update Redis Search schema | `True` | | `distance` | Similarity distance metric in Redis, one of {`'L2'`, `'IP'`, `'COSINE'`} | `'COSINE'` | | `batch_size` | Batch size used to handle storage updates | `64` | | `method` | Vector similarity index algorithm in Redis, either `FLAT` or `HNSW` | `'HNSW'` | +| `index_text` | Boolean flag indicating whether to index `.text` | `None` | +| `tag_indices` | List of tags to index as text field | `[]` | | `ef_construction` | Optional parameter for Redis HNSW algorithm | `200` | | `m` | Optional parameter for Redis HNSW algorithm | `16` | | `ef_runtime` | Optional parameter for Redis HNSW algorithm | `10` | @@ -513,6 +507,6 @@ You can check the default values in [the docarray source code](https://github.co ```{note} -Only 1 DocumentArray is allowed per redis instance (db0). We will support storing multiple DocumentArrays in one redis instance, full-text search, more query conitions and geo-filtering soon. +We will support geo-filtering soon. The benchmark test is on the way. 
``` From 45c95d528fe73f7dcfd5f66b4c7b5a76acb0fb27 Mon Sep 17 00:00:00 2001 From: AnneY Date: Sat, 17 Sep 2022 00:56:43 +0800 Subject: [PATCH 03/10] refactor: use packaged function --- docarray/array/storage/redis/backend.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docarray/array/storage/redis/backend.py b/docarray/array/storage/redis/backend.py index c684d59e461..559ba6b8058 100644 --- a/docarray/array/storage/redis/backend.py +++ b/docarray/array/storage/redis/backend.py @@ -6,7 +6,7 @@ import numpy as np from docarray import Document from docarray.array.storage.base.backend import BaseBackendMixin, TypeMap -from docarray.helper import dataclass_from_dict +from docarray.helper import dataclass_from_dict, random_identity from redis import Redis from redis.commands.search.field import NumericField, TextField, VectorField @@ -75,8 +75,7 @@ def _init_storage( config.redis_config['decode_responses'] = False if config.index_name is None: - id = uuid.uuid4().hex - config.index_name = 'index_name__' + id + config.index_name = 'index_name__' + random_identity() self._offset2id_key = config.index_name + '__offset2id' self._config = config From 3d04d50eacfc5f80c641dd3e7c767305d101af4a Mon Sep 17 00:00:00 2001 From: AnneY Date: Sat, 17 Sep 2022 00:58:18 +0800 Subject: [PATCH 04/10] docs: clarify expression --- docs/advanced/document-store/redis.md | 36 +++++++++------------------ 1 file changed, 12 insertions(+), 24 deletions(-) diff --git a/docs/advanced/document-store/redis.md b/docs/advanced/document-store/redis.md index aaee31c0faf..52eeec0d51c 100644 --- a/docs/advanced/document-store/redis.md +++ b/docs/advanced/document-store/redis.md @@ -166,10 +166,10 @@ da.extend( ) print('\nIndexed price, color and stock:\n') -for embedding, price, color, stock in zip( - da.embeddings, da[:, 'tags__price'], da[:, 'tags__color'], da[:, 'tags__stock'] -): - print(f'\tembedding={embedding},\t color={color},\t stock={stock}') +for doc in da: + print( + 
f"\tembedding={doc.embedding},\t color={doc.tags['color']},\t stock={doc.tags['stock']}" + ) ``` Consider the case where you want the nearest vectors to the embedding `[8., 8., 8.]`, with the restriction that prices, colors and stock must pass a filter. For example, let's consider that retrieved Documents must have a `price` value lower than or equal to `max_price`, have `color` equal to `blue` and have `stock` equal to `True`. We can encode this information in Redis using @@ -213,15 +213,9 @@ results = da.find(np_query, filter=filter, limit=n_limit) print( '\nEmbeddings Approximate Nearest Neighbours with "price" at most 7, "color" blue and "stock" False:\n' ) -for embedding, price, color, stock, score in zip( - results.embeddings, - results[:, 'tags__price'], - results[:, 'tags__color'], - results[:, 'tags__stock'], - results[:, 'scores'], -): +for doc in results: print( - f' score={score["score"].value},\t embedding={embedding},\t price={price},\t color={color},\t stock={stock}' + f" score={doc.scores['score'].value},\t embedding={doc.embedding},\t price={doc.tags['price']},\t color={doc.tags['color']},\t stock={doc.tags['stock']}" ) ``` @@ -290,7 +284,7 @@ Read more about HNSW or FLAT parameters and their default values [here](https:// You can update the search indexing schema on an existing DocumentArray by setting `update_schema` to `True` and changing your configuratoin parameters. -Consider you store Documents with default indexing method `'HNSW'` and distance `'L2'`, and want to find the nearest vectors to the embedding `[8. 8. 8.]`. +Consider you store Documents with default indexing method `'HNSW'` and distance `'L2'`, and want to find the nearest vectors to the embedding `[8. 8. 
8.]`: ```python import numpy as np @@ -315,11 +309,8 @@ n_limit = 5 results = da.find(np_query, limit=n_limit) print('\nEmbeddings Approximate Nearest Neighbours:\n') -for embedding, score in zip( - results.embeddings, - results[:, 'scores'], -): - print(f' embedding={embedding},\t score={score["score"].value}') +for doc in results: + print(f" embedding={doc.embedding},\t score={doc.scores['score'].value}") ``` This will print: @@ -349,11 +340,8 @@ da2 = DocumentArray( results = da.find(np_query, limit=n_limit) print('\nEmbeddings Approximate Nearest Neighbours:\n') -for embedding, score in zip( - results.embeddings, - results[:, 'scores'], -): - print(f' embedding={embedding},\t score={score["score"].value}') +for doc in results: + print(f" embedding={doc.embedding},\t score={doc.scores['score'].value}") ``` This will print: @@ -494,7 +482,7 @@ The following configs can be set: | `distance` | Similarity distance metric in Redis, one of {`'L2'`, `'IP'`, `'COSINE'`} | `'COSINE'` | | `batch_size` | Batch size used to handle storage updates | `64` | | `method` | Vector similarity index algorithm in Redis, either `FLAT` or `HNSW` | `'HNSW'` | -| `index_text` | Boolean flag indicating whether to index `.text` | `None` | +| `index_text` | Boolean flag indicating whether to index `.text`. 
`True` will enable full text search on `.text` | `None` | | `tag_indices` | List of tags to index as text field | `[]` | | `ef_construction` | Optional parameter for Redis HNSW algorithm | `200` | | `m` | Optional parameter for Redis HNSW algorithm | `16` | From 606297e6c17d21eadaddadc19dc95b4237d09615 Mon Sep 17 00:00:00 2001 From: AnneY Date: Mon, 19 Sep 2022 16:16:28 +0800 Subject: [PATCH 05/10] perf: batch redis _clear_storage --- docarray/array/storage/redis/getsetdel.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/docarray/array/storage/redis/getsetdel.py b/docarray/array/storage/redis/getsetdel.py index bbbde17f5f1..8d963d3e99f 100644 --- a/docarray/array/storage/redis/getsetdel.py +++ b/docarray/array/storage/redis/getsetdel.py @@ -118,6 +118,13 @@ def _save_offset2ids(self): self._update_offset2ids_meta() def _clear_storage(self): - for _id in self._offset2ids.ids: - self._del_doc_by_id(_id) - self._client.delete(self._offset2id_key) + cursor = 0 + while True: + cursor, keys = self._client.scan( + cursor, + match=self._config.index_name + '*', + count=self._config.batch_size, + ) + self._client.delete(*keys) + if cursor == 0: + break From 0cc4902f8c902bee87944012ef026d832eddf447 Mon Sep 17 00:00:00 2001 From: AnneY Date: Tue, 20 Sep 2022 08:14:26 +0800 Subject: [PATCH 06/10] fix: minor error fix --- docarray/array/storage/redis/backend.py | 1 - tests/unit/array/mixins/test_content.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/docarray/array/storage/redis/backend.py b/docarray/array/storage/redis/backend.py index 559ba6b8058..45e04656f9d 100644 --- a/docarray/array/storage/redis/backend.py +++ b/docarray/array/storage/redis/backend.py @@ -1,5 +1,4 @@ import copy -import uuid from dataclasses import dataclass, field from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union diff --git a/tests/unit/array/mixins/test_content.py b/tests/unit/array/mixins/test_content.py index 
ceb7cf36deb..ea4535c9d00 100644 --- a/tests/unit/array/mixins/test_content.py +++ b/tests/unit/array/mixins/test_content.py @@ -87,7 +87,7 @@ def test_content_empty_setter(cls, content_attr, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), - (DocumentArrayRedis, RedisConfig(n_dim=1)), + (DocumentArrayRedis, RedisConfig(n_dim=128)), ], ) @pytest.mark.parametrize( From 88301c5761e863b80081d10fe65d5bd14a8c046c Mon Sep 17 00:00:00 2001 From: AnneY Date: Tue, 20 Sep 2022 22:57:29 +0800 Subject: [PATCH 07/10] refactor: use scan_iter in redis clear --- docarray/array/storage/redis/getsetdel.py | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/docarray/array/storage/redis/getsetdel.py b/docarray/array/storage/redis/getsetdel.py index 8d963d3e99f..653759529c0 100644 --- a/docarray/array/storage/redis/getsetdel.py +++ b/docarray/array/storage/redis/getsetdel.py @@ -4,6 +4,8 @@ from docarray.array.storage.base.getsetdel import BaseGetSetDelMixin from docarray.array.storage.base.helper import Offset2ID +from itertools import zip_longest + class GetSetDelMixin(BaseGetSetDelMixin): """Provide concrete implementation for ``__getitem__``, ``__setitem__``, @@ -118,13 +120,12 @@ def _save_offset2ids(self): self._update_offset2ids_meta() def _clear_storage(self): - cursor = 0 - while True: - cursor, keys = self._client.scan( - cursor, - match=self._config.index_name + '*', - count=self._config.batch_size, - ) - self._client.delete(*keys) - if cursor == 0: - break + pipe = self._client.pipeline() + batch = [] + for key in self._client.scan_iter(match=self._config.index_name + '*'): + batch.append(key) + if len(batch) % self._config.batch_size == 0: + pipe.delete(*batch) + batch = [] + pipe.delete(*batch) + pipe.execute() From 695795eb12185fb7431fb6477819edc98a05c870 Mon Sep 17 00:00:00 2001 From: AnneY Date: Tue, 20 Sep 2022 
23:12:31 +0800 Subject: [PATCH 08/10] fix: remove useless imports --- docarray/array/storage/redis/getsetdel.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/docarray/array/storage/redis/getsetdel.py b/docarray/array/storage/redis/getsetdel.py index 653759529c0..90be2b2e7fb 100644 --- a/docarray/array/storage/redis/getsetdel.py +++ b/docarray/array/storage/redis/getsetdel.py @@ -4,8 +4,6 @@ from docarray.array.storage.base.getsetdel import BaseGetSetDelMixin from docarray.array.storage.base.helper import Offset2ID -from itertools import zip_longest - class GetSetDelMixin(BaseGetSetDelMixin): """Provide concrete implementation for ``__getitem__``, ``__setitem__``, From be798d35ba0678cd8ff58fbf128b38cf8af26dc2 Mon Sep 17 00:00:00 2001 From: AnneY Date: Tue, 20 Sep 2022 23:17:13 +0800 Subject: [PATCH 09/10] fix: redis delete empty batch --- docarray/array/storage/redis/getsetdel.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docarray/array/storage/redis/getsetdel.py b/docarray/array/storage/redis/getsetdel.py index 90be2b2e7fb..6a4598610a1 100644 --- a/docarray/array/storage/redis/getsetdel.py +++ b/docarray/array/storage/redis/getsetdel.py @@ -125,5 +125,6 @@ def _clear_storage(self): if len(batch) % self._config.batch_size == 0: pipe.delete(*batch) batch = [] - pipe.delete(*batch) + if len(batch) > 0: + pipe.delete(*batch) pipe.execute() From 37198cb759317ea930ff7cc25fb5bf582c391f56 Mon Sep 17 00:00:00 2001 From: AnneY Date: Wed, 21 Sep 2022 14:27:13 +0800 Subject: [PATCH 10/10] refactor: use drop_index in redis clear --- docarray/array/storage/redis/getsetdel.py | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/docarray/array/storage/redis/getsetdel.py b/docarray/array/storage/redis/getsetdel.py index 6a4598610a1..8187faaf837 100644 --- a/docarray/array/storage/redis/getsetdel.py +++ b/docarray/array/storage/redis/getsetdel.py @@ -118,13 +118,7 @@ def _save_offset2ids(self): self._update_offset2ids_meta() 
def _clear_storage(self): - pipe = self._client.pipeline() - batch = [] - for key in self._client.scan_iter(match=self._config.index_name + '*'): - batch.append(key) - if len(batch) % self._config.batch_size == 0: - pipe.delete(*batch) - batch = [] - if len(batch) > 0: - pipe.delete(*batch) - pipe.execute() + self._client.ft(index_name=self._config.index_name).dropindex( + delete_documents=True + ) + self._client.delete(self._offset2id_key)