# NOTE(review): this region of the file was a whitespace-mangled unified diff
# (original newlines collapsed to spaces, then rewrapped at arbitrary byte
# boundaries).  It is reconstructed below into the new modules the patch
# introduced.  The patch also edited docarray/array/document.py: it added a
# `storage='redis'` @overload on `DocumentArray.__new__` (returning
# 'DocumentArrayRedis', config: Optional[Union['RedisConfig', Dict]]) and a
# dispatch branch:
#     elif storage == 'redis':
#         from .redis import DocumentArrayRedis
#         instance = super().__new__(DocumentArrayRedis)
# Those hunks patch a class that is not fully visible here, so they are kept
# as this comment rather than reproduced as free-standing code.

# ---------------------------------------------------------------------------
# docarray/array/redis.py
# ---------------------------------------------------------------------------
from .document import DocumentArray
from .storage.redis import RedisConfig, StorageMixins

__all__ = ['DocumentArrayRedis', 'RedisConfig']


class DocumentArrayRedis(StorageMixins, DocumentArray):
    """A :class:`DocumentArray` that uses Redis (RediSearch) as its
    vector search engine and document storage.
    """

    def __new__(cls, *args, **kwargs):
        """``__new__`` method for :class:`DocumentArrayRedis`.

        :param args: positional args forwarded to ``__init__``
        :param kwargs: keyword args forwarded to ``__init__``
        :return: the instantiated :class:`DocumentArrayRedis` object
        """
        return super().__new__(cls)


# ---------------------------------------------------------------------------
# docarray/array/storage/redis/__init__.py
# ---------------------------------------------------------------------------
from abc import ABC

from .backend import BackendMixin, RedisConfig
from .find import FindMixin
from .getsetdel import GetSetDelMixin
from .seqlike import SequenceLikeMixin

__all__ = ['StorageMixins', 'RedisConfig']


class StorageMixins(FindMixin, BackendMixin, GetSetDelMixin, SequenceLikeMixin, ABC):
    """Aggregate mixin combining all Redis storage capabilities."""

    ...


# ---------------------------------------------------------------------------
# docarray/array/storage/redis/backend.py
# ---------------------------------------------------------------------------
from dataclasses import dataclass, field
from typing import TYPE_CHECKING, Any, Dict, Iterable, List, Optional, Tuple, Union

import numpy as np

from docarray import Document
from docarray.array.storage.base.backend import BaseBackendMixin, TypeMap
from docarray.helper import dataclass_from_dict
from redis import Redis
from redis.commands.search.field import NumericField, TextField, VectorField
from redis.commands.search.indexDefinition import IndexDefinition

if TYPE_CHECKING:
    from docarray.typing import ArrayType, DocumentArraySourceType


@dataclass
class RedisConfig:
    """Configuration of the Redis storage backend."""

    n_dim: int  # dimensionality of the embeddings (mandatory)
    host: str = field(default='localhost')
    port: int = field(default=6379)
    index_name: str = field(default='idx')  # RediSearch index name
    flush: bool = field(default=False)  # drop previously stored data on init
    update_schema: bool = field(default=True)  # rebuild the search schema on init
    distance: str = field(default='COSINE')  # one of 'L2', 'IP', 'COSINE'
    redis_config: Dict[str, Any] = field(default_factory=dict)  # extra Redis() kwargs
    batch_size: int = field(default=64)  # batch size for bulk uploads
    method: str = field(default='HNSW')  # vector index algorithm: 'HNSW' or 'FLAT'
    ef_construction: int = field(default=200)  # HNSW build-time parameter
    m: int = field(default=16)  # HNSW max outgoing edges per node
    ef_runtime: int = field(default=10)  # HNSW query-time parameter
    block_size: int = field(default=1048576)  # FLAT block size
    initial_cap: Optional[int] = None  # initial index capacity (Redis default if None)
    columns: Optional[List[Tuple[str, str]]] = None  # extra (name, type) tag columns


class BackendMixin(BaseBackendMixin):
    """Provide necessary functions to enable this storage backend."""

    # Maps docarray column types to RediSearch field constructors.
    TYPE_MAP = {
        'str': TypeMap(type='text', converter=TextField),
        'bytes': TypeMap(type='text', converter=TextField),
        'int': TypeMap(type='integer', converter=NumericField),
        'float': TypeMap(type='float', converter=NumericField),
        'double': TypeMap(type='double', converter=NumericField),
        'long': TypeMap(type='long', converter=NumericField),
        'bool': TypeMap(type='long', converter=NumericField),
    }

    def _init_storage(
        self,
        _docs: Optional['DocumentArraySourceType'] = None,
        config: Optional[Union[RedisConfig, Dict]] = None,
        **kwargs,
    ):
        """Initialise the Redis client, the search index and optional seed docs.

        :param _docs: optional initial content (iterable of Documents or a
            single Document)
        :param config: a :class:`RedisConfig` or an equivalent dict; mandatory
        :raises ValueError: on missing config or invalid distance/method
        """
        if not config:
            raise ValueError('Empty config is not allowed for Redis storage')
        elif isinstance(config, dict):
            config = dataclass_from_dict(RedisConfig, config)

        if config.distance not in ['L2', 'IP', 'COSINE']:
            raise ValueError(
                f'Expecting distance metric one of COSINE, L2 OR IP, got {config.distance} instead'
            )
        if config.method not in ['HNSW', 'FLAT']:
            raise ValueError(
                f'Expecting search method one of HNSW OR FLAT, got {config.method} instead'
            )

        # Documents are stored as raw base64 bytes; decoding responses would
        # corrupt the blobs, so force decode_responses off if it was requested.
        if config.redis_config.get('decode_responses'):
            config.redis_config['decode_responses'] = False

        self._offset2id_key = config.index_name + '__offset2id'
        self._config = config
        self.n_dim = self._config.n_dim
        self._doc_prefix = config.index_name + ':'
        self._config.columns = self._normalize_columns(self._config.columns)

        self._client = self._build_client()
        super()._init_storage()

        if _docs is None:
            return
        elif isinstance(_docs, Iterable):
            self.extend(_docs)
        elif isinstance(_docs, Document):
            self.append(_docs)

    def _build_client(self):
        """Create the Redis client and (re)create the search index.

        :return: a connected :class:`redis.Redis` client
        """
        client = Redis(
            host=self._config.host,
            port=self._config.port,
            **self._config.redis_config,
        )

        if self._config.flush:
            client.flushdb()

        # Drop the old index (keeping the documents) before re-creating it
        # with the possibly updated schema.
        if self._config.update_schema:
            if self._config.index_name.encode() in client.execute_command('FT._LIST'):
                client.ft(index_name=self._config.index_name).dropindex()

        # NOTE(review): if both `flush` and `update_schema` are False and the
        # index does not exist yet, no index is created here — confirm that
        # callers always reach this with one of the two flags set.
        if self._config.flush or self._config.update_schema:
            schema = self._build_schema_from_redis_config()
            idef = IndexDefinition(prefix=[self._doc_prefix])
            client.ft(index_name=self._config.index_name).create_index(
                schema, definition=idef
            )

        return client

    def _ensure_unique_config(
        self,
        config_root: dict,
        config_subindex: dict,
        config_joined: dict,
        subindex_name: str,
    ) -> dict:
        """Derive a subindex config that cannot collide with the root index.

        :param config_root: the root index config
        :param config_subindex: the user-provided subindex config
        :param config_joined: root config overridden with subindex entries
        :param subindex_name: name suffix for the subindex
        :return: the joined config with a unique ``index_name`` and flushing off
        """
        if 'index_name' not in config_subindex:
            config_joined['index_name'] = (
                config_joined['index_name'] + '_subindex_' + subindex_name
            )
        # Never let a subindex wipe the shared database.
        config_joined['flush'] = False
        return config_joined

    def _build_schema_from_redis_config(self):
        """Build the RediSearch schema (vector field + extra columns).

        :return: list of RediSearch fields describing the index schema
        """
        index_param = {
            'TYPE': 'FLOAT32',
            'DIM': self.n_dim,
            'DISTANCE_METRIC': self._config.distance,
        }

        if self._config.method == 'HNSW':
            index_options = {
                'M': self._config.m,
                'EF_CONSTRUCTION': self._config.ef_construction,
                'EF_RUNTIME': self._config.ef_runtime,
            }
            index_param.update(index_options)

        if self._config.method == 'FLAT':
            index_options = {'BLOCK_SIZE': self._config.block_size}
            index_param.update(index_options)

        if self._config.initial_cap:
            index_param['INITIAL_CAP'] = self._config.initial_cap
        schema = [VectorField('embedding', self._config.method, index_param)]

        for col, coltype in self._config.columns:
            schema.append(self._map_column(col, coltype))

        return schema

    def _doc_id_exists(self, doc_id):
        """Return truthy iff a document with ``doc_id`` is stored."""
        return self._client.exists(self._doc_prefix + doc_id)

    def _map_embedding(self, embedding: 'ArrayType') -> bytes:
        """Serialize an embedding to the FLOAT32 byte format Redis expects.

        A missing embedding is stored as the zero vector so the document
        remains searchable.

        :param embedding: the embedding in any supported array type, or None
        :return: little-endian float32 bytes of length ``4 * n_dim``
        """
        if embedding is not None:
            from docarray.math.ndarray import to_numpy_array

            embedding = to_numpy_array(embedding)

            if embedding.ndim > 1:
                embedding = np.asarray(embedding).squeeze()
        else:
            embedding = np.zeros(self.n_dim)
        return embedding.astype(np.float32).tobytes()

    def _get_offset2ids_meta(self) -> List[str]:
        """Fetch the persisted offset→id list from Redis (empty if absent)."""
        if not self._client.exists(self._offset2id_key):
            return []
        ids = self._client.lrange(self._offset2id_key, 0, -1)
        return [id.decode() for id in ids]

    def _update_offset2ids_meta(self):
        """Persist the current offset→id mapping, replacing the stored one."""
        if self._client.exists(self._offset2id_key):
            self._client.delete(self._offset2id_key)
        if len(self._offset2ids.ids) > 0:
            self._client.rpush(self._offset2id_key, *self._offset2ids.ids)


# ---------------------------------------------------------------------------
# docarray/array/storage/redis/find.py
# ---------------------------------------------------------------------------
from typing import TYPE_CHECKING, Dict, List, Optional, Sequence, TypeVar, Union

import numpy as np

from docarray import Document, DocumentArray
from docarray.array.mixins.find import FindMixin as BaseFindMixin
from docarray.math import ndarray
from docarray.math.ndarray import to_numpy_array
from docarray.score import NamedScore
# fix: dropped unused `NumericFilter` import
from redis.commands.search.query import Query

if TYPE_CHECKING:
    import tensorflow
    import torch

    RedisArrayType = TypeVar(
        'RedisArrayType',
        np.ndarray,
        tensorflow.Tensor,
        torch.Tensor,
        Sequence[float],
        Dict,
    )


class FindMixin(BaseFindMixin):
    """Vector search and filtering on top of RediSearch."""

    def _find_similar_vectors(
        self,
        query: 'RedisArrayType',
        filter: Optional[Dict] = None,
        limit: Optional[Union[int, float]] = 20,
        **kwargs,
    ):
        """KNN search for a single query vector, optionally pre-filtered.

        :param query: a single query embedding
        :param filter: optional MongoDB-style filter narrowing the candidates
        :param limit: number of results to return
        :return: a :class:`DocumentArray` with ``scores['score']`` set
        """
        query_str = self._build_query_str(filter) if filter else "*"

        q = (
            Query(f'{query_str}=>[KNN {limit} @embedding $vec AS vector_score]')
            .sort_by('vector_score')
            .paging(0, limit)
            .dialect(2)
        )

        query_params = {'vec': to_numpy_array(query).astype(np.float32).tobytes()}
        results = (
            self._client.ft(index_name=self._config.index_name)
            .search(q, query_params)
            .docs
        )

        da = DocumentArray()
        for res in results:
            doc = Document.from_base64(res.blob.encode())
            doc.scores['score'] = NamedScore(value=res.vector_score)
            da.append(doc)
        return da

    def _find(
        self,
        query: 'RedisArrayType',
        limit: Optional[Union[int, float]] = 20,
        filter: Optional[Dict] = None,
        **kwargs,
    ) -> List['DocumentArray']:
        """Run one KNN search per row of ``query``.

        :param query: one or more query embeddings
        :param limit: number of results per query
        :param filter: optional MongoDB-style filter
        :return: one :class:`DocumentArray` per query row
        """
        query = np.array(query)
        num_rows, n_dim = ndarray.get_array_rows(query)
        if n_dim != 2:
            query = query.reshape((num_rows, -1))

        return [
            self._find_similar_vectors(q, filter=filter, limit=limit, **kwargs)
            for q in query
        ]

    def _find_with_filter(self, filter: Dict, limit: Optional[Union[int, float]] = 20):
        """Retrieve up to ``limit`` documents matching ``filter`` (no vector)."""
        s = self._build_query_str(filter)
        q = Query(s)
        q.paging(0, limit)

        results = self._client.ft(index_name=self._config.index_name).search(q).docs

        da = DocumentArray()
        for res in results:
            doc = Document.from_base64(res.blob.encode())
            da.append(doc)
        return da

    def _filter(
        self, filter: Dict, limit: Optional[Union[int, float]] = 20
    ) -> 'DocumentArray':
        """Public filter entry point; delegates to :meth:`_find_with_filter`."""
        return self._find_with_filter(filter, limit=limit)

    def _build_query_str(self, filter: Dict) -> str:
        """Translate a MongoDB-style filter into RediSearch query syntax.

        Supported operators: ``$gt``, ``$gte``, ``$lt``, ``$lte``, ``$eq``,
        ``$ne``.  Numeric comparisons use RediSearch range syntax
        ``[min max]`` with ``(`` marking an exclusive bound.

        :param filter: e.g. ``{'price': {'$lte': 7}, 'color': {'$eq': 'red'}}``
        :return: the RediSearch query string
        """
        INF = "+inf"
        NEG_INF = "-inf"
        s = "("

        for key in filter:
            operator = list(filter[key].keys())[0]
            value = filter[key][operator]
            if operator == '$gt':
                s += f"@{key}:[({value} {INF}] "
            elif operator == '$gte':
                s += f"@{key}:[{value} {INF}] "
            elif operator == '$lt':
                s += f"@{key}:[{NEG_INF} ({value}] "
            elif operator == '$lte':
                s += f"@{key}:[{NEG_INF} {value}] "
            elif operator == '$eq':
                if type(value) is int:
                    s += f"@{key}:[{value} {value}] "
                elif type(value) is bool:
                    # bools are stored as 0/1 numeric fields
                    s += f"@{key}:[{int(value)} {int(value)}] "
                else:
                    s += f"@{key}:{value} "
            elif operator == '$ne':
                if type(value) is int:
                    s += f"-@{key}:[{value} {value}] "
                elif type(value) is bool:
                    s += f"-@{key}:[{int(value)} {int(value)}] "
                else:
                    s += f"-@{key}:{value} "
        s += ")"

        return s


# ---------------------------------------------------------------------------
# docarray/array/storage/redis/getsetdel.py
# ---------------------------------------------------------------------------
# fix: dropped unused `from codecs import unicode_escape_decode`
from typing import Dict, Iterable, Sequence

from docarray import Document
from docarray.array.storage.base.getsetdel import BaseGetSetDelMixin
from docarray.array.storage.base.helper import Offset2ID


class GetSetDelMixin(BaseGetSetDelMixin):
    """Provide concrete implementation for ``__getitem__``, ``__setitem__``,
    and ``__delitem__`` for ``DocumentArrayRedis``"""

    def _get_doc_by_id(self, _id: str) -> 'Document':
        """Concrete implementation of base class' ``_get_doc_by_id``

        :param _id: the id of the document
        :raises KeyError: if no document with ``_id`` is stored
        :return: the retrieved document from redis
        """
        try:
            result = self._client.hgetall(self._doc_prefix + _id)
            doc = Document.from_base64(result[b'blob'])
            return doc
        except Exception as ex:
            raise KeyError(_id) from ex

    def _get_docs_by_ids(self, ids: Sequence[str]) -> Iterable['Document']:
        """Concrete implementation of base class' ``_get_docs_by_ids``

        :param ids: ids of the document
        :raises KeyError: with (missing ids, found docs) if any id is absent
        :return: Iterable[Document]
        """
        accumulated_docs = []
        accumulated_docs_id_not_found = []

        if not ids:
            return accumulated_docs

        # One round-trip for all lookups.
        pipe = self._client.pipeline()
        for id in ids:
            pipe.hgetall(self._doc_prefix + id)

        results = pipe.execute()

        for i, result in enumerate(results):
            if result:
                accumulated_docs.append(Document.from_base64(result[b'blob']))
            else:
                accumulated_docs_id_not_found.append(ids[i])

        if accumulated_docs_id_not_found:
            raise KeyError(accumulated_docs_id_not_found, accumulated_docs)

        return accumulated_docs

    def _set_doc_by_id(self, _id: str, value: 'Document'):
        """Concrete implementation of base class' ``_set_doc_by_id``

        :param _id: the id of doc to update
        :param value: the document to update to
        """
        # Remove both the old key and (if the id changed) the new key first
        # so the hset below is a clean insert.
        self._del_doc_by_id(_id)
        if _id != value.id:
            self._del_doc_by_id(value.id)

        payload = self._document_to_redis(value)
        self._client.hset(self._doc_prefix + value.id, mapping=payload)

    def _set_docs_by_ids(self, ids, docs: Iterable['Document'], mismatch_ids: Dict):
        """Overridden implementation of ``_set_docs_by_ids`` in order to add
        docs in batches and flush at the end.

        :param ids: the ids used for indexing
        :param docs: the documents to store
        :param mismatch_ids: mapping of ids whose value differs from the doc id
        """
        for _id, doc in zip(ids, docs):
            self._del_doc_by_id(_id)
            if _id != doc.id:
                self._del_doc_by_id(doc.id)

        self._upload_batch(docs)

    def _del_doc_by_id(self, _id: str):
        """Concrete implementation of base class' ``_del_doc_by_id``

        :param _id: the id of the document to delete
        """
        if self._doc_id_exists(_id):
            self._client.delete(self._doc_prefix + _id)

    def _document_to_redis(self, doc: 'Document') -> Dict:
        """Serialize a Document into the hash payload stored in Redis.

        :param doc: the document to serialize
        :return: mapping with id, embedding bytes, base64 blob and tag columns
        """
        extra_columns = {}

        for col, _ in self._config.columns:
            tag = doc.tags.get(col)
            if tag is not None:
                # bools are stored as 0/1 so they index as numeric fields
                extra_columns[col] = int(tag) if isinstance(tag, bool) else tag

        payload = {
            'id': doc.id,
            'embedding': self._map_embedding(doc.embedding),
            'blob': doc.to_base64(),
            **extra_columns,
        }

        if doc.text:
            payload['text'] = doc.text
        return payload

    def _load_offset2ids(self):
        """Load the offset→id mapping from Redis into memory."""
        ids = self._get_offset2ids_meta()
        self._offset2ids = Offset2ID(ids)

    def _save_offset2ids(self):
        """Persist the in-memory offset→id mapping back to Redis."""
        self._update_offset2ids_meta()

    def _clear_storage(self):
        """Delete everything in the current Redis database."""
        self._client.flushdb()


# ---------------------------------------------------------------------------
# docarray/array/storage/redis/seqlike.py
# ---------------------------------------------------------------------------
from typing import Iterable, Union

from docarray import Document, DocumentArray
from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin


class SequenceLikeMixin(BaseSequenceLikeMixin):
    """Implement sequence-like methods for DocumentArray with Redis as storage"""

    def __eq__(self, other):
        """Compare this object to the other, returns True if and only if other
        has the same type as self and other has the same meta information

        :param other: the other object to check for equality
        :return: ``True`` if other is equal to self
        """
        # two DA are considered as the same if they have the same client meta data
        return (
            type(self) is type(other)
            and self._client.client_info() == other._client.client_info()
            and self._config == other._config
        )

    def __len__(self):
        """Return the length of :class:`DocumentArray` that uses Redis as storage

        :return: the length of this :class:`DocumentArrayRedis` object
        """
        try:
            return len(self._offset2ids)
        except Exception:
            # fix: was a bare `except:`; offset2ids may not exist yet, in
            # which case the array is treated as empty
            return 0

    def __contains__(self, x: Union[str, 'Document']):
        """Check if ``x`` is contained in this :class:`DocumentArray` with Redis storage

        :param x: the id of the document to check or the document object itself
        :return: True if ``x`` is contained in self
        """
        if isinstance(x, str):
            return self._doc_id_exists(x)
        elif isinstance(x, Document):
            return self._doc_id_exists(x.id)
        else:
            return False

    def __repr__(self):
        """Return the string representation of :class:`DocumentArrayRedis` object
        :return: string representation of this object
        """
        # NOTE(review): the repr literal was lost in the mangled source
        # (it read `return f''` with the angle-bracket content stripped);
        # reconstructed from the pattern used by the sibling storage
        # backends — confirm against the upstream patch.
        return f'<{self.__class__.__name__} (length={len(self)}) at {id(self)}>'

    def _upload_batch(self, batch_of_docs: Iterable['Document']):
        """Write a batch of documents in a single pipelined round-trip."""
        pipe = self._client.pipeline()
        for doc in batch_of_docs:
            payload = self._document_to_redis(doc)
            pipe.hset(self._doc_prefix + doc.id, mapping=payload)
        pipe.execute()

    def _extend(self, docs: Iterable['Document']):
        """Append ``docs`` in batches of ``config.batch_size``."""
        da = DocumentArray(docs)
        for batch_of_docs in da.batch(self._config.batch_size):
            self._upload_batch(batch_of_docs)
            self._offset2ids.extend(batch_of_docs[:, 'id'])
<!-- NOTE(review): this region was part of the same whitespace-mangled diff;
     reconstructed as readable text.  The hunk for
     docs/advanced/document-store/index.md adds `redis` to the toctree,
     between `weaviate` and `extend`.  What follows is the new file
     docs/advanced/document-store/redis.md. -->

(redis)=
# Redis

You can use [Redis](https://redis.io) as the document store for DocumentArray. It is useful when you want to have faster Document retrieval on embeddings, i.e. `.match()`, `.find()`.

````{tip}
This feature requires `redis`. You can install it via `pip install "docarray[redis]".`
````

## Usage

### Start Redis service

To use Redis as the storage backend, it is required to have the Redis service started. Create `docker-compose.yml` as follows:

```yaml
version: "3.3"
services:
  redis:
    image: redislabs/redisearch:2.6.0
    ports:
      - "6379:6379"
```

Then

```bash
docker-compose up
```

### Create DocumentArray with Redis backend

Assuming the service is started using the default configuration (i.e. server address is `localhost:6379`), you can instantiate a DocumentArray with Redis storage as such:

```python
from docarray import DocumentArray

da = DocumentArray(
    storage='redis', config={'host': 'localhost', 'port': 6379, 'n_dim': 128}
)
```

The usage would be the same as the ordinary DocumentArray, but the dimension of an embedding for a Document must be provided at creation time.

```{caution}
Currently, one Redis server instance can only store a single DocumentArray.
```

To store a new DocumentArray on the current Redis server, you can set `flush` to `True` so that the previous DocumentArray will be cleared:

```python
from docarray import DocumentArray

da = DocumentArray(storage='redis', config={'n_dim': 128, 'flush': True})
```

To access a previously stored DocumentArray, you can set `host` and `port` to match with the previously stored DocumentArray and make sure `flush` is `False`.

The following example builds a DocumentArray from previously stored data on `localhost:6379`:

```python
from docarray import DocumentArray, Document

with DocumentArray(
    storage='redis',
    config={'n_dim': 128, 'flush': True},
) as da:
    da.extend([Document() for _ in range(1000)])

da2 = DocumentArray(
    storage='redis',
    config={'n_dim': 128, 'flush': False},
)

da2.summary()
```

````{dropdown} Output
```console
╭────────────── Documents Summary ──────────────╮
│                                               │
│   Type                   DocumentArrayRedis   │
│   Length                 1000                 │
│   Homogenous Documents   True                 │
│   Common Attributes      ('id',)              │
│   Multimodal dataclass   False                │
│                                               │
╰───────────────────────────────────────────────╯
╭───────────────────── Attributes Summary ─────────────────────╮
│                                                              │
│   Attribute   Data type   #Unique values   Has empty value   │
│   ──────────────────────────────────────────────────────     │
│   id          ('str',)    1000             False             │
│                                                              │
╰──────────────────────────────────────────────────────────────╯
╭─── DocumentArrayRedis Config ───╮
│                                 │
│   n_dim             128         │
│   host              localhost   │
│   port              6379        │
│   index_name        idx         │
│   flush             False       │
│   update_schema     True        │
│   distance          COSINE      │
│   redis_config      {}          │
│   batch_size        64          │
│   method            HNSW        │
│   ef_construction   200         │
│   m                 16          │
│   ef_runtime        10          │
│   block_size        1048576     │
│   initial_cap       None        │
│   columns           []          │
│                                 │
╰─────────────────────────────────╯
```
````

Other functions behave the same as in-memory DocumentArray.


### Vector search with filter query

You can perform Vector Similarity Search based on [FLAT or HNSW algorithm](vector-search-index) and pre-filter results using a filter query that is based on [MongoDB's Query](https://www.mongodb.com/docs/manual/reference/operator/query/). We currently support a subset of those selectors:

- `$eq` - Equal to (number, string)
- `$ne` - Not equal to (number, string)
- `$gt` - Greater than (number)
- `$gte` - Greater than or equal to (number)
- `$lt` - Less than (number)
- `$lte` - Less than or equal to (number)


Consider Documents with embeddings `[0, 0, 0]` up to `[9, 9, 9]` where the Document with embedding `[i, i, i]`
has tag `price` with a number value, and tag `color` with a string value. You can create such example with the following code:

```python
import numpy as np
from docarray import Document, DocumentArray

n_dim = 3

da = DocumentArray(
    storage='redis',
    config={
        'n_dim': n_dim,
        'columns': [('price', 'int'), ('color', 'str')],
        'flush': True,
        'distance': 'L2',
    },
)

da.extend(
    [
        Document(
            id=f'{i}', embedding=i * np.ones(n_dim), tags={'price': i, 'color': 'red'}
        )
        for i in range(10)
    ]
)
da.extend(
    [
        Document(
            id=f'{i+10}',
            embedding=i * np.ones(n_dim),
            tags={'price': i, 'color': 'blue'},
        )
        for i in range(10)
    ]
)

print('\nIndexed prices and colors:\n')
for embedding, price, color in zip(
    da.embeddings, da[:, 'tags__price'], da[:, 'tags__color']
):
    print(f'\tembedding={embedding},\t price={price},\t color={color}')
```

Consider the case where you want the nearest vectors to the embedding `[8., 8., 8.]`, with the restriction that
prices and colors must pass a filter. For example, let's consider that retrieved Documents must have a `price` value lower than or equal to `max_price` and have `color` equal to `color`. We can encode this information in Redis using `{'price': {'$lte': max_price}, 'color': {'$eq': color}}`.

Then the search with the proposed filter can be used as follows:
```python
max_price = 7
color = 'red'
n_limit = 5

np_query = np.ones(n_dim) * 8
print(f'\nQuery vector: \t{np_query}')

filter = {'price': {'$lte': max_price}, 'color': {'$eq': color}}
results = da.find(np_query, filter=filter, limit=n_limit)

print(
    '\nEmbeddings Approximate Nearest Neighbours with "price" at most 7 and "color" red:\n'
)
for embedding, price, color, score in zip(
    results.embeddings,
    results[:, 'tags__price'],
    results[:, 'tags__color'],
    results[:, 'scores'],
):
    print(
        f' score={score["score"].value},\t embedding={embedding},\t price={price},\t color={color}'
    )
```

This would print:

```console
Embeddings Approximate Nearest Neighbours with "price" at most 7 and "color" red:

 score=3,	 embedding=[7. 7. 7.],	 price=7,	 color=red
 score=12,	 embedding=[6. 6. 6.],	 price=6,	 color=red
 score=27,	 embedding=[5. 5. 5.],	 price=5,	 color=red
 score=48,	 embedding=[4. 4. 4.],	 price=4,	 color=red
 score=75,	 embedding=[3. 3. 3.],	 price=3,	 color=red
```

(vector-search-index)=
### Update Vector Search Indexing Schema

Redis vector similarity supports two indexing methods:

- **FLAT**: Brute-force search.
- **HNSW**: Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs.

Both methods have some mandatory parameters and optional parameters.

```{tip}
Read more about HNSW or FLAT parameters and their default values [here](https://redis.io/docs/stack/search/reference/vectors/#querying-vector-fields).
```

You can update the search indexing schema on an existing DocumentArray by setting `update_schema` to `True` and changing your configuration parameters.

Consider you store Documents with default indexing method `'HNSW'` and distance `'L2'`, and want to find the nearest vectors to the embedding `[8. 8. 8.]`.

```python
import numpy as np
from docarray import Document, DocumentArray

n_dim = 3

da = DocumentArray(
    storage='redis',
    config={
        'n_dim': n_dim,
        'flush': True,
        'distance': 'L2',
    },
)

da.extend([Document(id=f'{i}', embedding=i * np.ones(n_dim)) for i in range(10)])

np_query = np.ones(n_dim) * 8
n_limit = 5

results = da.find(np_query, limit=n_limit)

print('\nEmbeddings Approximate Nearest Neighbours:\n')
for embedding, score in zip(
    results.embeddings,
    results[:, 'scores'],
):
    print(f' embedding={embedding},\t score={score["score"].value}')
```

This would print:

```console
Embeddings Approximate Nearest Neighbours:

 embedding=[8. 8. 8.],	 score=0
 embedding=[7. 7. 7.],	 score=3
 embedding=[9. 9. 9.],	 score=3
 embedding=[6. 6. 6.],	 score=12
 embedding=[5. 5. 5.],	 score=27
```

Then you can use a different search indexing schema on the current DocumentArray as follows:
```python
da2 = DocumentArray(
    storage='redis',
    config={
        'n_dim': n_dim,
        'update_schema': True,
        'distance': 'COSINE',
    },
)

results = da.find(np_query, limit=n_limit)

print('\nEmbeddings Approximate Nearest Neighbours:\n')
for embedding, score in zip(
    results.embeddings,
    results[:, 'scores'],
):
    print(f' embedding={embedding},\t score={score["score"].value}')
```

This will print:

```console
Embeddings Approximate Nearest Neighbours:

 embedding=[3. 3. 3.],	 score=0
 embedding=[6. 6. 6.],	 score=0
 embedding=[9. 9. 9.],	 score=5.96046447754e-08
 embedding=[8. 8. 8.],	 score=5.96046447754e-08
 embedding=[5. 5. 5.],	 score=5.96046447754e-08
```


## Configuration

The following configs can be set:

| Name              | Description                                                                                        | Default                                           |
|-------------------|----------------------------------------------------------------------------------------------------|---------------------------------------------------|
| `host`            | Host address of the Redis server                                                                   | `'localhost'`                                     |
| `port`            | Port of the Redis Server                                                                           | `6379`                                            |
| `redis_config`    | Other Redis configs in a Dict and pass to `Redis` client constructor, e.g. `socket_timeout`, `ssl` | `{}`                                              |
| `index_name`      | Redis index name; the name of RedisSearch index to set this DocumentArray                          | `'idx'`                                           |
| `n_dim`           | Dimensionality of the embeddings                                                                   | `None`                                            |
| `flush`           | Boolean flag indicating whether to clear previous DocumentArray in Redis                           | `False`                                           |
| `update_schema`   | Boolean flag indicating whether to update Redis Search schema                                      | `True`                                            |
| `distance`        | Similarity distance metric in Redis, one of {`'L2'`, `'IP'`, `'COSINE'`}                           | `'COSINE'`                                        |
| `batch_size`      | Batch size used to handle storage updates                                                          | `64`                                              |
| `method`          | Vector similarity index algorithm in Redis, either `FLAT` or `HNSW`                                | `'HNSW'`                                          |
| `ef_construction` | Optional parameter for Redis HNSW algorithm                                                        | `200`                                             |
| `m`               | Optional parameter for Redis HNSW algorithm                                                        | `16`                                              |
| `ef_runtime`      | Optional parameter for Redis HNSW algorithm                                                        | `10`                                              |
| `block_size`      | Optional parameter for Redis FLAT algorithm                                                        | `1048576`                                         |
| `initial_cap`     | Optional parameter for Redis HNSW and FLAT algorithm                                               | `None`, defaults to the default value in Redis    |
| `columns`         | Other fields to store in Document and build schema                                                 | `None`                                            |

You can check the default values in [the docarray source code](https://github.com/jina-ai/docarray/blob/main/docarray/array/storage/redis/backend.py)


```{note}
Only 1 DocumentArray is allowed per redis instance (db0).
We will support storing multiple DocumentArrays in one redis instance, full-text search, more query conitions and geo-filtering soon. +The benchmark test is on the way. +``` diff --git a/setup.py b/setup.py index 21ddf977266..41a9b4268eb 100644 --- a/setup.py +++ b/setup.py @@ -69,6 +69,7 @@ 'annlite>=0.3.2', 'qdrant-client~=0.7.3', 'elasticsearch>=8.2.0', + 'redis>=4.3.0', ], 'qdrant': [ 'qdrant-client~=0.7.3', @@ -82,6 +83,9 @@ 'elasticsearch': [ 'elasticsearch>=8.2.0', ], + 'redis': [ + 'redis>=4.3.0', + ], 'test': [ 'pytest', 'pytest-timeout', @@ -104,6 +108,7 @@ 'weaviate-client~=3.3.0', 'annlite>=0.3.2', 'elasticsearch>=8.2.0', + 'redis>=4.3.0', 'jina', ], }, diff --git a/tests/unit/array/docker-compose.yml b/tests/unit/array/docker-compose.yml index b3b1a844256..07de4842154 100644 --- a/tests/unit/array/docker-compose.yml +++ b/tests/unit/array/docker-compose.yml @@ -26,7 +26,11 @@ services: - "9200:9200" networks: - elastic - + redis: + image: redislabs/redisearch:2.6.0 + ports: + - "6379:6379" + networks: elastic: name: elastic \ No newline at end of file diff --git a/tests/unit/array/mixins/test_content.py b/tests/unit/array/mixins/test_content.py index 1a590211dc2..6d2d5896e1c 100644 --- a/tests/unit/array/mixins/test_content.py +++ b/tests/unit/array/mixins/test_content.py @@ -9,6 +9,7 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig @pytest.mark.parametrize( @@ -20,6 +21,7 @@ DocumentArrayWeaviate, DocumentArrayQdrant, DocumentArrayElastic, + DocumentArrayRedis, ], ) @pytest.mark.parametrize( @@ -33,6 +35,8 @@ def test_content_empty_getter_return_none(cls, content_attr, start_storage): DocumentArrayElastic, ]: da = cls(config={'n_dim': 3}) + elif cls == DocumentArrayRedis: + da = cls(config={'n_dim': 3, 'flush': True}) else: da = cls() assert 
getattr(da, content_attr) is None @@ -47,6 +51,7 @@ def test_content_empty_getter_return_none(cls, content_attr, start_storage): DocumentArrayWeaviate, DocumentArrayQdrant, DocumentArrayElastic, + DocumentArrayRedis, ], ) @pytest.mark.parametrize( @@ -67,6 +72,8 @@ def test_content_empty_setter(cls, content_attr, start_storage): DocumentArrayElastic, ]: da = cls(config={'n_dim': 3}) + elif cls == DocumentArrayRedis: + da = cls(config={'n_dim': 3, 'flush': True}) else: da = cls() setattr(da, content_attr[0], content_attr[1]) @@ -82,6 +89,7 @@ def test_content_empty_setter(cls, content_attr, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) @pytest.mark.parametrize( @@ -116,6 +124,7 @@ def test_content_getter_setter(cls, content_attr, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_content_empty(da_len, da_cls, config, start_storage): @@ -153,6 +162,7 @@ def test_content_empty(da_len, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=5)), (DocumentArrayQdrant, QdrantConfig(n_dim=5)), (DocumentArrayElastic, ElasticConfig(n_dim=5)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_embeddings_setter(da_len, da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_del.py b/tests/unit/array/mixins/test_del.py index 7656679bd81..dffd573e85d 100644 --- a/tests/unit/array/mixins/test_del.py +++ b/tests/unit/array/mixins/test_del.py @@ -116,6 +116,7 @@ def test_del_da_attribute(): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 
'distance': 'L2', 'flush': True}), ], ) def test_del_subindex(storage, config): diff --git a/tests/unit/array/mixins/test_embed.py b/tests/unit/array/mixins/test_embed.py index 31dfef5bd34..dc1a20c5d98 100644 --- a/tests/unit/array/mixins/test_embed.py +++ b/tests/unit/array/mixins/test_embed.py @@ -22,6 +22,7 @@ from docarray.array.sqlite import DocumentArraySqlite from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic +from docarray.array.redis import DocumentArrayRedis random_embed_models = { 'keras': lambda: tf.keras.Sequential( @@ -74,6 +75,7 @@ DocumentArrayQdrant, # DocumentArrayWeaviate, TODO: enable this DocumentArrayElastic, + DocumentArrayRedis, ], ) @pytest.mark.parametrize('N', [2, 10]) @@ -96,6 +98,10 @@ def test_embedding_on_random_network( DocumentArrayElastic, ]: da = da_cls.empty(N, config={'n_dim': embedding_shape}) + elif da_cls in [ + DocumentArrayRedis, + ]: + da = da_cls.empty(N, config={'n_dim': embedding_shape, 'flush': True}) else: da = da_cls.empty(N, config=None) da.tensors = np.random.random([N, *input_shape]).astype(np.float32) diff --git a/tests/unit/array/mixins/test_empty.py b/tests/unit/array/mixins/test_empty.py index 3301bb59485..7de86e9a5a8 100644 --- a/tests/unit/array/mixins/test_empty.py +++ b/tests/unit/array/mixins/test_empty.py @@ -8,6 +8,7 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig @pytest.mark.parametrize( @@ -19,6 +20,7 @@ (DocumentArrayWeaviate, WeaviateConfig(n_dim=5)), (DocumentArrayQdrant, QdrantConfig(n_dim=5)), (DocumentArrayElastic, ElasticConfig(n_dim=5)), + (DocumentArrayRedis, RedisConfig(n_dim=5, flush=True)), ], ) def test_empty_non_zero(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_eval_class.py 
b/tests/unit/array/mixins/test_eval_class.py index a8f9255e678..8467f6d4ede 100644 --- a/tests/unit/array/mixins/test_eval_class.py +++ b/tests/unit/array/mixins/test_eval_class.py @@ -15,6 +15,7 @@ ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256, 'flush': True}), ], ) @pytest.mark.parametrize( @@ -51,6 +52,7 @@ def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_stor ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256, 'flush': True}), ], ) @pytest.mark.parametrize( @@ -94,6 +96,7 @@ def test_eval_mixin_zero_match(storage, config, metric_fn, start_storage, kwargs ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256, 'flush': True}), ], ) def test_diff_len_should_raise(storage, config, start_storage): @@ -112,6 +115,7 @@ def test_diff_len_should_raise(storage, config, start_storage): ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), ('elasticsearch', {'n_dim': 256}), + ('redis', {'n_dim': 256, 'flush': True}), ], ) def test_diff_hash_fun_should_raise(storage, config, start_storage): @@ -130,6 +134,7 @@ def test_diff_hash_fun_should_raise(storage, config, start_storage): ('annlite', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), ('elasticsearch', {'n_dim': 3}), + ('redis', {'n_dim': 3, 'flush': True}), ], ) def test_same_hash_same_len_fun_should_work(storage, config, start_storage): @@ -158,6 +163,7 @@ def test_same_hash_same_len_fun_should_work(storage, config, start_storage): ('annlite', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), ('elasticsearch', {'n_dim': 3}), + ('redis', {'n_dim': 3, 'flush': True}), ], ) def test_adding_noise(storage, config, start_storage): @@ -188,6 +194,7 @@ def test_adding_noise(storage, config, start_storage): ('annlite', {'n_dim': 128}), ('qdrant', {'n_dim': 128}), ('elasticsearch', {'n_dim': 128}), + ('redis', 
{'n_dim': 128, 'flush': True}), ], ) @pytest.mark.parametrize( diff --git a/tests/unit/array/mixins/test_find.py b/tests/unit/array/mixins/test_find.py index 99594e1673b..6ad66f22a5b 100644 --- a/tests/unit/array/mixins/test_find.py +++ b/tests/unit/array/mixins/test_find.py @@ -33,6 +33,7 @@ def inv_cosine(*args): ('annlite', {'n_dim': 32}), ('qdrant', {'n_dim': 32}), ('elasticsearch', {'n_dim': 32}), + ('redis', {'n_dim': 32, 'flush': True}), ], ) @pytest.mark.parametrize('limit', [1, 5, 10]) @@ -73,6 +74,9 @@ def test_find(storage, config, limit, query, start_storage): t['cosine_similarity'].value for t in result[:, 'scores'] ] assert sorted(cosine_similarities, reverse=True) == cosine_similarities + if storage == 'redis': + cosine_distances = [t['score'].value for t in da[:, 'scores']] + assert sorted(cosine_distances, reverse=False) == cosine_distances elif storage in ['memory', 'annlite', 'elasticsearch']: cosine_distances = [t['cosine'].value for t in da[:, 'scores']] assert sorted(cosine_distances, reverse=False) == cosine_distances @@ -83,6 +87,10 @@ def test_find(storage, config, limit, query, start_storage): t['cosine_similarity'].value for t in da[:, 'scores'] ] assert sorted(cosine_similarities, reverse=True) == cosine_similarities + if storage == 'redis': + for da in result: + cosine_distances = [t['score'].value for t in da[:, 'scores']] + assert sorted(cosine_distances, reverse=False) == cosine_distances elif storage in ['memory', 'annlite', 'elasticsearch']: for da in result: cosine_distances = [t['cosine'].value for t in da[:, 'scores']] @@ -251,6 +259,15 @@ def test_find_by_tag(storage, config, start_storage): 'eq': operator.eq, } +numeric_operators_redis = { + '$gte': operator.ge, + '$gt': operator.gt, + '$lte': operator.le, + '$lt': operator.lt, + '$eq': operator.eq, + '$ne': operator.ne, +} + @pytest.mark.parametrize( 'storage,filter_gen,numeric_operators,operator', @@ -331,6 +348,17 @@ def test_find_by_tag(storage, config, start_storage): ) 
for operator in ['gt', 'gte', 'lt', 'lte'] ], + *[ + tuple( + [ + 'redis', + lambda operator, threshold: {'price': {operator: threshold}}, + numeric_operators_redis, + operator, + ] + ) + for operator in numeric_operators_redis.keys() + ], ], ) def test_search_pre_filtering( @@ -338,9 +366,16 @@ def test_search_pre_filtering( ): np.random.seed(0) n_dim = 128 - da = DocumentArray( - storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'int')]} - ) + + if storage == 'redis': + da = DocumentArray( + storage=storage, + config={'n_dim': n_dim, 'columns': [('price', 'int')], 'flush': True}, + ) + else: + da = DocumentArray( + storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'int')]} + ) da.extend( [ @@ -420,13 +455,31 @@ def test_search_pre_filtering( ) for operator in numeric_operators_annlite.keys() ], + *[ + tuple( + [ + 'redis', + lambda operator, threshold: {'price': {operator: threshold}}, + numeric_operators_redis, + operator, + ] + ) + for operator in numeric_operators_redis.keys() + ], ], ) def test_filtering(storage, filter_gen, operator, numeric_operators, start_storage): n_dim = 128 - da = DocumentArray( - storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'float')]} - ) + + if storage == 'redis': + da = DocumentArray( + storage=storage, + config={'n_dim': n_dim, 'columns': [('price', 'float')], 'flush': True}, + ) + else: + da = DocumentArray( + storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'float')]} + ) da.extend([Document(id=f'r{i}', tags={'price': i}) for i in range(50)]) thresholds = [10, 20, 30] @@ -465,6 +518,67 @@ def test_weaviate_filter_query(start_storage): assert isinstance(da._filter(filter={}), type(da)) +def test_redis_category_filter(start_storage): + n_dim = 128 + da = DocumentArray( + storage='redis', + config={ + 'n_dim': n_dim, + 'columns': [('color', 'str'), ('isfake', 'bool')], + 'flush': True, + }, + ) + + da.extend( + [ + Document( + id=f'r{i}', + embedding=np.random.rand(n_dim), + 
tags={'color': 'red', 'isfake': True}, + ) + for i in range(10) + ] + ) + + da.extend( + [ + Document( + id=f'r{i}', + embedding=np.random.rand(n_dim), + tags={'color': 'blue', 'isfake': False}, + ) + for i in range(10, 20) + ] + ) + + da.extend( + [ + Document( + id=f'r{i}', + embedding=np.random.rand(n_dim), + tags={'color': 'green', 'isfake': False}, + ) + for i in range(20, 30) + ] + ) + + results = da.find(np.random.rand(n_dim), filter={'color': {'$eq': 'red'}}) + assert len(results) > 0 + assert all([(r.tags['color'] == 'red') for r in results]) + + results = da.find(np.random.rand(n_dim), filter={'color': {'$ne': 'red'}}) + assert len(results) > 0 + assert all([(r.tags['color'] != 'red') for r in results]) + + results = da.find(np.random.rand(n_dim), filter={'isfake': {'$eq': True}}) + assert len(results) > 0 + assert all([(r.tags['isfake'] == True) for r in results]) + + results = da.find(np.random.rand(n_dim), filter={'isfake': {'$ne': True}}) + assert len(results) > 0 + assert all([(r.tags['isfake'] == False) for r in results]) + + @pytest.mark.parametrize('storage', ['memory']) def test_unsupported_pre_filtering(storage, start_storage): @@ -515,6 +629,7 @@ def test_elastic_id_filter(storage, config, limit): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) def test_find_subindex(storage, config): @@ -522,7 +637,7 @@ def test_find_subindex(storage, config): subindex_configs = {'@c': None} if storage == 'sqlite': subindex_configs['@c'] = dict() - elif storage in ['weaviate', 'annlite', 'qdrant', 'elasticsearch']: + elif storage in ['weaviate', 'annlite', 'qdrant', 'elasticsearch', 'redis']: subindex_configs['@c'] = {'n_dim': 2} da = DocumentArray( @@ -569,6 +684,7 @@ def test_find_subindex(storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), 
('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) def test_find_subindex_multimodal(storage, config): diff --git a/tests/unit/array/mixins/test_getset.py b/tests/unit/array/mixins/test_getset.py index 47bf333ec11..3a32ddbc8b4 100644 --- a/tests/unit/array/mixins/test_getset.py +++ b/tests/unit/array/mixins/test_getset.py @@ -1,3 +1,5 @@ +import gc + import numpy as np import pytest import scipy.sparse @@ -13,6 +15,7 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig from tests import random_docs rand_array = np.random.random([10, 3]) @@ -42,6 +45,7 @@ def nested_docs(): ('weaviate', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), ('elasticsearch', {'n_dim': 3}), + ('redis', {'n_dim': 3, 'flush': True}), ], ) @pytest.mark.parametrize( @@ -67,6 +71,7 @@ def test_set_embeddings_multi_kind(array, storage, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_da_get_embeddings(docs, config, da_cls, start_storage): @@ -88,6 +93,7 @@ def test_da_get_embeddings(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_embeddings_setter_da(docs, config, da_cls, start_storage): @@ -118,6 +124,7 @@ def test_embeddings_setter_da(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, 
flush=True)), ], ) def test_embeddings_wrong_len(docs, config, da_cls, start_storage): @@ -141,6 +148,7 @@ def test_embeddings_wrong_len(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_tensors_getter_da(docs, config, da_cls, start_storage): @@ -167,6 +175,7 @@ def test_tensors_getter_da(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_texts_getter_da(docs, config, da_cls, start_storage): @@ -202,6 +211,7 @@ def test_texts_getter_da(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_setter_by_sequences_in_selected_docs_da(docs, config, da_cls, start_storage): @@ -239,6 +249,7 @@ def test_setter_by_sequences_in_selected_docs_da(docs, config, da_cls, start_sto (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_texts_wrong_len(docs, config, da_cls, start_storage): @@ -262,6 +273,7 @@ def test_texts_wrong_len(docs, config, da_cls, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_tensors_wrong_len(docs, config, da_cls, start_storage): @@ -285,6 +297,7 @@ def test_tensors_wrong_len(docs, config, da_cls, 
start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_blobs_getter_setter(docs, da_cls, config, start_storage): @@ -317,6 +330,7 @@ def test_blobs_getter_setter(docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_ellipsis_getter(nested_docs, da_cls, config, start_storage): @@ -340,6 +354,7 @@ def test_ellipsis_getter(nested_docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_ellipsis_attribute_setter(nested_docs, da_cls, config, start_storage): @@ -360,6 +375,7 @@ def test_ellipsis_attribute_setter(nested_docs, da_cls, config, start_storage): (DocumentArrayAnnlite, AnnliteConfig(n_dim=6)), (DocumentArrayWeaviate, WeaviateConfig(n_dim=6)), (DocumentArrayElastic, ElasticConfig(n_dim=6)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_zero_embeddings(da_cls, config, start_storage): @@ -411,6 +427,7 @@ def embeddings_eq(emb1, emb2): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) def test_getset_subindex(storage, config): @@ -493,9 +510,13 @@ def test_getset_subindex(storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) def test_init_subindex(storage, config): + if 
storage == 'redis': + gc.collect() + num_top_level_docs = 5 num_chunks_per_doc = 3 subindex_configs = ( @@ -532,6 +553,7 @@ def test_init_subindex(storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) def test_set_on_subindex(storage, config): diff --git a/tests/unit/array/mixins/test_magic.py b/tests/unit/array/mixins/test_magic.py index 70a11979839..66edce5b152 100644 --- a/tests/unit/array/mixins/test_magic.py +++ b/tests/unit/array/mixins/test_magic.py @@ -8,6 +8,7 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig N = 100 @@ -32,6 +33,7 @@ def docs(): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=1, flush=True)), ], ) def test_iter_len_bool(da_cls, config, start_storage): @@ -58,6 +60,7 @@ def test_iter_len_bool(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_repr(da_cls, config, start_storage): @@ -77,6 +80,7 @@ def test_repr(da_cls, config, start_storage): ('weaviate', WeaviateConfig(n_dim=128)), ('qdrant', QdrantConfig(n_dim=128)), ('elasticsearch', ElasticConfig(n_dim=128)), + ('redis', RedisConfig(n_dim=128, flush=True)), ], ) def test_repr_str(docs, storage, config, start_storage): @@ -100,6 +104,7 @@ def test_repr_str(docs, storage, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), 
(DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_iadd(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_match.py b/tests/unit/array/mixins/test_match.py index 847b028d23e..f246a928e04 100644 --- a/tests/unit/array/mixins/test_match.py +++ b/tests/unit/array/mixins/test_match.py @@ -75,6 +75,7 @@ def doc_lists_to_doc_arrays(doc_lists, *args, **kwargs): ('annlite', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), ('weaviate', {'n_dim': 3}), + ('redis', {'n_dim': 3, 'flush': True}), ], ) @pytest.mark.parametrize('limit', [1, 2, 3]) @@ -91,9 +92,14 @@ def test_match(storage, config, doc_lists, limit, exclude_self, start_storage): for m in D1[:, 'matches']: assert len(m) == limit - expected_sorted_values = [ - D1[0].matches[i].scores['cosine'].value for i in range(limit) - ] + if storage == 'redis': + expected_sorted_values = [ + D1[0].matches[i].scores['score'].value for i in range(limit) + ] + else: + expected_sorted_values = [ + D1[0].matches[i].scores['cosine'].value for i in range(limit) + ] assert expected_sorted_values == sorted(expected_sorted_values) @@ -607,6 +613,15 @@ def test_match_ensure_scores_unique(): 'neq': operator.ne, } +numeric_operators_redis = { + '$gte': operator.ge, + '$gt': operator.gt, + '$lte': operator.le, + '$lt': operator.lt, + '$eq': operator.eq, + '$ne': operator.ne, +} + @pytest.mark.parametrize( 'storage,filter_gen,numeric_operators,operator', @@ -670,15 +685,33 @@ def test_match_ensure_scores_unique(): ) for operator in numeric_operators_annlite.keys() ], + *[ + tuple( + [ + 'redis', + lambda operator, threshold: {'price': {operator: threshold}}, + numeric_operators_redis, + operator, + ] + ) + for operator in numeric_operators_redis.keys() + ], ], ) def test_match_pre_filtering( storage, filter_gen, operator, numeric_operators, start_storage ): n_dim = 128 - da = DocumentArray( - storage=storage, config={'n_dim': n_dim, 'columns': [('price', 
'int')]} - ) + + if storage == 'redis': + da = DocumentArray( + storage=storage, + config={'n_dim': n_dim, 'columns': [('price', 'int')], 'flush': True}, + ) + else: + da = DocumentArray( + storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'int')]} + ) da.extend( [ @@ -722,6 +755,7 @@ def embeddings_eq(emb1, emb2): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) def test_match_subindex(storage, config): diff --git a/tests/unit/array/mixins/test_parallel.py b/tests/unit/array/mixins/test_parallel.py index e929dbb9479..52901c977d7 100644 --- a/tests/unit/array/mixins/test_parallel.py +++ b/tests/unit/array/mixins/test_parallel.py @@ -12,6 +12,7 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig def foo(d: Document): @@ -52,6 +53,7 @@ def test_parallel_map_apply_external_pool(pytestconfig, pool): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) @pytest.mark.parametrize('backend', ['process', 'thread']) @@ -108,6 +110,7 @@ def test_parallel_map( (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) @pytest.mark.parametrize('backend', ['thread']) @@ -179,6 +182,7 @@ def test_parallel_map_batch( (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) 
def test_map_lambda(pytestconfig, da_cls, config, start_storage): @@ -207,6 +211,7 @@ def test_map_lambda(pytestconfig, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=10)), (DocumentArrayQdrant, QdrantConfig(n_dim=10)), (DocumentArrayElastic, ElasticConfig(n_dim=10)), + (DocumentArrayRedis, RedisConfig(n_dim=10, flush=True)), ], ) def test_apply_partial(pytestconfig, da_cls, config, start_storage): @@ -236,6 +241,7 @@ def test_apply_partial(pytestconfig, da_cls, config, start_storage): ('weaviate', WeaviateConfig(n_dim=256)), ('qdrant', QdrantConfig(n_dim=256)), ('elasticsearch', ElasticConfig(n_dim=256)), + ('redis', RedisConfig(n_dim=256, flush=True)), ], ) @pytest.mark.parametrize('backend', ['thread', 'process']) diff --git a/tests/unit/array/mixins/test_plot.py b/tests/unit/array/mixins/test_plot.py index e61d6a082fd..e9d76fa97e9 100644 --- a/tests/unit/array/mixins/test_plot.py +++ b/tests/unit/array/mixins/test_plot.py @@ -14,6 +14,7 @@ from docarray.array.annlite import DocumentArrayAnnlite from docarray.array.storage.annlite import AnnliteConfig from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig @pytest.mark.parametrize('keep_aspect_ratio', [True, False]) @@ -27,6 +28,7 @@ # (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128, scroll_batch_size=8)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_sprite_fail_tensor_success_uri( @@ -65,6 +67,7 @@ def test_sprite_fail_tensor_success_uri( (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=128, scroll_batch_size=8)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=128)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=128, flush=True)), ], ) @pytest.mark.parametrize('canvas_size', [50, 512]) @@ -114,6 +117,7 @@ def 
da_and_dam(start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 3}}), (DocumentArrayAnnlite, {'config': {'n_dim': 3}}), (DocumentArrayQdrant, {'config': {'n_dim': 3}}), + (DocumentArrayRedis, {'config': {'n_dim': 3, 'flush': True}}), ] ] @@ -178,6 +182,7 @@ def test_plot_embeddings_same_path(tmpdir, da_cls, config_gen, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_summary_homo_hetero(da_cls, config, start_storage): @@ -201,6 +206,7 @@ def test_summary_homo_hetero(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_empty_get_attributes(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_sample.py b/tests/unit/array/mixins/test_sample.py index 2a01c1bfd01..5844db56afc 100644 --- a/tests/unit/array/mixins/test_sample.py +++ b/tests/unit/array/mixins/test_sample.py @@ -8,6 +8,7 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig @pytest.mark.parametrize( @@ -19,6 +20,7 @@ (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_sample(da_cls, config, start_storage): @@ -44,6 +46,7 @@ def test_sample(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + 
(DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_sample_with_seed(da_cls, config, start_storage): @@ -68,6 +71,7 @@ def test_sample_with_seed(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_shuffle(da_cls, config, start_storage): @@ -93,6 +97,7 @@ def test_shuffle(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_shuffle_with_seed(da_cls, config, start_storage): diff --git a/tests/unit/array/mixins/test_text.py b/tests/unit/array/mixins/test_text.py index 0f5ace32655..5c1eb163cb7 100644 --- a/tests/unit/array/mixins/test_text.py +++ b/tests/unit/array/mixins/test_text.py @@ -9,6 +9,7 @@ from docarray.array.storage.weaviate import WeaviateConfig from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig @pytest.fixture(scope='function') @@ -30,6 +31,7 @@ def docs(): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_da_vocabulary(da_cls, config, docs, min_freq, start_storage): @@ -58,6 +60,7 @@ def test_da_vocabulary(da_cls, config, docs, min_freq, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_da_text_to_tensor_non_max_len(docs, da_cls, config, 
start_storage): @@ -86,6 +89,7 @@ def test_da_text_to_tensor_non_max_len(docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_da_text_to_tensor_max_len_3(docs, da_cls, config, start_storage): @@ -116,6 +120,7 @@ def test_da_text_to_tensor_max_len_3(docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_da_text_to_tensor_max_len_1(docs, da_cls, config, start_storage): @@ -146,6 +151,7 @@ def test_da_text_to_tensor_max_len_1(docs, da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_convert_text_tensor_random_text(da_cls, docs, config, start_storage): diff --git a/tests/unit/array/mixins/test_traverse.py b/tests/unit/array/mixins/test_traverse.py index 0a128551de2..af8a75c296e 100644 --- a/tests/unit/array/mixins/test_traverse.py +++ b/tests/unit/array/mixins/test_traverse.py @@ -9,6 +9,7 @@ from docarray.array.weaviate import DocumentArrayWeaviate from docarray.array.annlite import DocumentArrayAnnlite from docarray.array.elastic import DocumentArrayElastic +from docarray.array.redis import DocumentArrayRedis from tests import random_docs # some random prime number for sanity check @@ -42,6 +43,7 @@ def doc_req(): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def 
test_traverse_type(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -61,6 +63,7 @@ def test_traverse_type(doc_req, filter_fn, da_cls, kwargs, start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_root(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -80,6 +83,7 @@ def test_traverse_root(doc_req, filter_fn, da_cls, kwargs, start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -99,6 +103,7 @@ def test_traverse_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_root_plus_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -119,6 +124,7 @@ def test_traverse_root_plus_chunk(doc_req, filter_fn, da_cls, kwargs, start_stor (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_chunk_plus_root(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -139,6 +145,7 @@ def test_traverse_chunk_plus_root(doc_req, filter_fn, da_cls, kwargs, start_stor (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) 
def test_traverse_match(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -158,6 +165,7 @@ def test_traverse_match(doc_req, filter_fn, da_cls, kwargs, start_storage): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_match_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -177,6 +185,7 @@ def test_traverse_match_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage) (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_root_match_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -195,6 +204,7 @@ def test_traverse_root_match_chunk(doc_req, filter_fn, da_cls, kwargs, start_sto (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flatten_embedding(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -214,6 +224,7 @@ def test_traverse_flatten_embedding(doc_req, filter_fn, da_cls, kwargs, start_st (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flatten_root(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -232,6 +243,7 @@ def test_traverse_flatten_root(doc_req, filter_fn, da_cls, kwargs, start_storage (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': 
{'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flatten_chunk(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -250,6 +262,7 @@ def test_traverse_flatten_chunk(doc_req, filter_fn, da_cls, kwargs, start_storag (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flatten_root_plus_chunk( @@ -270,6 +283,7 @@ def test_traverse_flatten_root_plus_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flatten_match(doc_req, filter_fn, da_cls, kwargs, start_storage): @@ -288,6 +302,7 @@ def test_traverse_flatten_match(doc_req, filter_fn, da_cls, kwargs, start_storag (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flatten_match_chunk( @@ -308,6 +323,7 @@ def test_traverse_flatten_match_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flatten_root_match_chunk( @@ -334,6 +350,7 @@ def test_traverse_flatten_root_match_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flattened_per_path_embedding( @@ -358,6 +375,7 @@ def test_traverse_flattened_per_path_embedding( (DocumentArrayWeaviate, {'config': {'n_dim': 
10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flattened_per_path_root( @@ -378,6 +396,7 @@ def test_traverse_flattened_per_path_root( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flattened_per_path_chunk( @@ -398,6 +417,7 @@ def test_traverse_flattened_per_path_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flattened_per_path_root_plus_chunk( @@ -419,6 +439,7 @@ def test_traverse_flattened_per_path_root_plus_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flattened_per_path_match( @@ -439,6 +460,7 @@ def test_traverse_flattened_per_path_match( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flattened_per_path_root_match_chunk( @@ -462,6 +484,7 @@ def test_traverse_flattened_per_path_root_match_chunk( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_docuset_traverse_over_iterator_HACKY(da_cls, kwargs, filter_fn): @@ -489,6 +512,7 @@ def 
test_docuset_traverse_over_iterator_HACKY(da_cls, kwargs, filter_fn): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_docuset_traverse_over_iterator_CAVEAT(da_cls, kwargs, filter_fn): @@ -555,6 +579,7 @@ def test_traverse_chunkarray(filter_fn): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) @pytest.mark.parametrize( @@ -600,6 +625,7 @@ def test_filter_fn_traverse_flat( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) @pytest.mark.parametrize( @@ -651,6 +677,7 @@ def test_filter_fn_traverse_flat_per_path( (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traversal_path(da_cls, kwargs): @@ -669,6 +696,7 @@ def test_traversal_path(da_cls, kwargs): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_traverse_flat_root_itself(da_cls, kwargs): @@ -691,6 +719,7 @@ def da_and_dam(N): (DocumentArrayWeaviate, {'config': {'n_dim': 10}}), (DocumentArrayQdrant, {'config': {'n_dim': 10}}), (DocumentArrayElastic, {'config': {'n_dim': 10}}), + (DocumentArrayRedis, {'config': {'n_dim': 10, 'flush': True}}), ], ) def test_flatten(da_cls, kwargs): diff --git 
a/tests/unit/array/storage/redis/test_backend.py b/tests/unit/array/storage/redis/test_backend.py new file mode 100644 index 00000000000..04de55febba --- /dev/null +++ b/tests/unit/array/storage/redis/test_backend.py @@ -0,0 +1,117 @@ +from abc import ABC + +import pytest +from docarray import DocumentArray +from docarray.array.storage.memory import GetSetDelMixin, SequenceLikeMixin +from docarray.array.storage.redis.backend import BackendMixin, RedisConfig + + +class StorageMixins(BackendMixin, GetSetDelMixin, SequenceLikeMixin, ABC): + ... + + +class DocumentArrayDummy(StorageMixins, DocumentArray): + def __new__(cls, *args, **kwargs): + return super().__new__(cls) + + def _load_offset2ids(self): + pass + + def _save_offset2ids(self): + pass + + +type_convert = { + 'int': b'NUMERIC', + 'float': b'NUMERIC', + 'double': b'NUMERIC', + 'long': b'NUMERIC', + 'str': b'TEXT', + 'bytes': b'TEXT', + 'bool': b'NUMERIC', +} + + +@pytest.fixture(scope='function') +def da_redis(): + cfg = RedisConfig(n_dim=128, flush=True) + da_redis = DocumentArrayDummy(storage='redis', config=cfg) + return da_redis + + +@pytest.mark.parametrize('distance', ['L2', 'IP', 'COSINE']) +@pytest.mark.parametrize( + 'method,initial_cap,ef_construction,block_size', + [ + ('HNSW', 10, 250, 1000000), + ('FLAT', 10, 250, 1000000), + ], +) +@pytest.mark.parametrize( + 'columns', + [ + [('attr1', 'str'), ('attr2', 'bytes')], + [('attr1', 'int'), ('attr2', 'float')], + [('attr1', 'double'), ('attr2', 'long'), ('attr3', 'bool')], + ], +) +@pytest.mark.parametrize( + 'redis_config', + [ + {'decode_responses': True}, + {'decode_responses': False}, + {'retry_on_timeout': True}, + {'decode_responses': True, 'retry_on_timeout': True}, + {}, + ], +) +def test_init_storage( + distance, + columns, + method, + initial_cap, + ef_construction, + block_size, + redis_config, + start_storage, +): + cfg = RedisConfig( + n_dim=128, + distance=distance, + flush=True, + columns=columns, + method=method, + 
initial_cap=initial_cap, + ef_construction=ef_construction, + block_size=block_size, + redis_config=redis_config, + ) + redis_da = DocumentArrayDummy(storage='redis', config=cfg) + + assert redis_da._client.info()['tcp_port'] == redis_da._config.port + assert redis_da._client.ft().info()['attributes'][0][1] == b'embedding' + assert redis_da._client.ft().info()['attributes'][0][5] == b'VECTOR' + + for i in range(len(columns)): + assert redis_da._client.ft().info()['attributes'][i + 1][1] == bytes( + redis_da._config.columns[i][0], 'utf-8' + ) + assert ( + redis_da._client.ft().info()['attributes'][i + 1][5] + == type_convert[redis_da._config.columns[i][1]] + ) + + +def test_init_storage_update_schema(start_storage): + + cfg = RedisConfig(n_dim=128, columns=[('attr1', 'str')], flush=True) + redis_da = DocumentArrayDummy(storage='redis', config=cfg) + assert redis_da._client.ft().info()['attributes'][1][1] == b'attr1' + + cfg = RedisConfig(n_dim=128, columns=[('attr2', 'str')], update_schema=False) + redis_da = DocumentArrayDummy(storage='redis', config=cfg) + assert redis_da._client.ft().info()['attributes'][1][1] == b'attr1' + + cfg = RedisConfig(n_dim=128, columns=[('attr2', 'str')], update_schema=True) + redis_da = DocumentArrayDummy(storage='redis', config=cfg) + assert redis_da._client.ft().info()['attributes'][1][1] == b'attr2' diff --git a/tests/unit/array/storage/redis/test_getsetdel.py b/tests/unit/array/storage/redis/test_getsetdel.py new file mode 100644 index 00000000000..cd2b3f3d43c --- /dev/null +++ b/tests/unit/array/storage/redis/test_getsetdel.py @@ -0,0 +1,151 @@ +from abc import ABC + +import numpy as np +import pytest +from docarray import Document, DocumentArray +from docarray.array.storage.base.helper import Offset2ID +from docarray.array.storage.memory import SequenceLikeMixin +from docarray.array.storage.redis.getsetdel import GetSetDelMixin +from docarray.array.storage.redis.backend import BackendMixin, RedisConfig + + +class 
StorageMixins(BackendMixin, GetSetDelMixin, SequenceLikeMixin, ABC): + ... + + +class DocumentArrayDummy(StorageMixins, DocumentArray): + def __new__(cls, *args, **kwargs): + return super().__new__(cls) + + def _load_offset2ids(self): + pass + + def _save_offset2ids(self): + pass + + +@pytest.fixture(scope='function') +def columns(): + columns = [ + ('col_str', 'str'), + ('col_bytes', 'bytes'), + ('col_int', 'int'), + ('col_float', 'float'), + ('col_long', 'long'), + ('col_double', 'double'), + ] + return columns + + +@pytest.fixture(scope='function') +def da_redis(columns): + cfg = RedisConfig(n_dim=3, flush=True, columns=columns) + da_redis = DocumentArrayDummy(storage='redis', config=cfg) + return da_redis + + +@pytest.mark.parametrize( + 'embedding', [[1, 2, 3], [1.0, 2.0, 3.0], [1, 2, 3, 4, 5], None] +) +@pytest.mark.parametrize('text', ['test_text', None]) +@pytest.mark.parametrize( + 'tag', + [ + {'tag_1': 'tag1'}, + {'tag_1': 'tag1', 'tag_2': 'tag2'}, + {'tag_1': 'tag1', 'tag_2': 'tag2', 'tag_3': 'tag3'}, + None, + ], +) +@pytest.mark.parametrize( + 'col', + [ + {'col_str': 'hello', 'col_bytes': b'world'}, + {'col_int': 1, 'col_float': 1.0}, + {'col_long': 123, 'col_double': 1.1}, + None, + ], +) +def test_document_to_embedding( + embedding, text, tag, col, da_redis, columns, start_storage +): + tags = {} + if tag is not None: + tags.update(tag) + if col is not None: + tags.update(col) + doc = Document(embedding=embedding, text=text, tags=tags) + payload = da_redis._document_to_redis(doc) + + if embedding is None: + assert np.allclose( + np.frombuffer(payload['embedding'], dtype=np.float32), np.zeros((3)) + ) + else: + assert np.allclose( + np.frombuffer(payload['embedding'], dtype=np.float32), np.array(embedding) + ) + + if text is None: + with pytest.raises(KeyError): + payload['text'] + else: + assert payload['text'] == text + + for col, _ in columns: + if col in tags: + assert payload[col] == tags[col] + else: + with pytest.raises(KeyError): + 
payload[col] + + for key in tags: + if key not in (col[0] for col in columns): + assert key not in payload + + +@pytest.mark.parametrize( + 'doc', + [ + Document(id='0'), + Document(id='1', text='hello world'), + Document(id='2', embedding=[1, 2, 3], tags={'tag_1': 'tag1', 'tag_2': 'tag2'}), + Document( + text='hello world', + embedding=[1, 2, 3], + tags={'tag_1': 'tag1', 'tag_2': 'tag2'}, + chunks=[Document(text='token1'), Document(text='token2')], + ), + ], +) +def test_setgetdel_doc_by_id(doc, da_redis, start_storage): + da_redis._set_doc_by_id(doc.id, doc) + doc_get = da_redis._get_doc_by_id(doc.id) + assert doc == doc_get + + da_redis._del_doc_by_id(doc.id) + with pytest.raises(KeyError): + da_redis._get_doc_by_id(doc.id) + + +def test_clear_storage(da_redis, start_storage): + for i in range(3): + doc = Document(id=str(i)) + da_redis._set_doc_by_id(str(i), doc) + + da_redis._clear_storage() + + for i in range(3): + with pytest.raises(KeyError): + da_redis._get_doc_by_id(i) + + +def test_offset2ids(da_redis, start_storage): + ids = [str(i) for i in range(3)] + for id in ids: + doc = Document(id=id) + da_redis._set_doc_by_id(id, doc) + da_redis._offset2ids = Offset2ID(ids) + da_redis._save_offset2ids() + da_redis._load_offset2ids() + assert da_redis._offset2ids.ids == ids diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index 70f24759df7..0e1ff6884e1 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -6,6 +6,8 @@ from docarray.array.annlite import AnnliteConfig from docarray.array.qdrant import QdrantConfig from docarray.array.elastic import ElasticConfig +from docarray.array.redis import RedisConfig +import gc @pytest.fixture @@ -27,6 +29,7 @@ def indices(): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def 
test_getter_int_str(docs, storage, config, start_storage): @@ -59,6 +62,7 @@ def test_getter_int_str(docs, storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_setter_int_str(docs, storage, config, start_storage): @@ -88,6 +92,7 @@ def test_setter_int_str(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_del_int_str(docs, storage, config, start_storage, indices): @@ -122,6 +127,7 @@ def test_del_int_str(docs, storage, config, start_storage, indices): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_slice(docs, storage, config, start_storage): @@ -160,6 +166,7 @@ def test_slice(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_sequence_bool_index(docs, storage, config, start_storage): @@ -206,6 +213,7 @@ def test_sequence_bool_index(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_sequence_int(docs, nparray, storage, config, start_storage): @@ -242,6 +250,7 @@ def test_sequence_int(docs, nparray, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_sequence_str(docs, storage, config, start_storage): @@ 
-276,6 +285,7 @@ def test_sequence_str(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_docarray_list_tuple(docs, storage, config, start_storage): @@ -296,6 +306,7 @@ def test_docarray_list_tuple(docs, storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_path_syntax_indexing(storage, config, start_storage): @@ -335,6 +346,7 @@ def test_path_syntax_indexing(storage, config, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) @pytest.mark.parametrize('use_subindex', [False, True]) @@ -476,6 +488,7 @@ def test_getset_subindex(storage, config, start_storage): ('annlite', lambda: AnnliteConfig(n_dim=123)), ('qdrant', lambda: QdrantConfig(n_dim=123)), ('elasticsearch', lambda: ElasticConfig(n_dim=123)), + ('redis', lambda: RedisConfig(n_dim=123, flush=True)), ], ) def test_attribute_indexing(storage, config_gen, start_storage, size): @@ -506,7 +519,8 @@ def test_attribute_indexing(storage, config_gen, start_storage, size): @pytest.mark.parametrize( - 'storage', ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elasticsearch'] + 'storage', + ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elasticsearch', 'redis'], ) def test_tensor_attribute_selector(storage, start_storage): import scipy.sparse @@ -517,6 +531,8 @@ def test_tensor_attribute_selector(storage, start_storage): if storage in ('annlite', 'weaviate', 'qdrant', 'elasticsearch'): da = DocumentArray(storage=storage, config={'n_dim': 10}) + elif storage == 'redis': + da = DocumentArray(storage=storage, config={'n_dim': 10, 'flush': 
True}) else: da = DocumentArray(storage=storage) @@ -558,11 +574,14 @@ def test_advance_selector_mixed(storage): @pytest.mark.parametrize( - 'storage', ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elasticsearch'] + 'storage', + ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elasticsearch', 'redis'], ) def test_single_boolean_and_padding(storage, start_storage): if storage in ('annlite', 'weaviate', 'qdrant', 'elasticsearch'): da = DocumentArray(storage=storage, config={'n_dim': 10}) + elif storage == 'redis': + da = DocumentArray(storage=storage, config={'n_dim': 10, 'flush': True}) else: da = DocumentArray(storage=storage) da.extend(DocumentArray.empty(3)) @@ -590,6 +609,7 @@ def test_single_boolean_and_padding(storage, start_storage): ('annlite', lambda: AnnliteConfig(n_dim=123)), ('qdrant', lambda: QdrantConfig(n_dim=123)), ('elasticsearch', lambda: ElasticConfig(n_dim=123)), + ('redis', lambda: RedisConfig(n_dim=123, flush=True)), ], ) def test_edge_case_two_strings(storage, config_gen, start_storage): @@ -658,6 +678,9 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): with pytest.raises(IndexError): da['1', 'hellohello'] = 'hello' + if storage == 'redis': + gc.collect() + @pytest.mark.parametrize( 'storage,config', @@ -667,6 +690,7 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), + ('redis', RedisConfig(n_dim=123, flush=True)), ], ) def test_offset2ids_persistence(storage, config, start_storage): @@ -688,6 +712,9 @@ def test_offset2ids_persistence(storage, config, start_storage): da._persist = True da.__del__() + if storage == 'redis': + config.flush = False + config.update_schema = False da = DocumentArray(storage=storage, config=config) assert da[:, 'id'] == da_ids diff --git a/tests/unit/array/test_construct.py b/tests/unit/array/test_construct.py index 303cb80061c..e25bb7576c2 
100644 --- a/tests/unit/array/test_construct.py +++ b/tests/unit/array/test_construct.py @@ -9,6 +9,7 @@ from docarray.array.storage.qdrant import QdrantConfig from docarray.array.weaviate import DocumentArrayWeaviate, WeaviateConfig from docarray.array.elastic import DocumentArrayElastic, ElasticConfig +from docarray.array.redis import DocumentArrayRedis, RedisConfig @pytest.mark.parametrize( @@ -20,6 +21,7 @@ (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) def test_construct_docarray(da_cls, config, start_storage): @@ -68,6 +70,7 @@ def test_construct_docarray(da_cls, config, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) @pytest.mark.parametrize('is_copy', [True, False]) @@ -97,6 +100,7 @@ def test_docarray_copy_singleton(da_cls, config, is_copy, start_storage): (DocumentArrayWeaviate, WeaviateConfig(n_dim=128)), (DocumentArrayQdrant, QdrantConfig(n_dim=128)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) @pytest.mark.parametrize('is_copy', [True, False]) @@ -125,6 +129,7 @@ def test_docarray_copy_da(da_cls, config, is_copy, start_storage): (DocumentArrayAnnlite, AnnliteConfig(n_dim=1)), (DocumentArrayQdrant, QdrantConfig(n_dim=1)), (DocumentArrayElastic, ElasticConfig(n_dim=128)), + (DocumentArrayRedis, RedisConfig(n_dim=128, flush=True)), ], ) @pytest.mark.parametrize('is_copy', [True, False]) diff --git a/tests/unit/array/test_pull_out.py b/tests/unit/array/test_pull_out.py index fc3a194a7db..76a3b5b1c69 100644 --- a/tests/unit/array/test_pull_out.py +++ b/tests/unit/array/test_pull_out.py @@ -22,6 +22,7 @@ def docs(): ('annlite', {'n_dim': 2}), 
('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), + ('redis', {'n_dim': 2, 'flush': True}), ], ) def test_update_embedding(docs, storage, config, start_storage): @@ -56,6 +57,7 @@ def test_update_embedding(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), + ('redis', {'n_dim': 2, 'flush': True}), ], ) def test_update_doc_embedding(docs, storage, config, start_storage): @@ -90,6 +92,7 @@ def test_update_doc_embedding(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), + ('redis', {'n_dim': 2, 'flush': True}), ], ) def test_batch_update_embedding(docs, storage, config, start_storage): @@ -122,6 +125,7 @@ def test_batch_update_embedding(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), + ('redis', {'n_dim': 2, 'flush': True}), ], ) def test_batch_update_doc_embedding(docs, storage, config, start_storage): @@ -156,6 +160,7 @@ def test_batch_update_doc_embedding(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), + ('redis', {'n_dim': 2, 'flush': True}), ], ) def test_update_id(docs, storage, config, start_storage): @@ -177,6 +182,7 @@ def test_update_id(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), + ('redis', {'n_dim': 2, 'flush': True}), ], ) def test_update_doc_id(docs, storage, config, start_storage): @@ -197,6 +203,7 @@ def test_update_doc_id(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), + ('redis', {'n_dim': 2, 'flush': True}), ], ) def test_batch_update_id(docs, storage, config, start_storage): @@ -220,6 +227,7 @@ def test_batch_update_id(docs, storage, config, start_storage): ('annlite', {'n_dim': 2}), 
('qdrant', {'n_dim': 2}), ('elasticsearch', {'n_dim': 2}), + ('redis', {'n_dim': 2, 'flush': True}), ], ) def test_batch_update_doc_id(docs, storage, config, start_storage): diff --git a/tests/unit/array/test_sequence.py b/tests/unit/array/test_sequence.py index 8487e597126..33b25b8db4d 100644 --- a/tests/unit/array/test_sequence.py +++ b/tests/unit/array/test_sequence.py @@ -1,20 +1,21 @@ +import gc +import tempfile import uuid +import numpy as np import pytest -import tempfile - from docarray import Document, DocumentArray +from docarray.array.elastic import DocumentArrayElastic from docarray.array.memory import DocumentArrayInMemory from docarray.array.qdrant import DocumentArrayQdrant +from docarray.array.redis import DocumentArrayRedis from docarray.array.sqlite import DocumentArraySqlite -from docarray.array.storage.sqlite import SqliteConfig -from docarray.array.weaviate import DocumentArrayWeaviate -from docarray.array.elastic import DocumentArrayElastic +from docarray.array.storage.elastic import ElasticConfig from docarray.array.storage.qdrant import QdrantConfig +from docarray.array.storage.redis import RedisConfig +from docarray.array.storage.sqlite import SqliteConfig from docarray.array.storage.weaviate import WeaviateConfig -from docarray.array.storage.elastic import ElasticConfig -import numpy as np - +from docarray.array.weaviate import DocumentArrayWeaviate from tests.conftest import tmpfile @@ -26,6 +27,7 @@ (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=1)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=1)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=1)), + (DocumentArrayRedis, lambda: RedisConfig(n_dim=1, flush=True)), ], ) def test_insert(da_cls, config, start_storage): @@ -48,6 +50,7 @@ def test_insert(da_cls, config, start_storage): (DocumentArrayWeaviate, lambda: WeaviateConfig(n_dim=1)), (DocumentArrayQdrant, lambda: QdrantConfig(n_dim=1)), (DocumentArrayElastic, lambda: ElasticConfig(n_dim=1)), + (DocumentArrayRedis, 
lambda: RedisConfig(n_dim=1, flush=True)), ], ) def test_append_extend(da_cls, config, start_storage): @@ -81,12 +84,16 @@ def update_config_inplace(config, tmpdir, tmpfile): ('weaviate', {'n_dim': 3, 'name': 'Weaviate'}), ('qdrant', {'n_dim': 3, 'collection_name': 'qdrant'}), ('elasticsearch', {'n_dim': 3, 'index_name': 'elasticsearch'}), + ('redis', {'n_dim': 3, 'flush': True}), ], ) def test_context_manager_from_disk(storage, config, start_storage, tmpdir, tmpfile): config = config update_config_inplace(config, tmpdir, tmpfile) + if storage == 'redis': + gc.collect() + da = DocumentArray(storage=storage, config=config) with da as da_open: @@ -96,6 +103,9 @@ def test_context_manager_from_disk(storage, config, start_storage, tmpdir, tmpfi assert len(da) == 2 assert len(da._offset2ids.ids) == 2 + if storage == 'redis': + config['flush'] = False + config['update_schema'] = False da2 = DocumentArray(storage=storage, config=config) assert len(da2) == 2 @@ -114,6 +124,7 @@ def test_context_manager_from_disk(storage, config, start_storage, tmpdir, tmpfi ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) def test_extend_subindex(storage, config): @@ -159,6 +170,7 @@ def test_extend_subindex(storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) def test_append_subindex(storage, config): @@ -208,6 +220,7 @@ def embeddings_eq(emb1, emb2): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), ('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) @pytest.mark.parametrize( @@ -234,6 +247,7 @@ def test_del_and_append(index, storage, config): ('qdrant', {'n_dim': 3, 'distance': 'euclidean'}), 
('elasticsearch', {'n_dim': 3, 'distance': 'l2_norm'}), ('sqlite', dict()), + ('redis', {'n_dim': 3, 'distance': 'L2', 'flush': True}), ], ) @pytest.mark.parametrize(