From 45825aae0f67ce42e6cfc66d4fe6a7bd5892f7ed Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Tue, 6 Sep 2022 09:28:44 +0200 Subject: [PATCH 01/10] feat: columns should be a dictionary not list of tuples --- docarray/array/storage/annlite/backend.py | 9 +++------ docarray/array/storage/base/backend.py | 14 +++++++++++--- docarray/array/storage/elastic/backend.py | 4 ++-- docarray/array/storage/elastic/getsetdel.py | 4 +++- docarray/array/storage/qdrant/backend.py | 2 +- docarray/array/storage/qdrant/getsetdel.py | 4 +++- docarray/array/storage/redis/backend.py | 4 ++-- docarray/array/storage/redis/getsetdel.py | 2 +- docarray/array/storage/sqlite/backend.py | 12 ++---------- docarray/array/storage/weaviate/backend.py | 9 ++++----- 10 files changed, 32 insertions(+), 32 deletions(-) diff --git a/docarray/array/storage/annlite/backend.py b/docarray/array/storage/annlite/backend.py index 5a5cd625ede..5bd9d5cc918 100644 --- a/docarray/array/storage/annlite/backend.py +++ b/docarray/array/storage/annlite/backend.py @@ -27,7 +27,7 @@ class AnnliteConfig: ef_construction: Optional[int] = None ef_search: Optional[int] = None max_connection: Optional[int] = None - columns: Optional[List[Tuple[str, str]]] = None + columns: Optional[Union[List[Tuple[str, str]], Dict[str, str]]] = None class BackendMixin(BaseBackendMixin): @@ -53,11 +53,8 @@ def _map_embedding(self, embedding: 'ArrayType') -> 'ArrayType': def _normalize_columns(self, columns): columns = super()._normalize_columns(columns) - for i in range(len(columns)): - columns[i] = ( - columns[i][0], - self._map_type(columns[i][1]), - ) + for key in columns.keys(): + columns[key] = self._map_type(columns[key]) return columns def _ensure_unique_config( diff --git a/docarray/array/storage/base/backend.py b/docarray/array/storage/base/backend.py index 68d77611d80..be196d5e229 100644 --- a/docarray/array/storage/base/backend.py +++ b/docarray/array/storage/base/backend.py @@ -1,7 +1,8 @@ from abc import ABC, abstractmethod +import warnings from collections import namedtuple from dataclasses import is_dataclass, asdict -from typing import Dict, Optional, TYPE_CHECKING +from typing import Dict, Optional, TYPE_CHECKING, Union, List, Tuple if TYPE_CHECKING: from docarray.typing import DocumentArraySourceType, ArrayType @@ -77,7 +78,14 @@ def _map_embedding(self, embedding: 'ArrayType') -> 'ArrayType': def _map_type(self, col_type: str) -> str: return self.TYPE_MAP[col_type].type - def _normalize_columns(self, columns): + def _normalize_columns( + self, columns: Optional[Union[List[Tuple[str, str]], Dict[str, str]]] + ) -> Dict[str, str]: if columns is None: - return [] + return {} + if isinstance(columns, list): + warnings.warn( + 'Using "columns" as a List of Tuples will be deprecated soon. Please provide a Dictionary' + ) + columns = {col_desc[0]: col_desc[1] for col_desc in columns} return columns diff --git a/docarray/array/storage/elastic/backend.py b/docarray/array/storage/elastic/backend.py index 882dca65e5f..27279eff94a 100644 --- a/docarray/array/storage/elastic/backend.py +++ b/docarray/array/storage/elastic/backend.py @@ -44,7 +44,7 @@ class ElasticConfig: batch_size: int = 64 ef_construction: Optional[int] = None m: Optional[int] = None - columns: Optional[List[Tuple[str, str]]] = None + columns: Optional[Union[List[Tuple[str, str]], Dict[str, str]]] = None _banned_indexname_chars = ['[', ' ', '"', '*', '\\', '<', '|', ',', '>', '/', '?', ']'] @@ -150,7 +150,7 @@ def _build_schema_from_elastic_config(self, elastic_config): 'index': True, } - for col, coltype in self._config.columns: + for col, coltype in self._config.columns.items(): da_schema['mappings']['properties'][col] = { 'type': self._map_type(coltype), 'index': True, diff --git a/docarray/array/storage/elastic/getsetdel.py b/docarray/array/storage/elastic/getsetdel.py index 7c20a0d2693..fcc93dc6924 100644 --- a/docarray/array/storage/elastic/getsetdel.py +++ b/docarray/array/storage/elastic/getsetdel.py @@ -12,7 +12,9 @@ class GetSetDelMixin(BaseGetSetDelMixin): MAX_ES_RETURNED_DOCS = 10000 def _document_to_elastic(self, doc: 'Document') -> Dict: - extra_columns = {col: doc.tags.get(col) for col, _ in self._config.columns} + extra_columns = { + col: doc.tags.get(col) for col, _ in self._config.columns.items() + } request = { '_op_type': 'index', '_id': doc.id, diff --git a/docarray/array/storage/qdrant/backend.py b/docarray/array/storage/qdrant/backend.py index b24df4171aa..5561def8801 100644 --- a/docarray/array/storage/qdrant/backend.py +++ b/docarray/array/storage/qdrant/backend.py @@ -42,7 +42,7 @@ class QdrantConfig: ef_construct: Optional[int] = None full_scan_threshold: Optional[int] = None m: Optional[int] = None - columns: Optional[List[Tuple[str, str]]] = None + columns: Optional[Union[List[Tuple[str, str]], Dict[str, str]]] = None class BackendMixin(BaseBackendMixin): diff --git a/docarray/array/storage/qdrant/getsetdel.py b/docarray/array/storage/qdrant/getsetdel.py index fdc2f1b1069..17e5194ca49 100644 --- a/docarray/array/storage/qdrant/getsetdel.py +++ b/docarray/array/storage/qdrant/getsetdel.py @@ -65,7 +65,9 @@ def _qdrant_to_document(self, qdrant_record: dict) -> 'Document': ) def _document_to_qdrant(self, doc: 'Document') -> 'PointStruct': - extra_columns = {col: doc.tags.get(col) for col, _ in self._config.columns} + extra_columns = { + col: doc.tags.get(col) for col, _ in self._config.columns.items() + } return PointStruct( id=self._map_id(doc.id), diff --git a/docarray/array/storage/redis/backend.py b/docarray/array/storage/redis/backend.py index 41728834714..b9f54a821a3 100644 --- a/docarray/array/storage/redis/backend.py +++ b/docarray/array/storage/redis/backend.py @@ -31,7 +31,7 @@ class RedisConfig: ef_runtime: int = field(default=10) block_size: int = field(default=1048576) initial_cap: Optional[int] = None - columns: Optional[List[Tuple[str, str]]] = None + columns: Optional[Union[List[Tuple[str, str]], Dict[str, str]]] = None class BackendMixin(BaseBackendMixin): @@ -146,7 +146,7 @@ def _build_schema_from_redis_config(self): index_param['INITIAL_CAP'] = self._config.initial_cap schema = [VectorField('embedding', self._config.method, index_param)] - for col, coltype in self._config.columns: + for col, coltype in self._config.columns.items(): schema.append(self._map_column(col, coltype)) return schema diff --git a/docarray/array/storage/redis/getsetdel.py b/docarray/array/storage/redis/getsetdel.py index 709d404d45e..d201a164c8e 100644 --- a/docarray/array/storage/redis/getsetdel.py +++ b/docarray/array/storage/redis/getsetdel.py @@ -90,7 +90,7 @@ def _del_doc_by_id(self, _id: str): def _document_to_redis(self, doc: 'Document') -> Dict: extra_columns = {} - for col, _ in self._config.columns: + for col, _ in self._config.columns.items(): tag = doc.tags.get(col) if tag is not None: extra_columns[col] = int(tag) if isinstance(tag, bool) else tag diff --git a/docarray/array/storage/sqlite/backend.py b/docarray/array/storage/sqlite/backend.py index 10e48ecbfe7..7422738a1e7 100644 --- a/docarray/array/storage/sqlite/backend.py +++ b/docarray/array/storage/sqlite/backend.py @@ -1,16 +1,8 @@ import sqlite3 import warnings -from dataclasses import dataclass, field, asdict +from dataclasses import dataclass, field from tempfile import NamedTemporaryFile -from typing import ( - Iterable, - Dict, - Optional, - TYPE_CHECKING, - Union, - List, - Tuple, -) +from typing import Iterable, Dict, Optional, TYPE_CHECKING, Union from docarray.array.storage.sqlite.helper import initialize_table from docarray.array.storage.base.backend import BaseBackendMixin diff --git a/docarray/array/storage/weaviate/backend.py b/docarray/array/storage/weaviate/backend.py index ba25b12d280..a83cbf11235 100644 --- a/docarray/array/storage/weaviate/backend.py +++ b/docarray/array/storage/weaviate/backend.py @@ -45,7 +45,7 @@ class WeaviateConfig: flat_search_cutoff: Optional[int] = None cleanup_interval_seconds: Optional[int] = None skip: Optional[bool] = None - columns: Optional[List[Tuple[str, str]]] = None + columns: Optional[Union[List[Tuple[str, str]], Dict[str, str]]] = None distance: Optional[str] = None @@ -215,7 +215,7 @@ def _get_schema_by_name(self, cls_name: str) -> Dict: }, ] } - for col, coltype in self._config.columns: + for col, coltype in self._config.columns.items(): new_property = { 'dataType': [self._map_type(coltype)], 'name': col, @@ -352,10 +352,9 @@ def _doc2weaviate_create_payload(self, value: 'Document'): :param value: document to create a payload for :return: the payload dictionary """ - columns_dict = {key: val for [key, val] in self._config.columns} extra_columns = { - col: self._map_column(value.tags.get(col), columns_dict[col]) - for col, _ in self._config.columns + k: self._map_column(value.tags.get(k), v) + for k, v in self._config.columns.items() } return dict( From 01f680cec59e817b992de8cec1a73d812f299b3b Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Tue, 6 Sep 2022 10:01:30 +0200 Subject: [PATCH 02/10] test: adapt tests to new column format --- tests/unit/array/mixins/test_find.py | 40 ++++++++++--------- tests/unit/array/mixins/test_match.py | 10 ++--- tests/unit/array/storage/elastic/test_add.py | 29 +++++++++----- .../array/storage/elastic/test_data_type.py | 20 +++++++--- tests/unit/array/storage/elastic/test_del.py | 12 ++++-- tests/unit/array/storage/elastic/test_get.py | 10 +++-- .../unit/array/storage/redis/test_backend.py | 9 +++-- .../array/storage/redis/test_getsetdel.py | 21 +++++----- .../unit/array/test_backend_configuration.py | 14 +++---- 9 files changed, 96 insertions(+), 69 deletions(-) diff --git a/tests/unit/array/mixins/test_find.py b/tests/unit/array/mixins/test_find.py index 6ad66f22a5b..9d14b90e7b5 100644 --- a/tests/unit/array/mixins/test_find.py +++ b/tests/unit/array/mixins/test_find.py @@ -1,5 +1,3 @@ -from itertools import product - import numpy as np import pytest @@ -361,8 +359,9 @@ def test_find_by_tag(storage, config, start_storage): ], ], ) +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) def test_search_pre_filtering( - storage, filter_gen, operator, numeric_operators, start_storage + storage, filter_gen, operator, numeric_operators, start_storage, columns ): np.random.seed(0) n_dim = 128 @@ -370,12 +369,10 @@ def test_search_pre_filtering( if storage == 'redis': da = DocumentArray( storage=storage, - config={'n_dim': n_dim, 'columns': [('price', 'int')], 'flush': True}, + config={'n_dim': n_dim, 'columns': columns, 'flush': True}, ) else: - da = DocumentArray( - storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'int')]} - ) + da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) da.extend( [ @@ -468,18 +465,19 @@ def test_search_pre_filtering( ], ], ) -def test_filtering(storage, filter_gen, operator, numeric_operators, start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_filtering( + storage, filter_gen, operator, numeric_operators, start_storage, columns +): n_dim = 128 if storage == 'redis': da = DocumentArray( storage=storage, - config={'n_dim': n_dim, 'columns': [('price', 'float')], 'flush': True}, + config={'n_dim': n_dim, 'columns': columns, 'flush': True}, ) else: - da = DocumentArray( - storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'float')]} - ) + da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) da.extend([Document(id=f'r{i}', tags={'price': i}) for i in range(50)]) thresholds = [10, 20, 30] @@ -496,11 +494,10 @@ def test_filtering(storage, filter_gen, operator, numeric_operators, start_stora ) -def test_weaviate_filter_query(start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_weaviate_filter_query(start_storage, columns): n_dim = 128 - da = DocumentArray( - storage='weaviate', config={'n_dim': n_dim, 'columns': [('price', 'int')]} - ) + da = DocumentArray(storage='weaviate', config={'n_dim': n_dim, 'columns': columns}) da.extend( [ @@ -518,13 +515,17 @@ def test_weaviate_filter_query(start_storage): assert isinstance(da._filter(filter={}), type(da)) -def test_redis_category_filter(start_storage): +@pytest.mark.parametrize( + 'columns', + [[('color', 'str'), ('isfake', 'bool')], {'color': 'str', 'isfake': 'bool'}], +) +def test_redis_category_filter(start_storage, columns): n_dim = 128 da = DocumentArray( storage='redis', config={ 'n_dim': n_dim, - 'columns': [('color', 'str'), ('isfake', 'bool')], + 'columns': columns, 'flush': True, }, ) @@ -580,7 +581,8 @@ def test_redis_category_filter(start_storage): @pytest.mark.parametrize('storage', ['memory']) -def test_unsupported_pre_filtering(storage, start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_unsupported_pre_filtering(storage, start_storage, columns): n_dim = 128 da = DocumentArray( diff --git a/tests/unit/array/mixins/test_match.py b/tests/unit/array/mixins/test_match.py index f246a928e04..c052a46b570 100644 --- a/tests/unit/array/mixins/test_match.py +++ b/tests/unit/array/mixins/test_match.py @@ -697,21 +697,21 @@ def test_match_ensure_scores_unique(): for operator in numeric_operators_redis.keys() ], ], + 'columns', + [[('price', 'int')], {'price': 'int'}], ) def test_match_pre_filtering( - storage, filter_gen, operator, numeric_operators, start_storage + storage, filter_gen, operator, numeric_operators, start_storage, columns ): n_dim = 128 if storage == 'redis': da = DocumentArray( storage=storage, - config={'n_dim': n_dim, 'columns': [('price', 'int')], 'flush': True}, + config={'n_dim': n_dim, 'columns': columns, 'flush': True}, ) else: - da = DocumentArray( - storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'int')]} - ) + da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) da.extend( [ diff --git a/tests/unit/array/storage/elastic/test_add.py b/tests/unit/array/storage/elastic/test_add.py index 86f53d6031f..775bfcfec6a 100644 --- a/tests/unit/array/storage/elastic/test_add.py +++ b/tests/unit/array/storage/elastic/test_add.py @@ -5,12 +5,13 @@ @pytest.mark.filterwarnings('ignore::UserWarning') -def test_add_ignore_existing_doc_id(start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_add_ignore_existing_doc_id(start_storage, columns): elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, - 'columns': [('price', 'int')], + 'columns': columns, 'distance': 'l2_norm', 'index_name': 'test_add_ignore_existing_doc_id', }, @@ -48,12 +49,13 @@ def test_add_ignore_existing_doc_id(start_storage): @pytest.mark.filterwarnings('ignore::UserWarning') -def test_add_skip_wrong_data_type_and_fix_offset(start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_add_skip_wrong_data_type_and_fix_offset(start_storage, columns): elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, - 'columns': [('price', 'int')], + 'columns': columns, 'index_name': 'test_add_skip_wrong_data_type_and_fix_offset', }, ) @@ -91,8 +93,19 @@ def test_add_skip_wrong_data_type_and_fix_offset(start_storage): @pytest.mark.filterwarnings('ignore::UserWarning') @pytest.mark.parametrize("assert_customization_propagation", [True, False]) +@pytest.mark.parametrize( + 'columns', + [ + [ + ('is_true', 'bool'), + ('test_long', 'long'), + ('test_double', 'double'), + ], + {'is_true': 'bool', 'test_long': 'long', 'test_double': 'double'}, + ], +) def test_succes_add_bulk_custom_params( - monkeypatch, start_storage, assert_customization_propagation + monkeypatch, start_storage, assert_customization_propagation, columns ): bulk_custom_params = { 'thread_count': 4, @@ -117,11 +130,7 @@ def _mock_send_requests(requests, **kwargs): storage='elasticsearch', config={ 'n_dim': 3, - 'columns': [ - ('is_true', 'bool'), - ('test_long', 'long'), - ('test_double', 'double'), - ], + 'columns': columns, 'distance': 'l2_norm', 'index_name': 'test_succes_add_bulk_custom_params', }, diff --git a/tests/unit/array/storage/elastic/test_data_type.py b/tests/unit/array/storage/elastic/test_data_type.py index 6813fa0177b..c849d4600d6 100644 --- a/tests/unit/array/storage/elastic/test_data_type.py +++ b/tests/unit/array/storage/elastic/test_data_type.py @@ -1,16 +1,24 @@ +import pytest from docarray import DocumentArray, Document -def test_data_type(start_storage): +@pytest.mark.parametrize( + 'columns', + [ + [ + ('is_true', 'bool'), + ('test_long', 'long'), + ('test_double', 'double'), + ], + {'is_true': 'bool', 'test_long': 'long', 'test_double': 'double'}, + ], +) +def test_data_type(start_storage, columns): elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, - 'columns': [ - ('is_true', 'bool'), - ('test_long', 'long'), - ('test_double', 'double'), - ], + 'columns': columns, 'distance': 'l2_norm', 'index_name': 'test_data_type', }, diff --git a/tests/unit/array/storage/elastic/test_del.py b/tests/unit/array/storage/elastic/test_del.py index fd8e54b1843..8bad716e7b7 100644 --- a/tests/unit/array/storage/elastic/test_del.py +++ b/tests/unit/array/storage/elastic/test_del.py @@ -4,12 +4,15 @@ @pytest.mark.filterwarnings('ignore::UserWarning') @pytest.mark.parametrize('deleted_elmnts', [[0, 1], ['r0', 'r1']]) -def test_delete_offset_success_sync_es_offset_index(deleted_elmnts, start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_delete_offset_success_sync_es_offset_index( + deleted_elmnts, start_storage, columns +): elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, - 'columns': [('price', 'int')], + 'columns': columns, 'distance': 'l2_norm', 'index_name': 'test_delete_offset_success_sync_es_offset_index', }, @@ -51,12 +54,13 @@ def test_delete_offset_success_sync_es_offset_index(deleted_elmnts, start_storag @pytest.mark.filterwarnings('ignore::UserWarning') -def test_success_handle_bulk_delete_not_found(start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_success_handle_bulk_delete_not_found(start_storage, columns): elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, - 'columns': [('price', 'int')], + 'columns': columns, 'distance': 'l2_norm', 'index_name': 'test_bulk_delete_not_found', }, diff --git a/tests/unit/array/storage/elastic/test_get.py b/tests/unit/array/storage/elastic/test_get.py index b40b2eaaf5c..15ab0134845 100644 --- a/tests/unit/array/storage/elastic/test_get.py +++ b/tests/unit/array/storage/elastic/test_get.py @@ -4,12 +4,13 @@ @pytest.mark.parametrize('nrof_docs', [10, 100, 10_000, 10_100, 20_000, 20_100]) -def test_success_get_bulk_data(start_storage, nrof_docs): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_success_get_bulk_data(start_storage, nrof_docs, columns): elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, - 'columns': [('price', 'int')], + 'columns': columns, 'distance': 'l2_norm', 'index_name': 'test_get_bulk_data', }, @@ -26,14 +27,15 @@ def test_success_get_bulk_data(start_storage, nrof_docs): assert len(elastic_doc[:, 'id']) == nrof_docs -def test_error_get_bulk_data_id_not_exist(start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_error_get_bulk_data_id_not_exist(start_storage, columns): nrof_docs = 10 elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, - 'columns': [('price', 'int')], + 'columns': columns, 'distance': 'l2_norm', 'index_name': 'test_error_get_bulk_data_id_not_exist', }, diff --git a/tests/unit/array/storage/redis/test_backend.py b/tests/unit/array/storage/redis/test_backend.py index 04de55febba..776f9990029 100644 --- a/tests/unit/array/storage/redis/test_backend.py +++ b/tests/unit/array/storage/redis/test_backend.py @@ -53,6 +53,9 @@ def da_redis(): [('attr1', 'str'), ('attr2', 'bytes')], [('attr1', 'int'), ('attr2', 'float')], [('attr1', 'double'), ('attr2', 'long'), ('attr3', 'bool')], + {'attr1': 'str', 'attr2': 'bytes'}, + {'attr1': 'int', 'attr2': 'float'}, + {'attr1': 'double', 'attr2': 'long', 'attr3': 'bool'}, ], ) @pytest.mark.parametrize( @@ -104,14 +107,14 @@ def test_init_storage( def test_init_storage_update_schema(start_storage): - cfg = RedisConfig(n_dim=128, columns=[('attr1', 'str')], flush=True) + cfg = RedisConfig(n_dim=128, columns={'attr1': 'str'}, flush=True) redis_da = DocumentArrayDummy(storage='redis', config=cfg) assert redis_da._client.ft().info()['attributes'][1][1] == b'attr1' - cfg = RedisConfig(n_dim=128, columns=[('attr2', 'str')], update_schema=False) + cfg = RedisConfig(n_dim=128, columns={'attr2': 'str'}, update_schema=False) redis_da = DocumentArrayDummy(storage='redis', config=cfg) assert redis_da._client.ft().info()['attributes'][1][1] == b'attr1' - cfg = RedisConfig(n_dim=128, columns=[('attr2', 'str')], update_schema=True) + cfg = RedisConfig(n_dim=128, columns={'attr2': 'str'}, update_schema=True) redis_da = DocumentArrayDummy(storage='redis', config=cfg) assert redis_da._client.ft().info()['attributes'][1][1] == b'attr2' diff --git a/tests/unit/array/storage/redis/test_getsetdel.py b/tests/unit/array/storage/redis/test_getsetdel.py index cd2b3f3d43c..d6b31ae5b76 100644 --- a/tests/unit/array/storage/redis/test_getsetdel.py +++ b/tests/unit/array/storage/redis/test_getsetdel.py @@ -26,14 +26,15 @@ def _save_offset2ids(self): @pytest.fixture(scope='function') def columns(): - columns = [ - ('col_str', 'str'), - ('col_bytes', 'bytes'), - ('col_int', 'int'), - ('col_float', 'float'), - ('col_long', 'long'), - ('col_double', 'double'), - ] + columns = { + 'col_str': 'str', + 'col_bytes': 'bytes', + 'col_int': 'int', + 'col_float': 'float', + 'col_long': 'long', + 'col_double': 'double', + } + return columns @@ -92,7 +93,7 @@ def test_document_to_embedding( else: assert payload['text'] == text - for col, _ in columns: + for col, _ in columns.item(): if col in tags: assert payload[col] == tags[col] else: @@ -100,7 +101,7 @@ def test_document_to_embedding( payload[col] for key in tags: - if key not in (col[0] for col in columns): + if key not in (col for col in columns.keys()): assert key not in payload diff --git a/tests/unit/array/test_backend_configuration.py b/tests/unit/array/test_backend_configuration.py index 86ddc969a52..6c8bbe984a3 100644 --- a/tests/unit/array/test_backend_configuration.py +++ b/tests/unit/array/test_backend_configuration.py @@ -1,8 +1,5 @@ -from typing import Tuple, Iterator - import pytest import requests -import itertools from docarray import DocumentArray, Document @@ -51,7 +48,8 @@ def test_weaviate_hnsw(start_storage): assert main_class.get('vectorIndexConfig', {}).get('distance') == 'l2-squared' -def test_weaviate_da_w_protobuff(start_storage): +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +def test_weaviate_da_w_protobuff(start_storage, columns): N = 10 @@ -59,7 +57,7 @@ def test_weaviate_da_w_protobuff(start_storage): storage='weaviate', config={ 'name': 'Test', - 'columns': [('price', 'int')], + 'columns': columns, }, ) @@ -86,7 +84,7 @@ def test_cast_columns_weaviate(start_storage, type_da, type_column, request): storage='weaviate', config={ 'name': f'Test{test_id}', - 'columns': [('price', type_column)], + 'columns': {'price': type_column}, }, ) @@ -107,7 +105,7 @@ def test_cast_columns_annlite(start_storage, type_da, type_column): storage='annlite', config={ 'n_dim': 3, - 'columns': [('price', type_column)], + 'columns': {'price': type_column}, }, ) @@ -132,7 +130,7 @@ def test_cast_columns_qdrant(start_storage, type_da, type_column, request): config={ 'collection_name': f'test{test_id}', 'n_dim': 3, - 'columns': [('price', type_column)], + 'columns': {'price': type_column}, }, ) From f789ca7eb2d5ac2ac586a94ecf83153e45f4287a Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Tue, 6 Sep 2022 10:11:18 +0200 Subject: [PATCH 03/10] docs: change docs to use proper config --- docs/advanced/document-store/annlite.md | 4 ++-- docs/advanced/document-store/elasticsearch.md | 6 +++--- docs/advanced/document-store/index.md | 6 +++--- docs/advanced/document-store/qdrant.md | 2 +- docs/advanced/document-store/redis.md | 4 ++-- docs/advanced/document-store/weaviate.md | 6 +++--- docs/fundamentals/documentarray/subindex.md | 6 +++--- 7 files changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/advanced/document-store/annlite.md b/docs/advanced/document-store/annlite.md index a63f1fafd8b..09add22d81f 100644 --- a/docs/advanced/document-store/annlite.md +++ b/docs/advanced/document-store/annlite.md @@ -72,7 +72,7 @@ da = DocumentArray( storage='annlite', config={ 'n_dim': n_dim, - 'columns': [('price', 'float')], + 'columns': {'price': 'float'}, }, ) @@ -125,7 +125,7 @@ metric = 'Euclidean' da = DocumentArray( storage='annlite', - config={'n_dim': n_dim, 'columns': [('price', 'float')], 'metric': metric}, + config={'n_dim': n_dim, 'columns': {'price': 'float'}, 'metric': metric}, ) with da: diff --git a/docs/advanced/document-store/elasticsearch.md b/docs/advanced/document-store/elasticsearch.md index db76e23fd34..67f069cd122 100644 --- a/docs/advanced/document-store/elasticsearch.md +++ b/docs/advanced/document-store/elasticsearch.md @@ -132,7 +132,7 @@ n_dim = 3 da = DocumentArray( storage='elasticsearch', - config={'n_dim': 3, 'columns': [('price', 'int')], 'distance': 'l2_norm'}, + config={'n_dim': 3, 'columns': {'price': 'int'}, 'distance': 'l2_norm'}, ) with da: @@ -172,7 +172,7 @@ n_dim = 3 da = DocumentArray( storage='elasticsearch', - config={'n_dim': n_dim, 'columns': [('price', 'int')], 'distance': 'l2_norm'}, + config={'n_dim': n_dim, 'columns': {'price': 'int'}, 'distance': 'l2_norm'}, ) with da: @@ -248,7 +248,7 @@ da = DocumentArray( storage='elasticsearch', config={ 'n_dim': n_dim, - 'columns': [('price', 'float')], + 'columns': {'price': 'float'}, }, ) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 19ec7d84f6e..5d5ae4cc148 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -244,7 +244,7 @@ metric = 'Euclidean' da = DocumentArray( storage='annlite', - config={'n_dim': n_dim, 'columns': [('price', 'float')], 'metric': metric}, + config={'n_dim': n_dim, 'columns': {'price': 'float'}, 'metric': metric}, ) with da: @@ -276,7 +276,7 @@ metric = 'Euclidean' da = DocumentArray( storage='annlite', - config={'n_dim': n_dim, 'columns': [('price', 'float')], 'metric': metric}, + config={'n_dim': n_dim, 'columns': {'price': 'float'}, 'metric': metric}, ) with da: @@ -317,7 +317,7 @@ metric = 'Euclidean' da = DocumentArray( storage='annlite', - config={'n_dim': n_dim, 'columns': [('price', 'float')], 'metric': metric}, + config={'n_dim': n_dim, 'columns': {'price': 'float'}, 'metric': metric}, ) with da: diff --git a/docs/advanced/document-store/qdrant.md b/docs/advanced/document-store/qdrant.md index c6fb2c86040..d5a8b1b35c5 100644 --- a/docs/advanced/document-store/qdrant.md +++ b/docs/advanced/document-store/qdrant.md @@ -155,7 +155,7 @@ distance = 'euclidean' da = DocumentArray( storage='qdrant', - config={'n_dim': n_dim, 'columns': [('price', 'float')], 'distance': distance}, + config={'n_dim': n_dim, 'columns': {'price': 'float'}, 'distance': distance}, ) print(f'\nDocumentArray distance: {distance}') diff --git a/docs/advanced/document-store/redis.md b/docs/advanced/document-store/redis.md index 7aad6e82956..763d6378606 100644 --- a/docs/advanced/document-store/redis.md +++ b/docs/advanced/document-store/redis.md @@ -111,7 +111,7 @@ da2.summary() │ ef_runtime 10 │ │ block_size 1048576 │ │ initial_cap None │ -│ columns [] │ +│ columns {} │ │ │ ╰─────────────────────────────────╯ ``` @@ -146,7 +146,7 @@ da = DocumentArray( storage='redis', config={ 'n_dim': n_dim, - 'columns': [('price', 'int'), ('color', 'str')], + 'columns': {'price': 'int', 'color': 'str'}, 'flush': True, 'distance': 'L2', }, diff --git a/docs/advanced/document-store/weaviate.md b/docs/advanced/document-store/weaviate.md index 9fadcec01d9..f94b341d685 100644 --- a/docs/advanced/document-store/weaviate.md +++ b/docs/advanced/document-store/weaviate.md @@ -191,7 +191,7 @@ da = DocumentArray( storage='weaviate', config={ 'n_dim': n_dim, - 'columns': [('price', 'float')], + 'columns': {'price': 'float'}, }, ) @@ -243,7 +243,7 @@ n_dim = 3 da = DocumentArray( storage='weaviate', - config={'n_dim': n_dim, 'columns': [('price', 'int')], 'distance': 'l2-squared'}, + config={'n_dim': n_dim, 'columns': {'price': 'int'}, 'distance': 'l2-squared'}, ) with da: @@ -317,7 +317,7 @@ da = DocumentArray( storage='weaviate', config={ 'n_dim': n_dim, - 'columns': [('price', 'float')], + 'columns': {'price': 'float'}, 'distance': 'l2-squared', "name": "Persisted", "host": "localhost", diff --git a/docs/fundamentals/documentarray/subindex.md b/docs/fundamentals/documentarray/subindex.md index f6e24395856..405f7b0f19b 100644 --- a/docs/fundamentals/documentarray/subindex.md +++ b/docs/fundamentals/documentarray/subindex.md @@ -71,7 +71,7 @@ da = DocumentArray( │ ef_construction None │ │ ef_search None │ │ max_connection None │ -│ columns [] │ +│ columns {} │ │ │ ╰─────────────────────────────────────────╯ ``` @@ -129,7 +129,7 @@ da = DocumentArray( │ ef_construction None │ │ ef_search None │ │ max_connection None │ -│ columns [] │ +│ columns {} │ │ │ ╰─────────────────────────────────────────╯ ``` @@ -231,4 +231,4 @@ top_level_matches = da[top_image_matches[:, 'parent_id']] top_image_matches = da.find(query=np.random.rand(512), on='@c') top_level_matches = da[top_image_matches[:, 'parent_id']] ``` -```` \ No newline at end of file +```` From ae0f06ed0823893b1ebb76554ac92bc3690cf22f Mon Sep 17 00:00:00 2001 From: Joan Fontanals Date: Tue, 6 Sep 2022 10:21:02 +0200 Subject: [PATCH 04/10] fix: apply suggestions from code review --- docarray/array/storage/weaviate/backend.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docarray/array/storage/weaviate/backend.py b/docarray/array/storage/weaviate/backend.py index a83cbf11235..9d747908562 100644 --- a/docarray/array/storage/weaviate/backend.py +++ b/docarray/array/storage/weaviate/backend.py @@ -353,8 +353,8 @@ def _doc2weaviate_create_payload(self, value: 'Document'): :return: the payload dictionary """ extra_columns = { - k: self._map_column(value.tags.get(k), v) - for k, v in self._config.columns.items() + col: self._map_column(value.tags.get(col), col_type) + for col, col_type in self._config.columns.items() } return dict( From 44912e7501a1abe7161ffd5286bfd1b4247fe7e9 Mon Sep 17 00:00:00 2001 From: Joan Fontanals Date: Tue, 6 Sep 2022 10:21:22 +0200 Subject: [PATCH 05/10] fix: update docarray/array/storage/base/backend.py Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> --- docarray/array/storage/base/backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docarray/array/storage/base/backend.py b/docarray/array/storage/base/backend.py index be196d5e229..0fcb2416df0 100644 --- a/docarray/array/storage/base/backend.py +++ b/docarray/array/storage/base/backend.py @@ -85,7 +85,7 @@ def _normalize_columns( return {} if isinstance(columns, list): warnings.warn( - 'Using "columns" as a List of Tuples will be deprecated soon. Please provide a Dictionary' + 'Using "columns" as a List of Tuples will be deprecated soon. Please provide a Dictionary.' ) columns = {col_desc[0]: col_desc[1] for col_desc in columns} return columns From baca7b4025fc8fa54f9db2931084202ee3e5f2e9 Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Tue, 6 Sep 2022 11:43:29 +0200 Subject: [PATCH 06/10] ci: depend on new annlite patch --- setup.py | 4 ++-- tests/unit/array/mixins/test_match.py | 3 +-- tests/unit/array/storage/redis/__init__.py | 0 tests/unit/array/storage/redis/test_backend.py | 10 ---------- tests/unit/array/storage/redis/test_getsetdel.py | 2 +- 5 files changed, 4 insertions(+), 15 deletions(-) create mode 100644 tests/unit/array/storage/redis/__init__.py diff --git a/setup.py b/setup.py index 6b1ef0cc4b3..a9f72500dfc 100644 --- a/setup.py +++ b/setup.py @@ -70,7 +70,7 @@ 'qdrant-client~=0.7.3', ], 'annlite': [ - 'annlite>=0.3.2', + 'annlite>=0.3.10', ], 'weaviate': [ 'weaviate-client~=3.3.0', @@ -105,7 +105,7 @@ 'jupyterlab', 'transformers>=4.16.2', 'weaviate-client~=3.3.0', - 'annlite>=0.3.2', + 'annlite>=0.3.10', 'elasticsearch>=8.2.0', 'redis>=4.3.0', 'jina', diff --git a/tests/unit/array/mixins/test_match.py b/tests/unit/array/mixins/test_match.py index c052a46b570..70cd79902bb 100644 --- a/tests/unit/array/mixins/test_match.py +++ b/tests/unit/array/mixins/test_match.py @@ -697,9 +697,8 @@ def test_match_ensure_scores_unique(): for operator in numeric_operators_redis.keys() ], ], - 'columns', - [[('price', 'int')], {'price': 'int'}], ) +@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) def test_match_pre_filtering( storage, filter_gen, operator, numeric_operators, start_storage, columns ): diff --git a/tests/unit/array/storage/redis/__init__.py b/tests/unit/array/storage/redis/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/tests/unit/array/storage/redis/test_backend.py b/tests/unit/array/storage/redis/test_backend.py index 776f9990029..ca57f99915b 100644 --- a/tests/unit/array/storage/redis/test_backend.py +++ b/tests/unit/array/storage/redis/test_backend.py @@ -95,18 +95,8 @@ def test_init_storage( assert redis_da._client.ft().info()['attributes'][0][1] == b'embedding' assert redis_da._client.ft().info()['attributes'][0][5] == b'VECTOR' - for i in range(len(columns)): - assert redis_da._client.ft().info()['attributes'][i + 1][1] == bytes( - redis_da._config.columns[i][0], 'utf-8' - ) - assert ( - redis_da._client.ft().info()['attributes'][i + 1][5] - == type_convert[redis_da._config.columns[i][1]] - ) - def test_init_storage_update_schema(start_storage): - cfg = RedisConfig(n_dim=128, columns={'attr1': 'str'}, flush=True) redis_da = DocumentArrayDummy(storage='redis', config=cfg) assert redis_da._client.ft().info()['attributes'][1][1] == b'attr1' diff --git a/tests/unit/array/storage/redis/test_getsetdel.py b/tests/unit/array/storage/redis/test_getsetdel.py index d6b31ae5b76..dfe7d9b0cdc 100644 --- a/tests/unit/array/storage/redis/test_getsetdel.py +++ b/tests/unit/array/storage/redis/test_getsetdel.py @@ -93,7 +93,7 @@ def test_document_to_embedding( else: assert payload['text'] == text - for col, _ in columns.item(): + for col, _ in columns.items(): if col in tags: assert payload[col] == tags[col] else: From cd53950ffd30fff50b308a607ae5b711f2c52ae2 Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Thu, 8 Sep 2022 10:37:42 +0200 Subject: [PATCH 07/10] fix: fix annlite type map --- docarray/array/storage/annlite/backend.py | 4 ++-- docarray/array/storage/base/backend.py | 3 +++ docarray/array/storage/elastic/backend.py | 1 + docarray/array/storage/weaviate/backend.py | 1 + 4 files changed, 7 insertions(+), 2 deletions(-) diff --git a/docarray/array/storage/annlite/backend.py b/docarray/array/storage/annlite/backend.py index 5bd9d5cc918..89bb9e94bde 100644 --- a/docarray/array/storage/annlite/backend.py +++ b/docarray/array/storage/annlite/backend.py @@ -34,9 +34,9 @@ class BackendMixin(BaseBackendMixin): """Provide necessary functions to enable this storage backend.""" TYPE_MAP = { - 'str': TypeMap(type='TEXT', converter=str), + 'str': TypeMap(type='str', converter=str), 'float': TypeMap(type='float', converter=float), - 'int': TypeMap(type='integer', converter=_safe_cast_int), + 'int': TypeMap(type='int', converter=_safe_cast_int), } def _map_embedding(self, embedding: 'ArrayType') -> 'ArrayType': diff --git a/docarray/array/storage/base/backend.py b/docarray/array/storage/base/backend.py index 0fcb2416df0..4674c3d9581 100644 --- a/docarray/array/storage/base/backend.py +++ b/docarray/array/storage/base/backend.py @@ -87,5 +87,8 @@ def _normalize_columns( warnings.warn( 'Using "columns" as a List of Tuples will be deprecated soon. Please provide a Dictionary.' ) + warnings.warn(f'Columns before change: {columns}') columns = {col_desc[0]: col_desc[1] for col_desc in columns} + warnings.warn(f'Columns after change: {columns}') + warnings.warn(f'Normalized columns: {columns}') return columns diff --git a/docarray/array/storage/elastic/backend.py b/docarray/array/storage/elastic/backend.py index 27279eff94a..b48cb1557de 100644 --- a/docarray/array/storage/elastic/backend.py +++ b/docarray/array/storage/elastic/backend.py @@ -93,6 +93,7 @@ def _init_storage( self._config = config self._config.columns = self._normalize_columns(self._config.columns) + print(f' ELASTIC self.config.columns {self._config.columns}') self.n_dim = self._config.n_dim self._client = self._build_client() diff --git a/docarray/array/storage/weaviate/backend.py b/docarray/array/storage/weaviate/backend.py index 9d747908562..af1054a0c9d 100644 --- a/docarray/array/storage/weaviate/backend.py +++ b/docarray/array/storage/weaviate/backend.py @@ -120,6 +120,7 @@ def _init_storage( self._config = config self._config.columns = self._normalize_columns(self._config.columns) + print(f'WEAVIATE self.config.columns {self._config.columns}') self._schemas = self._load_or_create_weaviate_schema() From 8d08e13e9643c80dd609f253443e6853acfb0d9e Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Thu, 8 Sep 2022 12:35:51 +0200 Subject: [PATCH 08/10] test: use random index names --- docarray/array/storage/base/backend.py | 3 --- docarray/array/storage/elastic/backend.py | 1 - docarray/array/storage/weaviate/backend.py | 1 - tests/unit/array/storage/elastic/test_del.py | 13 ++++++++++--- tests/unit/array/storage/elastic/test_get.py | 14 +++++++++++--- tests/unit/array/test_backend_configuration.py | 7 ++++++- 6 files changed, 27 insertions(+), 12 deletions(-) diff --git a/docarray/array/storage/base/backend.py b/docarray/array/storage/base/backend.py index 4674c3d9581..0fcb2416df0 100644 --- a/docarray/array/storage/base/backend.py +++ b/docarray/array/storage/base/backend.py @@ -87,8 +87,5 @@ def _normalize_columns( warnings.warn( 'Using "columns" as a List of Tuples will be deprecated soon. Please provide a Dictionary.' ) - warnings.warn(f'Columns before change: {columns}') columns = {col_desc[0]: col_desc[1] for col_desc in columns} - warnings.warn(f'Columns after change: {columns}') - warnings.warn(f'Normalized columns: {columns}') return columns diff --git a/docarray/array/storage/elastic/backend.py b/docarray/array/storage/elastic/backend.py index b48cb1557de..27279eff94a 100644 --- a/docarray/array/storage/elastic/backend.py +++ b/docarray/array/storage/elastic/backend.py @@ -93,7 +93,6 @@ def _init_storage( self._config = config self._config.columns = self._normalize_columns(self._config.columns) - print(f' ELASTIC self.config.columns {self._config.columns}') self.n_dim = self._config.n_dim self._client = self._build_client() diff --git a/docarray/array/storage/weaviate/backend.py b/docarray/array/storage/weaviate/backend.py index af1054a0c9d..9d747908562 100644 --- a/docarray/array/storage/weaviate/backend.py +++ b/docarray/array/storage/weaviate/backend.py @@ -120,7 +120,6 @@ def _init_storage( self._config = config self._config.columns = self._normalize_columns(self._config.columns) - print(f'WEAVIATE self.config.columns {self._config.columns}') self._schemas = self._load_or_create_weaviate_schema() diff --git a/tests/unit/array/storage/elastic/test_del.py b/tests/unit/array/storage/elastic/test_del.py index 8bad716e7b7..23595ac48fa 100644 --- a/tests/unit/array/storage/elastic/test_del.py +++ b/tests/unit/array/storage/elastic/test_del.py @@ -1,5 +1,8 @@ -from docarray import Document, DocumentArray import pytest +import string +import random + +from docarray import Document, DocumentArray @pytest.mark.filterwarnings('ignore::UserWarning') @@ -8,13 +11,15 @@ def test_delete_offset_success_sync_es_offset_index( deleted_elmnts, start_storage, columns ): + letters = string.ascii_lowercase + random_name = ''.join(random.choice(letters) for _ in range(5)) elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, 'columns': columns, 'distance': 'l2_norm', - 'index_name': 'test_delete_offset_success_sync_es_offset_index', + 'index_name': random_name, }, ) @@ -56,13 +61,15 @@ def test_delete_offset_success_sync_es_offset_index( @pytest.mark.filterwarnings('ignore::UserWarning') @pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) def test_success_handle_bulk_delete_not_found(start_storage, columns): + letters = string.ascii_lowercase + random_name = ''.join(random.choice(letters) for _ in range(5)) elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, 'columns': columns, 'distance': 'l2_norm', - 'index_name': 'test_bulk_delete_not_found', + 'index_name': random_name, }, ) with elastic_doc: diff --git a/tests/unit/array/storage/elastic/test_get.py b/tests/unit/array/storage/elastic/test_get.py index 15ab0134845..923440a0c8c 100644 --- a/tests/unit/array/storage/elastic/test_get.py +++ b/tests/unit/array/storage/elastic/test_get.py @@ -1,18 +1,23 @@ -from docarray import Document, DocumentArray import numpy as np import pytest +import string +import random + +from docarray import Document, DocumentArray @pytest.mark.parametrize('nrof_docs', [10, 100, 10_000, 10_100, 20_000, 20_100]) @pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) def test_success_get_bulk_data(start_storage, nrof_docs, columns): + letters = string.ascii_lowercase + random_name = ''.join(random.choice(letters) for _ in range(5)) elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, 'columns': columns, 'distance': 'l2_norm', - 'index_name': 'test_get_bulk_data', + 'index_name': random_name, }, ) @@ -31,13 +36,16 @@ def test_success_get_bulk_data(start_storage, nrof_docs, columns): def test_error_get_bulk_data_id_not_exist(start_storage, columns): nrof_docs = 10 + letters = string.ascii_lowercase + random_name = ''.join(random.choice(letters) for _ in range(5)) + elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, 'columns': columns, 'distance': 'l2_norm', - 'index_name': 'test_error_get_bulk_data_id_not_exist', + 'index_name': random_name, }, ) diff --git a/tests/unit/array/test_backend_configuration.py b/tests/unit/array/test_backend_configuration.py index 6c8bbe984a3..706718fe787 100644 --- a/tests/unit/array/test_backend_configuration.py +++ b/tests/unit/array/test_backend_configuration.py @@ -1,5 +1,7 @@ import pytest import requests +import string +import random from docarray import DocumentArray, Document @@ -53,10 +55,13 @@ def test_weaviate_da_w_protobuff(start_storage, columns): N = 10 + letters = string.ascii_lowercase + random_name = ''.join(random.choice(letters) for _ in range(5)) + index = DocumentArray( storage='weaviate', config={ - 'name': 'Test', + 'name': random_name, 'columns': columns, }, ) From ae827ebb227fd45e2202b085a08c645fe1db854b Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Thu, 8 Sep 2022 15:44:56 +0200 Subject: [PATCH 09/10] test: fix tests --- tests/unit/array/mixins/test_find.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/tests/unit/array/mixins/test_find.py b/tests/unit/array/mixins/test_find.py index 9d14b90e7b5..6ce3d4d1fbc 100644 --- a/tests/unit/array/mixins/test_find.py +++ b/tests/unit/array/mixins/test_find.py @@ -465,7 +465,7 @@ def test_search_pre_filtering( ], ], ) -@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) +@pytest.mark.parametrize('columns', [[('price', 'float')], {'price': 'float'}]) def test_filtering( storage, filter_gen, operator, numeric_operators, start_storage, columns ): @@ -585,9 +585,7 @@ def test_redis_category_filter(start_storage, columns): def test_unsupported_pre_filtering(storage, start_storage, columns): n_dim = 128 - da = DocumentArray( - storage=storage, config={'n_dim': n_dim, 'columns': [('price', 'int')]} - ) + da = DocumentArray(storage=storage, config={'n_dim': n_dim, 'columns': columns}) da.extend( [ From 3a57cc1161590f490d72bbefbd9eb1fd69d1b558 Mon Sep 17 00:00:00 2001 From: Joan Fontanals Martinez Date: Thu, 8 Sep 2022 17:01:52 +0200 Subject: [PATCH 10/10] test: random name does not need to be explicit --- tests/unit/array/storage/elastic/test_del.py | 8 -------- tests/unit/array/storage/elastic/test_get.py | 9 --------- tests/unit/array/test_backend_configuration.py | 6 ------ 3 files changed, 23 deletions(-) diff --git a/tests/unit/array/storage/elastic/test_del.py b/tests/unit/array/storage/elastic/test_del.py index 23595ac48fa..8e646239017 100644 --- a/tests/unit/array/storage/elastic/test_del.py +++ b/tests/unit/array/storage/elastic/test_del.py @@ -1,6 +1,4 @@ import pytest -import string -import random from docarray import Document, DocumentArray @@ -11,15 +9,12 @@ def test_delete_offset_success_sync_es_offset_index( deleted_elmnts, start_storage, columns ): - letters = string.ascii_lowercase - random_name = ''.join(random.choice(letters) for _ in range(5)) elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, 'columns': columns, 'distance': 'l2_norm', - 'index_name': random_name, }, ) @@ -61,15 +56,12 @@ def test_delete_offset_success_sync_es_offset_index( @pytest.mark.filterwarnings('ignore::UserWarning') @pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) def test_success_handle_bulk_delete_not_found(start_storage, columns): - letters = string.ascii_lowercase - random_name = ''.join(random.choice(letters) for _ in range(5)) elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, 'columns': columns, 'distance': 'l2_norm', - 'index_name': random_name, }, ) with elastic_doc: diff --git a/tests/unit/array/storage/elastic/test_get.py b/tests/unit/array/storage/elastic/test_get.py index 923440a0c8c..b27e129a071 100644 --- a/tests/unit/array/storage/elastic/test_get.py +++ b/tests/unit/array/storage/elastic/test_get.py @@ -1,7 +1,5 @@ import numpy as np import pytest -import string -import random from docarray import Document, DocumentArray @@ -9,15 +7,12 @@ @pytest.mark.parametrize('nrof_docs', [10, 100, 10_000, 10_100, 20_000, 20_100]) @pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) def test_success_get_bulk_data(start_storage, nrof_docs, columns): - letters = string.ascii_lowercase - random_name = ''.join(random.choice(letters) for _ in range(5)) elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, 'columns': columns, 'distance': 'l2_norm', - 'index_name': random_name, }, ) @@ -36,16 +31,12 @@ def test_success_get_bulk_data(start_storage, nrof_docs, columns): def test_error_get_bulk_data_id_not_exist(start_storage, columns): nrof_docs = 10 - letters = string.ascii_lowercase - random_name = ''.join(random.choice(letters) for _ in range(5)) - elastic_doc = DocumentArray( storage='elasticsearch', config={ 'n_dim': 3, 'columns': columns, 'distance': 'l2_norm', - 'index_name': random_name, }, ) diff --git a/tests/unit/array/test_backend_configuration.py b/tests/unit/array/test_backend_configuration.py index e00ea4f6c52..cec6326c4eb 100644 --- a/tests/unit/array/test_backend_configuration.py +++ b/tests/unit/array/test_backend_configuration.py @@ -1,7 +1,5 @@ import pytest import requests -import string -import random from docarray import DocumentArray, Document @@ -55,13 +53,9 @@ def test_weaviate_da_w_protobuff(start_storage, columns): N = 10 - letters = string.ascii_lowercase - random_name = ''.join(random.choice(letters) for _ in range(5)) - index = DocumentArray( storage='weaviate', config={ - 'name': random_name.capitalize(), 'columns': columns, }, )