diff --git a/docarray/__init__.py b/docarray/__init__.py index c76602a3de9..75b50e31968 100644 --- a/docarray/__init__.py +++ b/docarray/__init__.py @@ -5,6 +5,7 @@ from .document import Document from .array import DocumentArray + if 'DA_NO_RICH_HANDLER' not in os.environ: from rich.traceback import install diff --git a/docarray/array/base.py b/docarray/array/base.py index 4d0a49a5a03..eeb8f2b9232 100644 --- a/docarray/array/base.py +++ b/docarray/array/base.py @@ -1,4 +1,4 @@ -from typing import MutableSequence, TYPE_CHECKING, Union, Iterable, Type +from typing import MutableSequence, TYPE_CHECKING, Union, Iterable from .. import Document diff --git a/docarray/array/document.py b/docarray/array/document.py index ddd70ff7cd5..83533c1451f 100644 --- a/docarray/array/document.py +++ b/docarray/array/document.py @@ -58,7 +58,7 @@ def __new__( def __new__( cls, _docs: Optional['DocumentArraySourceType'] = None, - storage: str = 'elastic', + storage: str = 'elasticsearch', config: Optional[Union['ElasticConfig', Dict]] = None, ) -> 'DocumentArrayElastic': """Create a Elastic-powered DocumentArray object.""" @@ -86,7 +86,7 @@ def __new__(cls, *args, storage: str = 'memory', **kwargs): from .qdrant import DocumentArrayQdrant instance = super().__new__(DocumentArrayQdrant) - elif storage == 'elastic': + elif storage == 'elasticsearch': from .elastic import DocumentArrayElastic instance = super().__new__(DocumentArrayElastic) diff --git a/docarray/array/elastic.py b/docarray/array/elastic.py index 0c28b373105..d32ff45109a 100644 --- a/docarray/array/elastic.py +++ b/docarray/array/elastic.py @@ -5,7 +5,7 @@ class DocumentArrayElastic(StorageMixins, DocumentArray): - """This is a :class:`DocumentArray` that uses Elastic Search as + """This is a :class:`DocumentArray` that uses Elasticsearch as vector search engine and storage. 
""" diff --git a/docarray/array/mixins/io/binary.py b/docarray/array/mixins/io/binary.py index 9d7696b99b3..f84144b911f 100644 --- a/docarray/array/mixins/io/binary.py +++ b/docarray/array/mixins/io/binary.py @@ -1,13 +1,12 @@ import base64 import io -import os.path import os +import os.path import pickle from contextlib import nullcontext from pathlib import Path from typing import Union, BinaryIO, TYPE_CHECKING, Type, Optional, Generator - from ....helper import ( get_compress_ctx, decompress_bytes, diff --git a/docarray/array/mixins/io/json.py b/docarray/array/mixins/io/json.py index 8d7146a574a..4365f000190 100644 --- a/docarray/array/mixins/io/json.py +++ b/docarray/array/mixins/io/json.py @@ -1,5 +1,4 @@ import json -import os.path from contextlib import nullcontext from typing import Union, TextIO, TYPE_CHECKING, Type, List diff --git a/docarray/array/mixins/plot.py b/docarray/array/mixins/plot.py index e19e0d1c16c..384a603d955 100644 --- a/docarray/array/mixins/plot.py +++ b/docarray/array/mixins/plot.py @@ -29,6 +29,8 @@ def summary(self): from rich.table import Table from rich.console import Console + from rich.panel import Panel + from rich import box tables = [] @@ -37,7 +39,7 @@ def summary(self): all_attrs = self._get_attributes('non_empty_fields') attr_counter = Counter(all_attrs) - table = Table(box=box.SIMPLE, title='Documents Summary') + table = Table(box=box.SIMPLE) table.show_header = False table.add_row('Length', str(len(self))) is_homo = len(attr_counter) == 1 @@ -70,12 +72,12 @@ def summary(self): _text = f'{_doc_text} attributes' table.add_row(_text, str(_a)) - tables.append(table) + tables.append(Panel(table, title='Documents Summary', expand=False)) all_attrs_names = tuple(sorted(all_attrs_names)) if all_attrs_names: - attr_table = Table(box=box.SIMPLE, title='Attributes Summary') + attr_table = Table(box=box.SIMPLE) attr_table.add_column('Attribute') attr_table.add_column('Data type') attr_table.add_column('#Unique values') @@ -96,16 
+98,16 @@ def summary(self): str(len(_a)), str(any(_aa is None for _aa in _a)), ) - tables.append(attr_table) + tables.append(Panel(attr_table, title='Attributes Summary', expand=False)) storage_infos = self._get_storage_infos() if storage_infos: - storage_table = Table(box=box.SIMPLE, title='Storage Summary') + storage_table = Table(box=box.SIMPLE) storage_table.show_header = False for k, v in storage_infos.items(): storage_table.add_row(k, v) - tables.append(storage_table) + tables.append(Panel(storage_table, title='Storage Summary', expand=False)) console.print(*tables) diff --git a/docarray/array/mixins/traverse.py b/docarray/array/mixins/traverse.py index 0b62f4a03b3..e00a6d65b5e 100644 --- a/docarray/array/mixins/traverse.py +++ b/docarray/array/mixins/traverse.py @@ -5,7 +5,6 @@ TYPE_CHECKING, Optional, Callable, - Tuple, Dict, List, ) @@ -14,7 +13,6 @@ from ... import DocumentArray, Document from ...types import T - ATTRIBUTES_SEPARATOR = ',' PATHS_SEPARATOR = ',' @@ -41,7 +39,6 @@ TRAVERSAL_PATH = rf'{SELECTOR}{SLICE}{REMAINDER}' TRAVERSAL_PATH_TAGGED = rf'(?P{SELECTOR_TAGGED}{SLICE_TAGGED}){REMAINDER_TAGGED}' - PATHS_REMAINDER_TAGGED = rf'(?P({PATHS_SEPARATOR}{TRAVERSAL_PATH})*)' TRAVERSAL_PATH_LIST_TAGGED = ( diff --git a/docarray/array/qdrant.py b/docarray/array/qdrant.py index 8c71dc83364..77e8b41b6b0 100644 --- a/docarray/array/qdrant.py +++ b/docarray/array/qdrant.py @@ -1,12 +1,8 @@ -from typing import Iterable - from .document import DocumentArray from .storage.qdrant import StorageMixins, QdrantConfig __all__ = ['DocumentArrayQdrant', 'QdrantConfig'] -from .. 
import Document - class DocumentArrayQdrant(StorageMixins, DocumentArray): """This is a :class:`DocumentArray` that uses Qdrant as diff --git a/docarray/array/queryset/lookup.py b/docarray/array/queryset/lookup.py index c896556f565..8d5872f17e8 100644 --- a/docarray/array/queryset/lookup.py +++ b/docarray/array/queryset/lookup.py @@ -26,7 +26,6 @@ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. """ -import re from typing import TYPE_CHECKING if TYPE_CHECKING: diff --git a/docarray/array/storage/annlite/backend.py b/docarray/array/storage/annlite/backend.py index b41a4fde8e5..8edf406a640 100644 --- a/docarray/array/storage/annlite/backend.py +++ b/docarray/array/storage/annlite/backend.py @@ -1,4 +1,3 @@ -import numpy as np from dataclasses import dataclass, asdict, field from typing import ( Union, @@ -8,6 +7,8 @@ Iterable, ) +import numpy as np + from ..base.backend import BaseBackendMixin from ....helper import dataclass_from_dict @@ -64,7 +65,6 @@ def _init_storage( from annlite import AnnLite self._annlite = AnnLite(self.n_dim, lock=False, **config) - from ... import DocumentArray from .... import Document super()._init_storage() diff --git a/docarray/array/storage/annlite/seqlike.py b/docarray/array/storage/annlite/seqlike.py index 1a2a017609b..da0bc5df565 100644 --- a/docarray/array/storage/annlite/seqlike.py +++ b/docarray/array/storage/annlite/seqlike.py @@ -1,9 +1,8 @@ -from typing import Union, Iterable, Sequence +from typing import Union, Iterable from ..base.seqlike import BaseSequenceLikeMixin -from .... import Document - from ...memory import DocumentArrayInMemory +from .... 
import Document class SequenceLikeMixin(BaseSequenceLikeMixin): diff --git a/docarray/array/storage/base/seqlike.py b/docarray/array/storage/base/seqlike.py index d35bb88b659..067900f4bd9 100644 --- a/docarray/array/storage/base/seqlike.py +++ b/docarray/array/storage/base/seqlike.py @@ -1,5 +1,5 @@ -from typing import Iterator, Iterable, MutableSequence from abc import abstractmethod +from typing import Iterator, Iterable, MutableSequence from .... import Document diff --git a/docarray/array/storage/elastic/__init__.py b/docarray/array/storage/elastic/__init__.py index d5c395c3707..8ac50031a51 100644 --- a/docarray/array/storage/elastic/__init__.py +++ b/docarray/array/storage/elastic/__init__.py @@ -1,8 +1,8 @@ from abc import ABC from .backend import BackendMixin, ElasticConfig -from .getsetdel import GetSetDelMixin from .find import FindMixin +from .getsetdel import GetSetDelMixin from .seqlike import SequenceLikeMixin __all__ = ['StorageMixins', 'ElasticConfig'] diff --git a/docarray/array/storage/elastic/backend.py b/docarray/array/storage/elastic/backend.py index 8b3ead4f597..7eb615d6991 100644 --- a/docarray/array/storage/elastic/backend.py +++ b/docarray/array/storage/elastic/backend.py @@ -1,56 +1,38 @@ import copy -import itertools -import warnings +import uuid from dataclasses import dataclass, field from typing import ( Dict, Optional, TYPE_CHECKING, Union, - Tuple, List, - Sequence, - Generator, - Iterator, Iterable, + Any, ) import numpy as np -import uuid +from elasticsearch import Elasticsearch +from elasticsearch.helpers import bulk from ..base.backend import BaseBackendMixin -from .... import DocumentArray, Document +from .... 
import Document from ....helper import dataclass_from_dict -from elasticsearch import Elasticsearch -from elasticsearch.helpers import bulk - if TYPE_CHECKING: from ....types import ( DocumentArraySourceType, ) from ....types import DocumentArraySourceType, ArrayType -from docarray.math.helper import EPSILON - - -def _sanitize_table_name(table_name: str) -> str: - ret = ''.join(c for c in table_name if c.isalnum() or c == '_') - if ret != table_name: - warnings.warn(f'The table name is changed to {ret} due to illegal characters') - return ret - @dataclass class ElasticConfig: n_dim: int # dims in elastic - basic_auth: Optional[Tuple[str, str]] = None - ca_certs: Optional[str] = None distance: str = 'cosine' # similarity in elastic - host: Optional[str] = field(default='http://localhost') - port: Optional[int] = field(default=9200) - index_name: Optional[str] = field(default=None) - serialize_config: Dict = field(default_factory=dict) + hosts: str = 'http://localhost:9200' + index_name: Optional[str] = None + es_config: Dict[str, Any] = field(default_factory=dict) class BackendMixin(BaseBackendMixin): @@ -97,9 +79,6 @@ def _build_offset2id_index(self): if not self._client.indices.exists(index=self._index_name_offset2id): self._client.indices.create(index=self._index_name_offset2id, ignore=[404]) - def _build_hosts(self): - return self._config.host + ':' + str(self._config.port) - def _build_schema_from_elastic_config(self, elastic_config): da_schema = { "mappings": { @@ -120,9 +99,8 @@ def _build_schema_from_elastic_config(self, elastic_config): def _build_client(self): client = Elasticsearch( - hosts=self._build_hosts(), - ca_certs=self._config.ca_certs, - basic_auth=self._config.basic_auth, + hosts=self._config.hosts, + **self._config.es_config, ) schema = self._build_schema_from_elastic_config(self._config) @@ -147,9 +125,9 @@ def _doc_id_exists(self, doc_id): def _get_storage_infos(self) -> Dict: return { 'Backend': 'ElasticSearch', - 'Host': 
str(self._config.host), - 'Port': str(self._config.port), - 'distance': str(self._config.distance), + 'Hosts': str(self._config.hosts), + 'ES config': str(self._config.es_config), + 'Distance': str(self._config.distance), 'Vector dimension': str(self._config.n_dim), } @@ -189,6 +167,8 @@ def _get_offset2ids_meta(self) -> List: return [] def _map_embedding(self, embedding: 'ArrayType') -> List[float]: + from ....math.helper import EPSILON + if embedding is None: embedding = np.zeros(self.n_dim) + EPSILON else: diff --git a/docarray/array/storage/elastic/getsetdel.py b/docarray/array/storage/elastic/getsetdel.py index 62471b7cb51..8dd230222df 100644 --- a/docarray/array/storage/elastic/getsetdel.py +++ b/docarray/array/storage/elastic/getsetdel.py @@ -1,4 +1,3 @@ -import copy from typing import Iterable, Dict from ..base.getsetdel import BaseGetSetDelMixin diff --git a/docarray/array/storage/elastic/seqlike.py b/docarray/array/storage/elastic/seqlike.py index 168a797f65c..19cbdd5694c 100644 --- a/docarray/array/storage/elastic/seqlike.py +++ b/docarray/array/storage/elastic/seqlike.py @@ -2,7 +2,6 @@ from ..base.seqlike import BaseSequenceLikeMixin from .... import Document -from ..registry import _REGISTRY class SequenceLikeMixin(BaseSequenceLikeMixin): diff --git a/docarray/array/storage/memory/backend.py b/docarray/array/storage/memory/backend.py index c730185da87..804320343f9 100644 --- a/docarray/array/storage/memory/backend.py +++ b/docarray/array/storage/memory/backend.py @@ -25,8 +25,6 @@ def _init_storage( ): super()._init_storage(_docs, copy=copy, *args, **kwargs) - from ... 
import DocumentArray - self._data = {} if _docs is None: return diff --git a/docarray/array/storage/memory/seqlike.py b/docarray/array/storage/memory/seqlike.py index 57d84e38de2..e210f521e1a 100644 --- a/docarray/array/storage/memory/seqlike.py +++ b/docarray/array/storage/memory/seqlike.py @@ -1,4 +1,4 @@ -from typing import Iterator, Union, Iterable +from typing import Union, Iterable from ..base.seqlike import BaseSequenceLikeMixin from .... import Document diff --git a/docarray/array/storage/qdrant/__init__.py b/docarray/array/storage/qdrant/__init__.py index 3646fc07f4a..a7b30e17d9c 100644 --- a/docarray/array/storage/qdrant/__init__.py +++ b/docarray/array/storage/qdrant/__init__.py @@ -36,7 +36,6 @@ def collection_name(self) -> str: @property def collection_name_meta(self) -> str: - return f'{self.collection_name}_meta' @property diff --git a/docarray/array/storage/qdrant/backend.py b/docarray/array/storage/qdrant/backend.py index e57f02ffee8..64dfcf6f3c5 100644 --- a/docarray/array/storage/qdrant/backend.py +++ b/docarray/array/storage/qdrant/backend.py @@ -59,8 +59,6 @@ def _init_storage( raise an error if both are provided """ - from ... import DocumentArray - self._schemas = None if not config: diff --git a/docarray/array/storage/qdrant/find.py b/docarray/array/storage/qdrant/find.py index 6e0925af0e9..c110cd0face 100644 --- a/docarray/array/storage/qdrant/find.py +++ b/docarray/array/storage/qdrant/find.py @@ -1,16 +1,16 @@ from abc import abstractmethod from typing import ( - Union, TYPE_CHECKING, TypeVar, Sequence, List, ) +from qdrant_openapi_client.models.models import Distance + from .... 
import Document, DocumentArray from ....math import ndarray from ....score import NamedScore -from qdrant_openapi_client.models.models import Distance if TYPE_CHECKING: import tensorflow diff --git a/docarray/array/storage/qdrant/seqlike.py b/docarray/array/storage/qdrant/seqlike.py index 397125c5551..234caeade07 100644 --- a/docarray/array/storage/qdrant/seqlike.py +++ b/docarray/array/storage/qdrant/seqlike.py @@ -1,10 +1,10 @@ from abc import abstractmethod from typing import Iterable, Union -from docarray import Document from qdrant_client import QdrantClient -from docarray.array.storage.base.seqlike import BaseSequenceLikeMixin +from ..base.seqlike import BaseSequenceLikeMixin +from .... import Document class SequenceLikeMixin(BaseSequenceLikeMixin): diff --git a/docarray/array/storage/registry.py b/docarray/array/storage/registry.py index 4e78938493c..73a53d78811 100644 --- a/docarray/array/storage/registry.py +++ b/docarray/array/storage/registry.py @@ -1,4 +1,3 @@ from collections import defaultdict - _REGISTRY = defaultdict(lambda: defaultdict(list)) diff --git a/docarray/array/storage/sqlite/backend.py b/docarray/array/storage/sqlite/backend.py index f174fdb046d..7d18ef870d3 100644 --- a/docarray/array/storage/sqlite/backend.py +++ b/docarray/array/storage/sqlite/backend.py @@ -109,8 +109,6 @@ def _init_storage( super()._init_storage() - from ... import DocumentArray - if _docs is None: return elif isinstance(_docs, Iterable): diff --git a/docarray/array/storage/weaviate/backend.py b/docarray/array/storage/weaviate/backend.py index 1cd71accd9e..379d86925a0 100644 --- a/docarray/array/storage/weaviate/backend.py +++ b/docarray/array/storage/weaviate/backend.py @@ -14,9 +14,9 @@ import weaviate from ..base.backend import BaseBackendMixin +from ..registry import _REGISTRY from .... 
import Document from ....helper import dataclass_from_dict -from ..registry import _REGISTRY if TYPE_CHECKING: from ....types import DocumentArraySourceType, ArrayType @@ -58,8 +58,6 @@ def _init_storage( elif isinstance(config, dict): config = dataclass_from_dict(WeaviateConfig, config) - from ... import DocumentArray - self._serialize_config = config.serialize_config if config.name and config.name != config.name.capitalize(): diff --git a/docarray/array/storage/weaviate/seqlike.py b/docarray/array/storage/weaviate/seqlike.py index c2f7a0525a7..109c72e657c 100644 --- a/docarray/array/storage/weaviate/seqlike.py +++ b/docarray/array/storage/weaviate/seqlike.py @@ -1,8 +1,8 @@ from typing import Union, Iterable from ..base.seqlike import BaseSequenceLikeMixin -from .... import Document from ..registry import _REGISTRY +from .... import Document class SequenceLikeMixin(BaseSequenceLikeMixin): diff --git a/docarray/document/data.py b/docarray/document/data.py index 43abd910f2e..a69053a93f6 100644 --- a/docarray/document/data.py +++ b/docarray/document/data.py @@ -148,7 +148,7 @@ def __eq__(self, other): getattr(self, key), getattr(other, key) ) print( - f'are_equal( {getattr(self, key)}, { getattr(other, key)}) ---> {are_equal}' + f'are_equal( {getattr(self, key)}, {getattr(other, key)}) ---> {are_equal}' ) if are_equal == False: return False diff --git a/docarray/document/mixins/__init__.py b/docarray/document/mixins/__init__.py index a7ef2cba648..b1b2a549f09 100644 --- a/docarray/document/mixins/__init__.py +++ b/docarray/document/mixins/__init__.py @@ -13,9 +13,9 @@ from .property import PropertyMixin from .protobuf import ProtobufMixin from .pydantic import PydanticMixin +from .strawberry import StrawberryMixin from .sugar import SingletonSugarMixin from .text import TextDataMixin -from .strawberry import StrawberryMixin from .video import VideoDataMixin diff --git a/docarray/document/mixins/multimodal.py b/docarray/document/mixins/multimodal.py index 
7f49e588122..18bde18dc42 100644 --- a/docarray/document/mixins/multimodal.py +++ b/docarray/document/mixins/multimodal.py @@ -1,10 +1,8 @@ import base64 - import typing from enum import Enum -from docarray.types.multimodal import Image, Text, Field, is_dataclass -from docarray.types.multimodal import TYPES_REGISTRY +from docarray.types.multimodal import Field, is_dataclass if typing.TYPE_CHECKING: from docarray import Document, DocumentArray diff --git a/docarray/document/mixins/porting.py b/docarray/document/mixins/porting.py index e952e00ba2d..2c4e7054882 100644 --- a/docarray/document/mixins/porting.py +++ b/docarray/document/mixins/porting.py @@ -1,8 +1,6 @@ import base64 -import dataclasses import json import pickle -import warnings from typing import Optional, TYPE_CHECKING, Type, Dict, Any, Union from ...helper import compress_bytes, decompress_bytes diff --git a/docarray/document/pydantic_model.py b/docarray/document/pydantic_model.py index 469a8507054..ab3fcec19a5 100644 --- a/docarray/document/pydantic_model.py +++ b/docarray/document/pydantic_model.py @@ -1,4 +1,3 @@ -import base64 from typing import Optional, List, Dict, Any, TYPE_CHECKING, Union from pydantic import BaseModel, validator diff --git a/docarray/resources/ci-vendors.json b/docarray/resources/ci-vendors.json index af133f1f923..78ebc7fcac5 100644 --- a/docarray/resources/ci-vendors.json +++ b/docarray/resources/ci-vendors.json @@ -43,7 +43,10 @@ "name": "Buildkite", "constant": "BUILDKITE", "env": "BUILDKITE", - "pr": { "env": "BUILDKITE_PULL_REQUEST", "ne": "false" } + "pr": { + "env": "BUILDKITE_PULL_REQUEST", + "ne": "false" + } }, { "name": "CircleCI", @@ -66,18 +69,27 @@ "name": "Codefresh", "constant": "CODEFRESH", "env": "CF_BUILD_ID", - "pr": { "any": ["CF_PULL_REQUEST_NUMBER", "CF_PULL_REQUEST_ID"] } + "pr": { + "any": [ + "CF_PULL_REQUEST_NUMBER", + "CF_PULL_REQUEST_ID" + ] + } }, { "name": "Codeship", "constant": "CODESHIP", - "env": { "CI_NAME": "codeship" } + "env": { + 
"CI_NAME": "codeship" + } }, { "name": "Drone", "constant": "DRONE", "env": "DRONE", - "pr": { "DRONE_BUILD_EVENT": "pull_request" } + "pr": { + "DRONE_BUILD_EVENT": "pull_request" + } }, { "name": "dsari", @@ -88,7 +100,9 @@ "name": "GitHub Actions", "constant": "GITHUB_ACTIONS", "env": "GITHUB_ACTIONS", - "pr": { "GITHUB_EVENT_NAME": "pull_request" } + "pr": { + "GITHUB_EVENT_NAME": "pull_request" + } }, { "name": "GitLab CI", @@ -115,8 +129,16 @@ { "name": "Jenkins", "constant": "JENKINS", - "env": ["JENKINS_URL", "BUILD_ID"], - "pr": { "any": ["ghprbPullId", "CHANGE_ID"] } + "env": [ + "JENKINS_URL", + "BUILD_ID" + ], + "pr": { + "any": [ + "ghprbPullId", + "CHANGE_ID" + ] + } }, { "name": "Magnum CI", @@ -127,19 +149,27 @@ "name": "Netlify CI", "constant": "NETLIFY", "env": "NETLIFY", - "pr": { "env": "PULL_REQUEST", "ne": "false" } + "pr": { + "env": "PULL_REQUEST", + "ne": "false" + } }, { "name": "Nevercode", "constant": "NEVERCODE", "env": "NEVERCODE", - "pr": { "env": "NEVERCODE_PULL_REQUEST", "ne": "false" } + "pr": { + "env": "NEVERCODE_PULL_REQUEST", + "ne": "false" + } }, { "name": "Render", "constant": "RENDER", "env": "RENDER", - "pr": { "IS_PULL_REQUEST": "true" } + "pr": { + "IS_PULL_REQUEST": "true" + } }, { "name": "Sail CI", @@ -157,13 +187,18 @@ "name": "Screwdriver", "constant": "SCREWDRIVER", "env": "SCREWDRIVER", - "pr": { "env": "SD_PULL_REQUEST", "ne": "false" } + "pr": { + "env": "SD_PULL_REQUEST", + "ne": "false" + } }, { "name": "Shippable", "constant": "SHIPPABLE", "env": "SHIPPABLE", - "pr": { "IS_PULL_REQUEST": "true" } + "pr": { + "IS_PULL_REQUEST": "true" + } }, { "name": "Solano CI", @@ -179,7 +214,10 @@ { "name": "TaskCluster", "constant": "TASKCLUSTER", - "env": ["TASK_ID", "RUN_ID"] + "env": [ + "TASK_ID", + "RUN_ID" + ] }, { "name": "TeamCity", @@ -190,7 +228,10 @@ "name": "Travis CI", "constant": "TRAVIS", "env": "TRAVIS", - "pr": { "env": "TRAVIS_PULL_REQUEST", "ne": "false" } + "pr": { + "env": "TRAVIS_PULL_REQUEST", + 
"ne": "false" + } }, { "name": "Vercel", diff --git a/docarray/types/__init__.py b/docarray/types/__init__.py index 8940d56d367..c06defb202f 100644 --- a/docarray/types/__init__.py +++ b/docarray/types/__init__.py @@ -11,7 +11,6 @@ Tuple, ) - if TYPE_CHECKING: import scipy.sparse import tensorflow diff --git a/docarray/types/multimodal.py b/docarray/types/multimodal.py index f61d6d69fbc..7f2707ffcff 100644 --- a/docarray/types/multimodal.py +++ b/docarray/types/multimodal.py @@ -1,15 +1,13 @@ import base64 +import typing from dataclasses import ( dataclass as std_dataclass, is_dataclass as std_is_dataclass, - MISSING, Field as StdField, field, ) from pathlib import Path -from typing import TypeVar, ForwardRef, Callable, Any, Optional, TYPE_CHECKING - -import typing +from typing import TypeVar, ForwardRef, Callable, Optional, TYPE_CHECKING from docarray.types.deserializers import ( image_deserializer, diff --git a/docs/advanced/document-store/elastic.md b/docs/advanced/document-store/elastic.md deleted file mode 100644 index 38e870be319..00000000000 --- a/docs/advanced/document-store/elastic.md +++ /dev/null @@ -1,74 +0,0 @@ -# Elastic - -One can use [Elastic](https://www.elastic.co) as the document store for DocumentArray. It is useful when one wants to have faster Document retrieval on embeddings, i.e. `.match()`, `.find()`. - -````{tip} -This feature requires `elasticsearch-client`. You can install it via `pip install "docarray[full]".` -```` - -## Usage - -### Start Elastic service - -To use Elastic as the storage backend, it is required to have the Elastic service started. 
Create `docker-compose.yml` as follows: - -```yaml -version: "3.3" -services: - elastic: - image: docker.elastic.co/elasticsearch/elasticsearch:8.1.0 - environment: - - xpack.security.enabled=false - - discovery.type=single-node - ports: - - "9200:9200" - networks: - - elastic - -networks: - elastic: - name: elastic -``` - -Then - -```bash -docker-compose up -``` - -### Create DocumentArray with Elastic backend - -Assuming service is started using the default configuration (i.e. server address is `http://localhost:9200`), one can instantiate a DocumentArray with Elastic storage as such: - -```python -from docarray import DocumentArray - -da = DocumentArray(storage='elastic',config={'n_dim':128}) -``` - -The usage would be the same as the ordinary DocumentArray, but the dimension of an embedding for a Document must be provided at creation time. - -To access a DocumentArray formerly persisted, one can specify the index name, the host, the port and the protocol to connect to the server. If they are not provided, then it will connect to the Elastic service bound to `http://localhost:9200`. - -```python -from docarray import DocumentArray - -da = DocumentArray(storage='elastic', config={'index_name':'ndim_128', 'n_dim':128, 'port': 9200}) - -da.summary() -``` - -Other functions behave the same as in-memory DocumentArray. - -## Config - -The following configs can be set: - -| Name | Description | Default | -|--------------|--------------------------------------------------------------------------------------|-------------| -| `host` | Hostname of the Elastic server | 'localhost' | -| `port` | port of the Elastic server | 9200 | -| `protocol` | protocol to be used. 
Can be 'http' or 'https' | 'http' | -| `index_name` | Elastic index name; the class name of Elastic index object to set this DocumentArray | None | -| `n_dim` | Dimensionality of the embeddings | None | - diff --git a/docs/advanced/document-store/elasticsearch.md b/docs/advanced/document-store/elasticsearch.md new file mode 100644 index 00000000000..448eb6f82ba --- /dev/null +++ b/docs/advanced/document-store/elasticsearch.md @@ -0,0 +1,136 @@ +# Elasticsearch + +One can use [Elasticsearch](https://www.elastic.co) as the document store for DocumentArray. It is useful when one wants to have faster Document retrieval on embeddings, i.e. `.match()`, `.find()`. + +````{tip} +This feature requires `elasticsearch`. You can install it via `pip install "docarray[full]".` +```` + +## Usage + +### Start Elastic service + +To use Elasticsearch as the storage backend, it is required to have the Elasticsearch service started. Create `docker-compose.yml` as follows: + +```yaml +version: "3.3" +services: + elastic: + image: docker.elastic.co/elasticsearch/elasticsearch:8.1.0 + environment: + - xpack.security.enabled=false + - discovery.type=single-node + ports: + - "9200:9200" + networks: + - elastic + +networks: + elastic: + name: elastic +``` + +Then + +```bash +docker-compose up +``` + + +### Create DocumentArray with Elasticsearch backend + +Assuming service is started using the default configuration (i.e. server address is `http://localhost:9200`), one can instantiate a DocumentArray with Elasticsearch storage as such: + +```python +from docarray import DocumentArray + +da = DocumentArray(storage='elasticsearch', config={'n_dim': 128}) +``` + +The usage would be the same as the ordinary DocumentArray, but the dimension of an embedding for a Document must be provided at creation time. + +### Secure connection + +By default, Elasticsearch server runs with security layer that disables the plain HTTP connection. 
You can pass `hosts` to the constructor, together with `api_key` or `ca_certs` inside `es_config`. For example, + +```python +from docarray import DocumentArray + +da = DocumentArray( + storage='elasticsearch', + config={ + 'hosts': 'https://elastic:PRq7je_hJ4i4auh+Hq+*@localhost:9200', + 'n_dim': 128, + 'es_config': {'ca_certs': '/Users/hanxiao/http_ca.crt'}, + }, +) +``` + +See [the official documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/docker.html#elasticsearch-security-certificates) for how to obtain the certificate, password, etc. + +To access a DocumentArray formerly persisted, one can specify `index_name` and `hosts`. + +The following example will build a DocumentArray with previously stored data from `old_stuff` on `http://localhost:9200`: + +```python +from docarray import DocumentArray, Document + +da = DocumentArray( + storage='elasticsearch', + config={'index_name': 'old_stuff', 'n_dim': 128}, +) + +da.extend([Document() for _ in range(1000)]) + +da2 = DocumentArray( + storage='elasticsearch', + config={'index_name': 'old_stuff', 'n_dim': 128}, +) + +da2.summary() +``` + +```text + Documents Summary + + Length 2000 + Homogenous Documents True + Common Attributes ('id', 'embedding') + + Attributes Summary + + Attribute Data type #Unique values Has empty value + ───────────────────────────────────────────────────────────── + embedding ('ndarray',) 1000 False + id ('str',) 1000 False + + Storage Summary + + Backend ElasticSearch + Hosts http://localhost:9200 + Distance cosine + Vector dimension 128 + ES config {} + +[0.14890289 0.3168339 0.03050802 0.06785086 0.94719299 0.32490566 + ...] +``` + +Other functions behave the same as in-memory DocumentArray. 
+ +## Config + +The following configs can be set: + +| Name | Description | Default | +|--------------|--------------------------------------------------------------------------------------------------|-------------| +| `hosts` | URL of the Elasticsearch server | `http://localhost:9200` | +| `es_config` | Other ES configs in a Dict, passed to the `Elasticsearch` client constructor, e.g. `cloud_id`, `api_key`| `{}` | +| `index_name` | Elasticsearch index name; the class name of Elasticsearch index object to set this DocumentArray | None | +| `n_dim` | Dimensionality of the embeddings | None | +| `distance` | Similarity metric in Elasticsearch | `cosine`| + + +```{tip} +Note that it is plural `hosts` not `host`, to comply with Elasticsearch client's interface. +``` diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index be9b2af5899..81e002779f2 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -8,7 +8,7 @@ weaviate sqlite qdrant annlite -elastic +elasticsearch ``` Documents inside a DocumentArray can live in a [document store](https://en.wikipedia.org/wiki/Document-oriented_database) instead of in memory, e.g. in SQLite, Redis. Comparing to the in-memory storage, the benefit of using an external store is often about longer persistence and faster retrieval. 
diff --git a/tests/unit/array/mixins/test_eval_class.py b/tests/unit/array/mixins/test_eval_class.py index e501f780b34..a8f9255e678 100644 --- a/tests/unit/array/mixins/test_eval_class.py +++ b/tests/unit/array/mixins/test_eval_class.py @@ -14,7 +14,7 @@ ('sqlite', {}), ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), - ('elastic', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), ], ) @pytest.mark.parametrize( @@ -50,7 +50,7 @@ def test_eval_mixin_perfect_match(metric_fn, kwargs, storage, config, start_stor ('sqlite', {}), ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), - ('elastic', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), ], ) @pytest.mark.parametrize( @@ -93,7 +93,7 @@ def test_eval_mixin_zero_match(storage, config, metric_fn, start_storage, kwargs ('sqlite', {}), ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), - ('elastic', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), ], ) def test_diff_len_should_raise(storage, config, start_storage): @@ -111,7 +111,7 @@ def test_diff_len_should_raise(storage, config, start_storage): ('sqlite', {}), ('annlite', {'n_dim': 256}), ('qdrant', {'n_dim': 256}), - ('elastic', {'n_dim': 256}), + ('elasticsearch', {'n_dim': 256}), ], ) def test_diff_hash_fun_should_raise(storage, config, start_storage): @@ -129,7 +129,7 @@ def test_diff_hash_fun_should_raise(storage, config, start_storage): ('sqlite', {}), ('annlite', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), - ('elastic', {'n_dim': 3}), + ('elasticsearch', {'n_dim': 3}), ], ) def test_same_hash_same_len_fun_should_work(storage, config, start_storage): @@ -157,7 +157,7 @@ def test_same_hash_same_len_fun_should_work(storage, config, start_storage): ('sqlite', {}), ('annlite', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), - ('elastic', {'n_dim': 3}), + ('elasticsearch', {'n_dim': 3}), ], ) def test_adding_noise(storage, config, start_storage): @@ -187,7 +187,7 @@ def test_adding_noise(storage, config, start_storage): ('sqlite', {}), 
('annlite', {'n_dim': 128}), ('qdrant', {'n_dim': 128}), - ('elastic', {'n_dim': 128}), + ('elasticsearch', {'n_dim': 128}), ], ) @pytest.mark.parametrize( diff --git a/tests/unit/array/mixins/test_find.py b/tests/unit/array/mixins/test_find.py index 9115d3f3134..588de8ff077 100644 --- a/tests/unit/array/mixins/test_find.py +++ b/tests/unit/array/mixins/test_find.py @@ -13,7 +13,7 @@ ('weaviate', {'n_dim': 32}), ('annlite', {'n_dim': 32}), ('qdrant', {'n_dim': 32}), - ('elastic', {'n_dim': 32}), + ('elasticsearch', {'n_dim': 32}), ], ) @pytest.mark.parametrize('limit', [1, 5, 10]) @@ -49,7 +49,7 @@ def test_find(storage, config, limit, query, start_storage): t['cosine_similarity'].value for t in result[:, 'scores'] ] assert sorted(cosine_similarities, reverse=True) == cosine_similarities - elif storage in ['memory', 'annlite', 'elastic']: + elif storage in ['memory', 'annlite', 'elasticsearch']: cosine_distances = [t['cosine'].value for t in da[:, 'scores']] assert sorted(cosine_distances, reverse=False) == cosine_distances else: @@ -59,7 +59,7 @@ def test_find(storage, config, limit, query, start_storage): t['cosine_similarity'].value for t in da[:, 'scores'] ] assert sorted(cosine_similarities, reverse=True) == cosine_similarities - elif storage in ['memory', 'annlite', 'elastic']: + elif storage in ['memory', 'annlite', 'elasticsearch']: for da in result: cosine_distances = [t['cosine'].value for t in da[:, 'scores']] assert sorted(cosine_distances, reverse=False) == cosine_distances diff --git a/tests/unit/array/mixins/test_getset.py b/tests/unit/array/mixins/test_getset.py index cb428dcb8a5..871fa84114a 100644 --- a/tests/unit/array/mixins/test_getset.py +++ b/tests/unit/array/mixins/test_getset.py @@ -41,7 +41,7 @@ def nested_docs(): ('annlite', {'n_dim': 3}), ('weaviate', {'n_dim': 3}), ('qdrant', {'n_dim': 3}), - ('elastic', {'n_dim': 3}), + ('elasticsearch', {'n_dim': 3}), ], ) @pytest.mark.parametrize( diff --git a/tests/unit/array/mixins/test_magic.py 
b/tests/unit/array/mixins/test_magic.py index 089312cf317..70a11979839 100644 --- a/tests/unit/array/mixins/test_magic.py +++ b/tests/unit/array/mixins/test_magic.py @@ -76,7 +76,7 @@ def test_repr(da_cls, config, start_storage): ('annlite', AnnliteConfig(n_dim=128)), ('weaviate', WeaviateConfig(n_dim=128)), ('qdrant', QdrantConfig(n_dim=128)), - ('elastic', ElasticConfig(n_dim=128)), + ('elasticsearch', ElasticConfig(n_dim=128)), ], ) def test_repr_str(docs, storage, config, start_storage): diff --git a/tests/unit/array/mixins/test_parallel.py b/tests/unit/array/mixins/test_parallel.py index 3e582e65702..e58d7510870 100644 --- a/tests/unit/array/mixins/test_parallel.py +++ b/tests/unit/array/mixins/test_parallel.py @@ -188,7 +188,7 @@ def test_map_lambda(pytestconfig, da_cls, config, start_storage): ('annlite', AnnliteConfig(n_dim=256)), ('weaviate', WeaviateConfig(n_dim=256)), ('qdrant', QdrantConfig(n_dim=256)), - ('elastic', ElasticConfig(n_dim=256)), + ('elasticsearch', ElasticConfig(n_dim=256)), ], ) @pytest.mark.parametrize('backend', ['thread', 'process']) diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index 51d9839a7b1..bad7725606e 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -26,7 +26,7 @@ def indices(): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_getter_int_str(docs, storage, config, start_storage): @@ -87,7 +87,7 @@ def test_setter_int_str(docs, storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_del_int_str(docs, storage, config, start_storage, indices): @@ -121,7 +121,7 @@ 
def test_del_int_str(docs, storage, config, start_storage, indices): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_slice(docs, storage, config, start_storage): @@ -159,7 +159,7 @@ def test_slice(docs, storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_sequence_bool_index(docs, storage, config, start_storage): @@ -205,7 +205,7 @@ def test_sequence_bool_index(docs, storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_sequence_int(docs, nparray, storage, config, start_storage): @@ -241,7 +241,7 @@ def test_sequence_int(docs, nparray, storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_sequence_str(docs, storage, config, start_storage): @@ -275,7 +275,7 @@ def test_sequence_str(docs, storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_docarray_list_tuple(docs, storage, config, start_storage): @@ -295,7 +295,7 @@ def test_docarray_list_tuple(docs, storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', 
ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_path_syntax_indexing(storage, config, start_storage): @@ -334,7 +334,7 @@ def test_path_syntax_indexing(storage, config, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_path_syntax_indexing_set(storage, config, start_storage): @@ -419,7 +419,7 @@ def test_path_syntax_indexing_set(storage, config, start_storage): ('weaviate', lambda: WeaviateConfig(n_dim=123)), ('annlite', lambda: AnnliteConfig(n_dim=123)), ('qdrant', lambda: QdrantConfig(n_dim=123)), - ('elastic', lambda: ElasticConfig(n_dim=123)), + ('elasticsearch', lambda: ElasticConfig(n_dim=123)), ], ) def test_attribute_indexing(storage, config_gen, start_storage, size): @@ -450,7 +450,7 @@ def test_attribute_indexing(storage, config_gen, start_storage, size): @pytest.mark.parametrize( - 'storage', ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elastic'] + 'storage', ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elasticsearch'] ) def test_tensor_attribute_selector(storage, start_storage): import scipy.sparse @@ -459,7 +459,7 @@ def test_tensor_attribute_selector(storage, start_storage): sp_embed[sp_embed > 0.1] = 0 sp_embed = scipy.sparse.coo_matrix(sp_embed) - if storage in ('annlite', 'weaviate', 'qdrant', 'elastic'): + if storage in ('annlite', 'weaviate', 'qdrant', 'elasticsearch'): da = DocumentArray(storage=storage, config={'n_dim': 10}) else: da = DocumentArray(storage=storage) @@ -502,10 +502,10 @@ def test_advance_selector_mixed(storage): @pytest.mark.parametrize( - 'storage', ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elastic'] + 'storage', ['memory', 'sqlite', 'weaviate', 'annlite', 'qdrant', 'elasticsearch'] ) def test_single_boolean_and_padding(storage, start_storage): - if storage in ('annlite', 
'weaviate', 'qdrant', 'elastic'): + if storage in ('annlite', 'weaviate', 'qdrant', 'elasticsearch'): da = DocumentArray(storage=storage, config={'n_dim': 10}) else: da = DocumentArray(storage=storage) @@ -533,7 +533,7 @@ def test_single_boolean_and_padding(storage, start_storage): ('weaviate', lambda: WeaviateConfig(n_dim=123)), ('annlite', lambda: AnnliteConfig(n_dim=123)), ('qdrant', lambda: QdrantConfig(n_dim=123)), - ('elastic', lambda: ElasticConfig(n_dim=123)), + ('elasticsearch', lambda: ElasticConfig(n_dim=123)), ], ) def test_edge_case_two_strings(storage, config_gen, start_storage): @@ -610,7 +610,7 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): ('weaviate', WeaviateConfig(n_dim=123)), ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), - ('elastic', ElasticConfig(n_dim=123)), + ('elasticsearch', ElasticConfig(n_dim=123)), ], ) def test_offset2ids_persistence(storage, config, start_storage): diff --git a/tests/unit/array/test_pull_out.py b/tests/unit/array/test_pull_out.py index 5bc4b8bacaa..fc3a194a7db 100644 --- a/tests/unit/array/test_pull_out.py +++ b/tests/unit/array/test_pull_out.py @@ -21,7 +21,7 @@ def docs(): ('weaviate', {'n_dim': 2}), ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), - ('elastic', {'n_dim': 2}), + ('elasticsearch', {'n_dim': 2}), ], ) def test_update_embedding(docs, storage, config, start_storage): @@ -55,7 +55,7 @@ def test_update_embedding(docs, storage, config, start_storage): ('weaviate', {'n_dim': 2}), ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), - ('elastic', {'n_dim': 2}), + ('elasticsearch', {'n_dim': 2}), ], ) def test_update_doc_embedding(docs, storage, config, start_storage): @@ -89,7 +89,7 @@ def test_update_doc_embedding(docs, storage, config, start_storage): ('weaviate', {'n_dim': 2}), ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), - ('elastic', {'n_dim': 2}), + ('elasticsearch', {'n_dim': 2}), ], ) def test_batch_update_embedding(docs, storage, 
config, start_storage): @@ -121,7 +121,7 @@ def test_batch_update_embedding(docs, storage, config, start_storage): ('weaviate', {'n_dim': 2}), ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), - ('elastic', {'n_dim': 2}), + ('elasticsearch', {'n_dim': 2}), ], ) def test_batch_update_doc_embedding(docs, storage, config, start_storage): @@ -155,7 +155,7 @@ def test_batch_update_doc_embedding(docs, storage, config, start_storage): ('weaviate', {'n_dim': 2}), ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), - ('elastic', {'n_dim': 2}), + ('elasticsearch', {'n_dim': 2}), ], ) def test_update_id(docs, storage, config, start_storage): @@ -176,7 +176,7 @@ def test_update_id(docs, storage, config, start_storage): ('weaviate', {'n_dim': 2}), ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), - ('elastic', {'n_dim': 2}), + ('elasticsearch', {'n_dim': 2}), ], ) def test_update_doc_id(docs, storage, config, start_storage): @@ -196,7 +196,7 @@ def test_update_doc_id(docs, storage, config, start_storage): ('weaviate', {'n_dim': 2}), ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), - ('elastic', {'n_dim': 2}), + ('elasticsearch', {'n_dim': 2}), ], ) def test_batch_update_id(docs, storage, config, start_storage): @@ -219,7 +219,7 @@ def test_batch_update_id(docs, storage, config, start_storage): ('weaviate', {'n_dim': 2}), ('annlite', {'n_dim': 2}), ('qdrant', {'n_dim': 2}), - ('elastic', {'n_dim': 2}), + ('elasticsearch', {'n_dim': 2}), ], ) def test_batch_update_doc_id(docs, storage, config, start_storage):