From 4d13c4c33321ac22a68139df81e272f282ca23a9 Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 17 Mar 2023 13:01:09 +0800
Subject: [PATCH 01/14] feat: elastic store based on version 8

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 .../doc_index/backends/elasticv8_doc_index.py | 453 ++++++++++++++++++
 poetry.lock                                   |  47 +-
 pyproject.toml                                |   5 +-
 tests/doc_index/elastic/fixture.py            |  58 +++
 tests/doc_index/elastic/v8/docker-compose.yml |  16 +
 tests/doc_index/elastic/v8/test_find.py       | 278 +++++++++++
 .../elastic/v8/test_index_get_del.py          | 232 +++++++++
 7 files changed, 1084 insertions(+), 5 deletions(-)
 create mode 100644 docarray/doc_index/backends/elasticv8_doc_index.py
 create mode 100644 tests/doc_index/elastic/fixture.py
 create mode 100644 tests/doc_index/elastic/v8/docker-compose.yml
 create mode 100644 tests/doc_index/elastic/v8/test_find.py
 create mode 100644 tests/doc_index/elastic/v8/test_index_get_del.py

diff --git a/docarray/doc_index/backends/elasticv8_doc_index.py b/docarray/doc_index/backends/elasticv8_doc_index.py
new file mode 100644
index 00000000000..ee10d20c0d2
--- /dev/null
+++ b/docarray/doc_index/backends/elasticv8_doc_index.py
@@ -0,0 +1,453 @@
+import uuid
+import warnings
+from collections import defaultdict
+from dataclasses import dataclass, field
+from typing import (
+    Any,
+    Dict,
+    Generator,
+    Generic,
+    Iterable,
+    List,
+    Mapping,
+    Optional,
+    Sequence,
+    Tuple,
+    Type,
+    TypeVar,
+    Union,
+    cast,
+)
+
+import numpy as np
+from elastic_transport import NodeConfig
+from elasticsearch import Elasticsearch
+from elasticsearch.helpers import parallel_bulk
+
+import docarray.typing
+from docarray import BaseDocument
+from docarray.doc_index.abstract_doc_index import (
+    BaseDocumentIndex,
+    _ColumnInfo,
+    _FindResultBatched,
+    _raise_not_composable,
+)
+from docarray.typing import AnyTensor
+from docarray.utils.find import _FindResult
+from docarray.utils.misc import torch_imported
+
+TSchema = TypeVar('TSchema', bound=BaseDocument)
+T = TypeVar('T', bound='ElasticDocumentV8Index')
+
+ELASTIC_PY_VEC_TYPES = [list, tuple, np.ndarray]
+ELASTIC_PY_TYPES = [bool, int, float, str, docarray.typing.ID]
+if torch_imported:
+    import torch
+
+    ELASTIC_PY_VEC_TYPES.append(torch.Tensor)
+
+
+class ElasticDocumentV8Index(BaseDocumentIndex, Generic[TSchema]):
+    def __init__(self, db_config=None, **kwargs):
+        super().__init__(db_config=db_config, **kwargs)
+        self._db_config = cast(ElasticDocumentV8Index.DBConfig, self._db_config)
+
+        if self._db_config.index_name is None:
+            id = uuid.uuid4().hex
+            self._db_config.index_name = 'index__' + id
+
+        self._index_name = self._db_config.index_name
+
+        self._client = Elasticsearch(
+            hosts=self._db_config.hosts,
+            **self._db_config.es_config,
+        )
+
+        # Elasticsearch index setup
+        self._index_init_params = ('type',)
+        self._index_vector_params = ('dims', 'similarity', 'index')
+        self._index_vector_options = ('m', 'ef_construction')
+
+        mappings: Dict[str, Any] = {
+            'dynamic': True,
+            '_source': {'enabled': 'true'},
+            'properties': {},
+        }
+
+        for col_name, col in self._column_infos.items():
+            if not col.config:
+                continue  # do not create column index if no config is given
+            mappings['properties'][col_name] = self._create_index(col)
+
+        if self._client.indices.exists(index=self._index_name):  # type: ignore
+            self._client.indices.put_mapping(
+                index=self._index_name, properties=mappings['properties']
+            )
+        else:
+            self._client.indices.create(index=self._index_name, mappings=mappings)
+
+        if len(self._db_config.index_settings):
+            self._client.indices.put_settings(
+                index=self._index_name, settings=self._db_config.index_settings
+            )
+
+        self._refresh(self._index_name)
+
+    ###############################################
+    # Inner classes for query builder and configs #
+    ###############################################
+    class QueryBuilder(BaseDocumentIndex.QueryBuilder):
+        def __init__(self, outer_instance, **kwargs):
+            super().__init__()
+            self._outer_instance = outer_instance
+            self._query: Dict[str, Any] = {
+                'query': defaultdict(lambda: defaultdict(list))
+            }
+
+        def build(self, *args, **kwargs) -> Any:
+            """Return the search body; builder state is untouched, so it can be rebuilt."""
+            body = {k: v for k, v in self._query.items() if k != 'query'}
+            if self._query['query'] and 'knn' in body:  # bool clauses filter the kNN
+                body['knn'] = {**body['knn'], 'filter': self._query['query']}
+            elif self._query['query']:
+                body['query'] = self._query['query']
+            return body
+
+        def find(
+            self,
+            query: Union[AnyTensor, BaseDocument],
+            search_field: str = 'embedding',
+            limit: int = 10,
+        ):
+            if isinstance(query, BaseDocument):
+                query_vec = BaseDocumentIndex._get_values_by_column(
+                    [query], search_field
+                )[0]
+            else:
+                query_vec = query
+            query_vec_np = BaseDocumentIndex._to_numpy(self._outer_instance, query_vec)
+            self._query['knn'] = {
+                'field': search_field,
+                'query_vector': query_vec_np,
+                'k': limit,
+                'num_candidates': self._outer_instance._runtime_config.default_column_config[
+                    np.ndarray
+                ][
+                    'num_candidates'
+                ],
+            }
+            return self
+
+        # filter accepts Leaf/Compound query clauses
+        # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
+        def filter(self, query: Dict[str, Any], limit: int = 10):
+            self._query['size'] = limit
+            self._query['query']['bool']['filter'].append(query)
+            return self
+
+        def text_search(self, query: str, search_field: str = 'text', limit: int = 10):
+            self._query['size'] = limit
+            self._query['query']['bool']['must'].append(
+                {'match': {search_field: query}}
+            )
+            return self
+
+        find_batched = _raise_not_composable('find_batched')
+        filter_batched = _raise_not_composable('filter_batched')
+        text_search_batched = _raise_not_composable('text_search_batched')
+
+    def build_query(self, **kwargs) -> QueryBuilder:
+        """
+        Build a query for this DocumentIndex.
+        """
+        return self.QueryBuilder(self, **kwargs)  # type: ignore
+
+    @dataclass
+    class DBConfig(BaseDocumentIndex.DBConfig):
+
+        hosts: Union[
+            str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]], None
+        ] = 'http://localhost:9200'
+        index_name: Optional[str] = None
+        es_config: Dict[str, Any] = field(default_factory=dict)
+        index_settings: Dict[str, Any] = field(default_factory=dict)
+
+    @dataclass
+    class RuntimeConfig(BaseDocumentIndex.RuntimeConfig):
+        default_column_config: Dict[Type, Dict[str, Any]] = field(
+            default_factory=lambda: {
+                np.ndarray: {
+                    'type': 'dense_vector',
+                    'index': True,
+                    'dims': 128,
+                    'similarity': 'cosine',  # 'l2_norm', 'dot_product', 'cosine'
+                    'm': 16,
+                    'ef_construction': 100,
+                    'num_candidates': 10000,
+                },
+                docarray.typing.ID: {'type': 'keyword'},
+                bool: {'type': 'boolean'},
+                int: {'type': 'integer'},
+                float: {'type': 'float'},
+                str: {'type': 'text'},
+                # `None` is not a Type, but we allow it here anyway
+                None: {},  # type: ignore
+            }
+        )
+
+    ###############################################
+    # Implementation of abstract methods          #
+    ###############################################
+
+    def python_type_to_db_type(self, python_type: Type) -> Any:
+        """Resolve the column type used by the index for a schema field type.
+
+        Vector-like types become `np.ndarray`; supported scalars map to themselves.
+        """
+        if any(issubclass(python_type, vec) for vec in ELASTIC_PY_VEC_TYPES):
+            return np.ndarray
+        if python_type not in ELASTIC_PY_TYPES:
+            raise ValueError(f'Unsupported column type for {type(self)}: {python_type}')
+        return python_type
+
+    def _index(
+        self,
+        column_to_data: Dict[str, Generator[Any, None, None]],
+        refresh: bool = True,
+    ):
+        """Bulk-index the rows of `column_to_data`, warn on failures, optionally refresh."""
+        data = self._transpose_col_value_dict(column_to_data)  # type: ignore
+        requests = []
+        # NOTE(review): all-zero vectors get +1e-9 below; presumably ES rejects them - confirm
+        for row in data:
+            request = {
+                '_index': self._index_name,
+                '_id': row['id'],
+            }
+            for col_name, col in self._column_infos.items():
+                if not col.config:
+                    continue
+                if col.db_type == np.ndarray and np.all(row[col_name] == 0):
+                    row[col_name] = row[col_name] + 1.0e-9
+                request[col_name] = row[col_name]
+            requests.append(request)
+
+        _, warning_info = self._send_requests(requests)
+        for info in warning_info:
+            warnings.warn(str(info))
+
+        if refresh:
+            self._refresh(self._index_name)
+
+    def num_docs(self) -> int:
+        return self._client.count(index=self._index_name)['count']
+
+    def _del_items(self, doc_ids: Sequence[str]):
+        """Bulk-delete the given ids, warn about failed deletes, then refresh."""
+        delete_actions = [
+            {'_op_type': 'delete', '_index': self._index_name, '_id': doc_id}
+            for doc_id in doc_ids
+        ]
+
+        _, failures = self._send_requests(delete_actions)
+
+        # every failed delete is reported as a missing id, in a single warning
+        if failures:
+            missing = [failure['delete']['_id'] for failure in failures]
+            warnings.warn(f'No document with id {missing} found')
+
+        self._refresh(self._index_name)
+
+    def _get_items(self, doc_ids: Sequence[str]) -> Sequence[TSchema]:
+        """Fetch stored documents by id with a single `mget` request.
+
+        :param doc_ids: ids of the documents to fetch.
+        :return: the `_source` dict of every document that was found, in
+            response order; ids that were not found only trigger a warning.
+        """
+        response = self._client.mget(
+            index=self._index_name,
+            ids=doc_ids,  # type: ignore
+        )
+        rows = response['docs']
+
+        found_docs = [row['_source'] for row in rows if row['found']]
+        missing_ids = [row['_id'] for row in rows if not row['found']]
+
+        # a single warning lists every id the index did not contain
+        if missing_ids:
+            warnings.warn(f'No document with id {missing_ids} found')
+
+        return found_docs
+
+    def _find(
+        self,
+        query: np.ndarray,
+        limit: int,
+        search_field: str = '',
+    ) -> _FindResult:
+        knn_query = {
+            'field': search_field,
+            'query_vector': query,
+            'k': limit,
+            'num_candidates': self._runtime_config.default_column_config[np.ndarray][
+                'num_candidates'
+            ],
+        }
+
+        resp = self._client.search(
+            index=self._index_name,
+            knn=knn_query,
+            size=limit,
+        )
+
+        docs, scores = self._format_response(resp)
+
+        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
+
+    def _find_batched(
+        self,
+        queries: np.ndarray,
+        limit: int,
+        search_field: str = '',
+    ) -> _FindResultBatched:
+        """Run one kNN search per query vector and collect the per-query results."""
+        per_query = [self._find(vec, limit, search_field) for vec in queries]
+
+        # `_find` yields (documents, scores) pairs; regroup them column-wise
+        docs_batched = [docs for docs, _ in per_query]
+        scores_batched = [scores for _, scores in per_query]
+
+        batched_scores = np.array(scores_batched)
+        return _FindResultBatched(documents=docs_batched, scores=batched_scores)  # type: ignore
+
+    def _filter(
+        self,
+        filter_query: Dict[str, Any],
+        limit: int,
+    ) -> List[Dict]:
+        resp = self._client.search(
+            index=self._index_name,
+            query=filter_query,
+            size=limit,
+        )
+
+        docs, _ = self._format_response(resp)
+
+        return docs
+
+    def _filter_batched(
+        self,
+        filter_queries: Any,
+        limit: int,
+    ) -> List[List[Dict]]:
+        """Apply `_filter` to each query in turn, keeping the input order."""
+        return [
+            self._filter(single_query, limit) for single_query in filter_queries
+        ]
+
+    def _text_search(
+        self,
+        query: str,
+        limit: int,
+        search_field: str = '',
+    ) -> _FindResult:
+        search_query = {
+            "bool": {
+                "must": [
+                    {"match": {search_field: query}},
+                ],
+            }
+        }
+
+        resp = self._client.search(
+            index=self._index_name,
+            query=search_query,
+            size=limit,
+        )
+
+        docs, scores = self._format_response(resp)
+
+        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
+
+    def _text_search_batched(
+        self,
+        queries: Sequence[str],
+        limit: int,
+        search_field: str = '',
+    ) -> _FindResultBatched:
+        """Run one full-text search per query string and collect the results."""
+        per_query = [self._text_search(text, limit, search_field) for text in queries]
+
+        # `_text_search` yields (documents, scores) pairs; regroup them column-wise
+        docs_batched = [docs for docs, _ in per_query]
+        scores_batched = [scores for _, scores in per_query]
+
+        batched_scores = np.array(scores_batched, dtype=object)
+        return _FindResultBatched(documents=docs_batched, scores=batched_scores)  # type: ignore
+
+    def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
+        if args or kwargs:
+            raise ValueError(
+                f'args and kwargs not supported for `execute_query` on {type(self)}'
+            )
+
+        resp = self._client.search(index=self._index_name, **query)
+        docs, scores = self._format_response(resp)
+        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
+
+    ###############################################
+    # Helpers                                     #
+    ###############################################
+
+    # ElasticSearch helpers
+    def _create_index(self, col: '_ColumnInfo') -> Dict[str, Any]:
+        """Build the Elasticsearch field mapping for one column from its config."""
+        mapping = {key: col.config[key] for key in self._index_init_params}
+        if col.db_type != np.ndarray:
+            return mapping
+        # vector column: copy the vector settings and attach the HNSW options
+        mapping.update((key, col.config[key]) for key in self._index_vector_params)
+        if col.n_dim:
+            mapping['dims'] = col.n_dim
+        hnsw = {key: col.config[key] for key in self._index_vector_options}
+        hnsw['type'] = 'hnsw'
+        mapping['index_options'] = hnsw
+        return mapping
+
+    def _send_requests(
+        self, request: Iterable[Dict[str, Any]], **kwargs
+    ) -> Tuple[List[Dict], List[Any]]:
+        """Run the actions through `parallel_bulk` and split results by outcome.
+
+        Errors are collected instead of raised. Returns `(succeeded, failed)`.
+        """
+        # TODO chunk_size
+        succeeded: List[Dict] = []
+        failed: List[Any] = []
+        bulk_results = parallel_bulk(
+            self._client,
+            request,
+            raise_on_error=False,
+            raise_on_exception=False,
+            **kwargs,
+        )
+        for ok, item in bulk_results:
+            bucket = succeeded if ok else failed
+            bucket.append(item)
+
+        return succeeded, failed
+
+    def _format_response(self, response: Any) -> Tuple[List[Dict], List[float]]:
+        """Split a search response into parallel lists of documents and scores.
+
+        Each hit's `_source` dict gets the hit's `_id` written to its `id` key.
+        """
+        pairs = []
+        for hit in response['hits']['hits']:
+            hit['_source']['id'] = hit['_id']
+            pairs.append((hit['_source'], hit['_score']))
+        return [doc for doc, _ in pairs], [score for _, score in pairs]
+
+    def _refresh(self, index_name: str):
+        self._client.indices.refresh(index=index_name)
diff --git a/poetry.lock b/poetry.lock
index 8b8d2526924..2ef1f5edf90 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -155,7 +155,7 @@ dev = ["Sphinx (==4.3.2)", "black (==22.3.0)", "build (==0.8.0)", "flake8 (==4.0
 name = "certifi"
 version = "2022.9.24"
 description = "Python package for providing Mozilla's CA Bundle."
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.6"
 
@@ -252,6 +252,36 @@ category = "dev"
 optional = false
 python-versions = "*"
 
+[[package]]
+name = "elastic-transport"
+version = "8.4.0"
+description = "Transport classes and utilities shared among Python Elastic client libraries"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+
+[package.dependencies]
+certifi = "*"
+urllib3 = ">=1.26.2,<2"
+
+[package.extras]
+develop = ["aiohttp", "mock", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "trustme"]
+
+[[package]]
+name = "elasticsearch"
+version = "8.6.2"
+description = "Python client for Elasticsearch"
+category = "main"
+optional = false
+python-versions = ">=3.6, <4"
+
+[package.dependencies]
+elastic-transport = ">=8,<9"
+
+[package.extras]
+async = ["aiohttp (>=3,<4)"]
+requests = ["requests (>=2.4.0,<3.0.0)"]
+
 [[package]]
 name = "entrypoints"
 version = "0.4"
@@ -1626,7 +1656,7 @@ typing-extensions = ">=3.7.4"
 name = "urllib3"
 version = "1.26.14"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 
@@ -1724,6 +1754,7 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
 [extras]
 audio = ["pydub"]
 common = ["protobuf", "lz4"]
+elasticsearch = ["elasticsearch", "elastic-transport"]
 hnswlib = ["hnswlib"]
 image = ["pillow", "types-pillow"]
 mesh = ["trimesh"]
@@ -1734,8 +1765,8 @@ web = ["fastapi"]
 
 [metadata]
 lock-version = "1.1"
-python-versions = ">=3.7"
-content-hash = "3ec1e886d794ed803736bc2b49a626c600a97bda8a3db734b53604c10f08d252"
+python-versions = ">=3.7, <4"
+content-hash = "b4a74c6ca07e3ebe3beda8ba5909257646a41410ac45c39d7277bc1bfa9e37a2"
 
 [metadata.files]
 anyio = [
@@ -1984,6 +2015,14 @@ distlib = [
     {file = "distlib-0.3.6-py2.py3-none-any.whl", hash = "sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e"},
     {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"},
 ]
+elastic-transport = [
+    {file = "elastic-transport-8.4.0.tar.gz", hash = "sha256:b9ad708ceb7fcdbc6b30a96f886609a109f042c0b9d9f2e44403b3133ba7ff10"},
+    {file = "elastic_transport-8.4.0-py3-none-any.whl", hash = "sha256:19db271ab79c9f70f8c43f8f5b5111408781a6176b54ab2e54d713b6d9ceb815"},
+]
+elasticsearch = [
+    {file = "elasticsearch-8.6.2-py3-none-any.whl", hash = "sha256:8ccbebd9a0f6f523c7db67bb54863dde8bdb93daae4ff97f7c814e0500a73e84"},
+    {file = "elasticsearch-8.6.2.tar.gz", hash = "sha256:084458e84caa91e3ad807b68aa82c022e785bead853a3b125641a25e894a1d47"},
+]
 entrypoints = [
     {file = "entrypoints-0.4-py3-none-any.whl", hash = "sha256:f174b5ff827504fd3cd97cc3f8649f3693f51538c7e4bdf3ef002c8429d42f9f"},
     {file = "entrypoints-0.4.tar.gz", hash = "sha256:b706eddaa9218a19ebcd67b56818f05bb27589b1ca9e8d797b74affad4ccacd4"},
diff --git a/pyproject.toml b/pyproject.toml
index 22636b46fa5..66ba648117c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,7 @@ authors=['DocArray']
 license='Apache 2.0'
 
 [tool.poetry.dependencies]
-python = ">=3.7"
+python = ">=3.7, <4"
 pydantic = ">=1.10.2"
 numpy = ">=1.17.3"
 protobuf = { version = ">=3.19.0", optional = true }
@@ -24,6 +24,8 @@ hnswlib = {version = ">=0.6.2", optional = true }
 lz4 = {version= ">=1.0.0", optional = true}
 pydub = {version = "^0.25.1", optional = true }
 pandas = {version = ">=1.1.0", optional = true }
+elastic-transport = "^8.4.0"
+elasticsearch = "^8.6.2"
 
 [tool.poetry.extras]
 common = ["protobuf", "lz4"]
@@ -35,6 +37,7 @@ mesh = ["trimesh"]
 web = ["fastapi"]
 hnswlib = ["hnswlib"]
 pandas = ["pandas"]
+elasticsearch = ["elasticsearch", "elastic_transport"]
 
 [tool.poetry.dev-dependencies]
 pytest = ">=7.0"
diff --git a/tests/doc_index/elastic/fixture.py b/tests/doc_index/elastic/fixture.py
new file mode 100644
index 00000000000..3862b04f8d4
--- /dev/null
+++ b/tests/doc_index/elastic/fixture.py
@@ -0,0 +1,58 @@
+import os
+import time
+
+import pytest
+from pydantic import Field
+
+from docarray import BaseDocument
+from docarray.typing import NdArray
+
+pytestmark = [pytest.mark.slow, pytest.mark.doc_index]
+
+
+class SimpleDoc(BaseDocument):
+    tens: NdArray[10] = Field(dims=1000)
+
+
+class FlatDoc(BaseDocument):
+    tens_one: NdArray = Field(dims=10)
+    tens_two: NdArray = Field(dims=50)
+
+
+class NestedDoc(BaseDocument):
+    d: SimpleDoc
+
+
+class DeepNestedDoc(BaseDocument):
+    d: NestedDoc
+
+
+cur_dir = os.path.dirname(os.path.abspath(__file__))
+compose_yml_v7 = os.path.abspath(os.path.join(cur_dir, 'v7/docker-compose.yml'))
+compose_yml_v8 = os.path.abspath(os.path.join(cur_dir, 'v8/docker-compose.yml'))
+
+
+@pytest.fixture(scope='module', autouse=True)
+def start_storage_v7():
+    os.system(f"docker-compose -f {compose_yml_v7} up -d --remove-orphans")
+    _wait_for_es()
+
+    yield
+    os.system(f"docker-compose -f {compose_yml_v7} down --remove-orphans")
+
+
+@pytest.fixture(scope='module', autouse=True)
+def start_storage_v8():
+    os.system(f"docker-compose -f {compose_yml_v8} up -d --remove-orphans")
+    _wait_for_es()
+
+    yield
+    os.system(f"docker-compose -f {compose_yml_v8} down --remove-orphans")
+
+
+def _wait_for_es():
+    from elasticsearch import Elasticsearch
+
+    es = Elasticsearch(hosts='http://localhost:9200/')
+    while not es.ping():
+        time.sleep(0.5)
diff --git a/tests/doc_index/elastic/v8/docker-compose.yml b/tests/doc_index/elastic/v8/docker-compose.yml
new file mode 100644
index 00000000000..70eedba34f5
--- /dev/null
+++ b/tests/doc_index/elastic/v8/docker-compose.yml
@@ -0,0 +1,16 @@
+version: "3.3"
+services:
+  elastic:
+    image: docker.elastic.co/elasticsearch/elasticsearch:8.6.2
+    environment:
+      - xpack.security.enabled=false
+      - discovery.type=single-node
+      - ES_JAVA_OPTS=-Xmx1024m
+    ports:
+      - "9200:9200"
+    networks:
+      - elastic
+
+networks:
+  elastic:
+    name: elastic
\ No newline at end of file
diff --git a/tests/doc_index/elastic/v8/test_find.py b/tests/doc_index/elastic/v8/test_find.py
new file mode 100644
index 00000000000..944ed9887d7
--- /dev/null
+++ b/tests/doc_index/elastic/v8/test_find.py
@@ -0,0 +1,278 @@
+import numpy as np
+import pytest
+from pydantic import Field
+
+from docarray import BaseDocument
+from docarray.doc_index.backends.elasticv8_doc_index import ElasticDocumentV8Index
+from docarray.typing import NdArray
+from tests.doc_index.elastic.fixture import start_storage_v8  # noqa: F401
+from tests.doc_index.elastic.fixture import FlatDoc, SimpleDoc
+
+
+@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+def test_find_simple_schema(similarity):
+    class SimpleSchema(BaseDocument):
+        tens: NdArray[10] = Field(similarity=similarity)
+
+    store = ElasticDocumentV8Index[SimpleSchema]()
+
+    index_docs = []
+    for _ in range(10):
+        vec = np.random.rand(10)
+        if similarity == 'dot_product':
+            vec = vec / np.linalg.norm(vec)
+        index_docs.append(SimpleDoc(tens=vec))
+    store.index(index_docs)
+
+    query = index_docs[-1]
+    docs, scores = store.find(query, search_field='tens', limit=5)
+
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].tens, index_docs[-1].tens)
+
+
+@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+def test_find_flat_schema(similarity):
+    class FlatSchema(BaseDocument):
+        tens_one: NdArray = Field(dims=10, similarity=similarity)
+        tens_two: NdArray = Field(dims=50, similarity=similarity)
+
+    store = ElasticDocumentV8Index[FlatSchema]()
+
+    index_docs = []
+    for _ in range(10):
+        vec_one = np.random.rand(10)
+        vec_two = np.random.rand(50)
+        if similarity == 'dot_product':
+            vec_one = vec_one / np.linalg.norm(vec_one)
+            vec_two = vec_two / np.linalg.norm(vec_two)
+        index_docs.append(FlatDoc(tens_one=vec_one, tens_two=vec_two))
+
+    store.index(index_docs)
+
+    query = index_docs[-1]
+
+    # find on tens_one
+    docs, scores = store.find(query, search_field='tens_one', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
+    assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
+
+    # find on tens_two
+    docs, scores = store.find(query, search_field='tens_two', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
+    assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
+
+
+@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+def test_find_nested_schema(similarity):
+    class SimpleDoc(BaseDocument):
+        tens: NdArray[10] = Field(similarity=similarity)
+
+    class NestedDoc(BaseDocument):
+        d: SimpleDoc
+        tens: NdArray[10] = Field(similarity=similarity)
+
+    class DeepNestedDoc(BaseDocument):
+        d: NestedDoc
+        tens: NdArray = Field(similarity=similarity, dims=10)
+
+    store = ElasticDocumentV8Index[DeepNestedDoc]()
+
+    index_docs = []
+    for _ in range(10):
+        vec_simple = np.random.rand(10)
+        vec_nested = np.random.rand(10)
+        vec_deep = np.random.rand(10)
+        if similarity == 'dot_product':
+            vec_simple = vec_simple / np.linalg.norm(vec_simple)
+            vec_nested = vec_nested / np.linalg.norm(vec_nested)
+            vec_deep = vec_deep / np.linalg.norm(vec_deep)
+        index_docs.append(
+            DeepNestedDoc(
+                d=NestedDoc(d=SimpleDoc(tens=vec_simple), tens=vec_nested),
+                tens=vec_deep,
+            )
+        )
+
+    store.index(index_docs)
+
+    query = index_docs[-1]
+
+    # find on root level
+    docs, scores = store.find(query, search_field='tens', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].tens, index_docs[-1].tens)
+
+    # find on first nesting level
+    docs, scores = store.find(query, search_field='d__tens', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].d.tens, index_docs[-1].d.tens)
+
+    # find on second nesting level
+    docs, scores = store.find(query, search_field='d__d__tens', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].d.d.tens, index_docs[-1].d.d.tens)
+
+
+def test_find_batched():
+    store = ElasticDocumentV8Index[SimpleDoc]()
+
+    index_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(10)]
+    store.index(index_docs)
+
+    queries = index_docs[-2:]
+    docs_batched, scores_batched = store.find_batched(
+        queries, search_field='tens', limit=5
+    )
+
+    for docs, scores, query in zip(docs_batched, scores_batched, queries):
+        assert len(docs) == 5
+        assert len(scores) == 5
+        assert docs[0].id == query.id
+        assert np.allclose(docs[0].tens, query.tens)
+
+
+def test_filter():
+    import itertools
+
+    class MyDoc(BaseDocument):
+        A: bool
+        B: int
+        C: float
+
+    store = ElasticDocumentV8Index[MyDoc]()
+
+    A_list = [True, False]
+    B_list = [1, 2]
+    C_list = [1.5, 2.5]
+
+    # cross product of all possible combinations
+    combinations = itertools.product(A_list, B_list, C_list)
+    index_docs = [MyDoc(A=A, B=B, C=C) for A, B, C in combinations]
+    store.index(index_docs)
+
+    filter_query = {'term': {'A': True}}
+    docs = store.filter(filter_query)
+    assert len(docs) > 0
+    for doc in docs:
+        assert doc.A
+
+    filter_query = {'term': {'B': 1}}
+    docs = store.filter(filter_query)
+    assert len(docs) > 0
+    for doc in docs:
+        assert doc.B == 1
+
+    filter_query = {'term': {'C': 1.5}}
+    docs = store.filter(filter_query)
+    assert len(docs) > 0
+    for doc in docs:
+        assert doc.C == 1.5
+
+
+def test_text_search():
+    class MyDoc(BaseDocument):
+        text: str
+
+    store = ElasticDocumentV8Index[MyDoc]()
+    index_docs = [
+        MyDoc(text='hello world'),
+        MyDoc(text='never gonna give you up'),
+        MyDoc(text='we are the world'),
+    ]
+    store.index(index_docs)
+
+    query = 'world'
+    docs, scores = store.text_search(query, search_field='text')
+
+    assert len(docs) == 2
+    assert len(scores) == 2
+    assert docs[0].text.index(query) >= 0
+    assert docs[1].text.index(query) >= 0
+
+    queries = ['world', 'never']
+    docs, scores = store.text_search_batched(queries, search_field='text')
+    for query, da, score in zip(queries, docs, scores):
+        assert len(da) > 0
+        assert len(score) > 0
+        for doc in da:
+            assert doc.text.index(query) >= 0
+
+
+def test_query_builder():
+    class MyDoc(BaseDocument):
+        tens: NdArray[10] = Field(similarity='l2_norm')
+        num: int
+        text: str
+
+    store = ElasticDocumentV8Index[MyDoc]()
+    index_docs = [
+        MyDoc(id=f'{i}', tens=np.ones(10) * i, num=int(i / 2), text=f'text {int(i/2)}')
+        for i in range(10)
+    ]
+    store.index(index_docs)
+
+    # build_query
+    q = store.build_query()
+    assert isinstance(q, store.QueryBuilder)
+
+    # filter
+    q = store.build_query().filter({'term': {'num': 0}}).build()
+    docs, _ = store.execute_query(q)
+    assert [doc['id'] for doc in docs] == ['0', '1']
+
+    # find
+    q = store.build_query().find(index_docs[-1], search_field='tens', limit=3).build()
+    docs, _ = store.execute_query(q)
+    assert [doc['id'] for doc in docs] == ['9', '8', '7']
+
+    # text_search
+    q = store.build_query().text_search('0', search_field='text').build()
+    docs, _ = store.execute_query(q)
+    assert [doc['id'] for doc in docs] == ['0', '1']
+
+    # combination
+    q = (
+        store.build_query()
+        .filter({'range': {'num': {'lte': 3}}})
+        .find(index_docs[-1], search_field='tens')
+        .text_search('0', search_field='text')
+        .build()
+    )
+    docs, _ = store.execute_query(q)
+    assert [doc['id'] for doc in docs] == ['1', '0']
+
+    # direct
+    query = {
+        'knn': {
+            'field': 'tens',
+            'query_vector': [9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0],
+            'k': 10,
+            'num_candidates': 10000,
+            'filter': {
+                'bool': {
+                    'filter': [
+                        {'range': {'num': {'gte': 2}}},
+                        {'range': {'num': {'lte': 3}}},
+                    ]
+                }
+            },
+        },
+    }
+
+    docs, _ = store.execute_query(query)
+    assert [doc['id'] for doc in docs] == ['7', '6', '5', '4']
diff --git a/tests/doc_index/elastic/v8/test_index_get_del.py b/tests/doc_index/elastic/v8/test_index_get_del.py
new file mode 100644
index 00000000000..c826ed344c4
--- /dev/null
+++ b/tests/doc_index/elastic/v8/test_index_get_del.py
@@ -0,0 +1,232 @@
+import numpy as np
+import pytest
+
+from docarray import DocumentArray
+from docarray.doc_index.backends.elasticv8_doc_index import ElasticDocumentV8Index
+from tests.doc_index.elastic.fixture import start_storage_v8  # noqa: F401
+from tests.doc_index.elastic.fixture import DeepNestedDoc, FlatDoc, NestedDoc, SimpleDoc
+
+
+@pytest.fixture
+def ten_simple_docs():
+    return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)]
+
+
+@pytest.fixture
+def ten_flat_docs():
+    return [
+        FlatDoc(tens_one=np.random.randn(10), tens_two=np.random.randn(50))
+        for _ in range(10)
+    ]
+
+
+@pytest.fixture
+def ten_nested_docs():
+    return [NestedDoc(d=SimpleDoc(tens=np.random.randn(10))) for _ in range(10)]
+
+
+@pytest.fixture
+def ten_deep_nested_docs():
+    return [
+        DeepNestedDoc(d=NestedDoc(d=SimpleDoc(tens=np.random.randn(10))))
+        for _ in range(10)
+    ]
+
+
+@pytest.mark.parametrize('use_docarray', [True, False])
+def test_index_simple_schema(ten_simple_docs, use_docarray):
+    store = ElasticDocumentV8Index[SimpleDoc]()
+    if use_docarray:
+        ten_simple_docs = DocumentArray[SimpleDoc](ten_simple_docs)
+
+    store.index(ten_simple_docs)
+    assert store.num_docs() == 10
+
+
+@pytest.mark.parametrize('use_docarray', [True, False])
+def test_index_flat_schema(ten_flat_docs, use_docarray):
+    store = ElasticDocumentV8Index[FlatDoc]()
+    if use_docarray:
+        ten_flat_docs = DocumentArray[FlatDoc](ten_flat_docs)
+
+    store.index(ten_flat_docs)
+    assert store.num_docs() == 10
+
+
+@pytest.mark.parametrize('use_docarray', [True, False])
+def test_index_nested_schema(ten_nested_docs, use_docarray):
+    store = ElasticDocumentV8Index[NestedDoc]()
+    if use_docarray:
+        ten_nested_docs = DocumentArray[NestedDoc](ten_nested_docs)
+
+    store.index(ten_nested_docs)
+    assert store.num_docs() == 10
+
+
+@pytest.mark.parametrize('use_docarray', [True, False])
+def test_index_deep_nested_schema(ten_deep_nested_docs, use_docarray):
+    store = ElasticDocumentV8Index[DeepNestedDoc]()
+    if use_docarray:
+        ten_deep_nested_docs = DocumentArray[DeepNestedDoc](ten_deep_nested_docs)
+
+    store.index(ten_deep_nested_docs)
+    assert store.num_docs() == 10
+
+
+def test_get_single(ten_simple_docs, ten_flat_docs, ten_nested_docs):
+    # simple
+    store = ElasticDocumentV8Index[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    assert store.num_docs() == 10
+    for d in ten_simple_docs:
+        id_ = d.id
+        assert store[id_].id == id_
+        assert np.all(store[id_].tens == d.tens)
+
+    # flat
+    store = ElasticDocumentV8Index[FlatDoc]()
+    store.index(ten_flat_docs)
+
+    assert store.num_docs() == 10
+    for d in ten_flat_docs:
+        id_ = d.id
+        assert store[id_].id == id_
+        assert np.all(store[id_].tens_one == d.tens_one)
+        assert np.all(store[id_].tens_two == d.tens_two)
+
+    # nested
+    store = ElasticDocumentV8Index[NestedDoc]()
+    store.index(ten_nested_docs)
+
+    assert store.num_docs() == 10
+    for d in ten_nested_docs:
+        id_ = d.id
+        assert store[id_].id == id_
+        assert store[id_].d.id == d.d.id
+        assert np.all(store[id_].d.tens == d.d.tens)
+
+
+def test_get_multiple(ten_simple_docs, ten_flat_docs, ten_nested_docs):
+    docs_to_get_idx = [0, 2, 4, 6, 8]
+
+    # simple
+    store = ElasticDocumentV8Index[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    assert store.num_docs() == 10
+    docs_to_get = [ten_simple_docs[i] for i in docs_to_get_idx]
+    ids_to_get = [d.id for d in docs_to_get]
+    retrieved_docs = store[ids_to_get]
+    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+        assert d_out.id == id_
+        assert np.all(d_out.tens == d_in.tens)
+
+    # flat
+    store = ElasticDocumentV8Index[FlatDoc]()
+    store.index(ten_flat_docs)
+
+    assert store.num_docs() == 10
+    docs_to_get = [ten_flat_docs[i] for i in docs_to_get_idx]
+    ids_to_get = [d.id for d in docs_to_get]
+    retrieved_docs = store[ids_to_get]
+    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+        assert d_out.id == id_
+        assert np.all(d_out.tens_one == d_in.tens_one)
+        assert np.all(d_out.tens_two == d_in.tens_two)
+
+    # nested
+    store = ElasticDocumentV8Index[NestedDoc]()
+    store.index(ten_nested_docs)
+
+    assert store.num_docs() == 10
+    docs_to_get = [ten_nested_docs[i] for i in docs_to_get_idx]
+    ids_to_get = [d.id for d in docs_to_get]
+    retrieved_docs = store[ids_to_get]
+    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+        assert d_out.id == id_
+        assert d_out.d.id == d_in.d.id
+        assert np.all(d_out.d.tens == d_in.d.tens)
+
+
+def test_get_key_error(ten_simple_docs):
+    store = ElasticDocumentV8Index[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    with pytest.raises(KeyError):
+        store['not_a_real_id']
+
+
+def test_del_single(ten_simple_docs):
+    store = ElasticDocumentV8Index[SimpleDoc]()
+    store.index(ten_simple_docs)
+    # delete once
+    assert store.num_docs() == 10
+    del store[ten_simple_docs[0].id]
+    assert store.num_docs() == 9
+    for i, d in enumerate(ten_simple_docs):
+        id_ = d.id
+        if i == 0:  # deleted
+            with pytest.raises(KeyError):
+                store[id_]
+        else:
+            assert store[id_].id == id_
+            assert np.all(store[id_].tens == d.tens)
+    # delete again
+    del store[ten_simple_docs[3].id]
+    assert store.num_docs() == 8
+    for i, d in enumerate(ten_simple_docs):
+        id_ = d.id
+        if i in (0, 3):  # deleted
+            with pytest.raises(KeyError):
+                store[id_]
+        else:
+            assert store[id_].id == id_
+            assert np.all(store[id_].tens == d.tens)
+
+
+def test_del_multiple(ten_simple_docs):
+    docs_to_del_idx = [0, 2, 4, 6, 8]
+
+    store = ElasticDocumentV8Index[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    assert store.num_docs() == 10
+    docs_to_del = [ten_simple_docs[i] for i in docs_to_del_idx]
+    ids_to_del = [d.id for d in docs_to_del]
+    del store[ids_to_del]
+    for i, doc in enumerate(ten_simple_docs):
+        if i in docs_to_del_idx:
+            with pytest.raises(KeyError):
+                store[doc.id]
+        else:
+            assert store[doc.id].id == doc.id
+            assert np.all(store[doc.id].tens == doc.tens)
+
+
+def test_del_key_error(ten_simple_docs):
+    store = ElasticDocumentV8Index[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    with pytest.warns(UserWarning):
+        del store['not_a_real_id']
+
+
+def test_num_docs(ten_simple_docs):
+    store = ElasticDocumentV8Index[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    assert store.num_docs() == 10
+
+    del store[ten_simple_docs[0].id]
+    assert store.num_docs() == 9
+
+    del store[ten_simple_docs[3].id, ten_simple_docs[5].id]
+    assert store.num_docs() == 7
+
+    more_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(5)]
+    store.index(more_docs)
+    assert store.num_docs() == 12
+
+    del store[more_docs[2].id, ten_simple_docs[7].id]
+    assert store.num_docs() == 10

From 6ba2f32ccd327f861b9be722cfbf1c2ff4f0f3df Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Thu, 23 Mar 2023 14:12:52 +0800
Subject: [PATCH 02/14] fix: update

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 docarray/index/__init__.py                    |   3 +-
 .../{elasticv8_doc_index.py => elasticv8.py}  | 110 ++++++++++--------
 poetry.lock                                   |  49 +++++++-
 tests/{doc_index => index}/elastic/fixture.py |   0
 .../elastic/v8/docker-compose.yml             |   0
 .../elastic/v8/test_find.py                   |  35 +++---
 .../elastic/v8/test_index_get_del.py          |   6 +-
 7 files changed, 121 insertions(+), 82 deletions(-)
 rename docarray/index/backends/{elasticv8_doc_index.py => elasticv8.py} (87%)
 rename tests/{doc_index => index}/elastic/fixture.py (100%)
 rename tests/{doc_index => index}/elastic/v8/docker-compose.yml (100%)
 rename tests/{doc_index => index}/elastic/v8/test_find.py (91%)
 rename tests/{doc_index => index}/elastic/v8/test_index_get_del.py (96%)

diff --git a/docarray/index/__init__.py b/docarray/index/__init__.py
index 5fdbf8ad736..dd348fda606 100644
--- a/docarray/index/__init__.py
+++ b/docarray/index/__init__.py
@@ -1,3 +1,4 @@
+from docarray.index.backends.elasticv8 import ElasticDocumentV8Index
 from docarray.index.backends.hnswlib import HnswDocumentIndex
 
-__all__ = ['HnswDocumentIndex']
+__all__ = ['HnswDocumentIndex', 'ElasticDocumentV8Index']
diff --git a/docarray/index/backends/elasticv8_doc_index.py b/docarray/index/backends/elasticv8.py
similarity index 87%
rename from docarray/index/backends/elasticv8_doc_index.py
rename to docarray/index/backends/elasticv8.py
index ee10d20c0d2..2b84f640272 100644
--- a/docarray/index/backends/elasticv8_doc_index.py
+++ b/docarray/index/backends/elasticv8.py
@@ -26,7 +26,7 @@
 
 import docarray.typing
 from docarray import BaseDocument
-from docarray.doc_index.abstract_doc_index import (
+from docarray.index.abstract import (
     BaseDocumentIndex,
     _ColumnInfo,
     _FindResultBatched,
@@ -39,8 +39,7 @@
 TSchema = TypeVar('TSchema', bound=BaseDocument)
 T = TypeVar('T', bound='ElasticDocumentV8Index')
 
-ELASTIC_PY_VEC_TYPES = [list, tuple, np.ndarray]
-ELASTIC_PY_TYPES = [bool, int, float, str, docarray.typing.ID]
+ELASTIC_PY_VEC_TYPES: List[Any] = [np.ndarray]
 if torch_imported:
     import torch
 
@@ -64,7 +63,6 @@ def __init__(self, db_config=None, **kwargs):
         )
 
         # ElasticSearh index setup
-        self._index_init_params = ('type',)
         self._index_vector_params = ('dims', 'similarity', 'index')
         self._index_vector_options = ('m', 'ef_construction')
 
@@ -75,11 +73,9 @@ def __init__(self, db_config=None, **kwargs):
         }
 
         for col_name, col in self._column_infos.items():
-            if not col.config:
-                continue  # do not create column index if no config is given
-            mappings['properties'][col_name] = self._create_index(col)
+            mappings['properties'][col_name] = self._create_index_mapping(col)
 
-        if self._client.indices.exists(index=self._index_name):  # type: ignore
+        if self._client.indices.exists(index=self._index_name):
             self._client.indices.put_mapping(
                 index=self._index_name, properties=mappings['properties']
             )
@@ -131,7 +127,7 @@ def find(
                 'query_vector': query_vec_np,
                 'k': limit,
                 'num_candidates': self._outer_instance._runtime_config.default_column_config[
-                    np.ndarray
+                    'dense_vector'
                 ][
                     'num_candidates'
                 ],
@@ -160,7 +156,7 @@ def build_query(self, **kwargs) -> QueryBuilder:
         """
         Build a query for this DocumentIndex.
         """
-        return self.QueryBuilder(self, **kwargs)  # type: ignore
+        return self.QueryBuilder(self, **kwargs)
 
     @dataclass
     class DBConfig(BaseDocumentIndex.DBConfig):
@@ -174,10 +170,9 @@ class DBConfig(BaseDocumentIndex.DBConfig):
 
     @dataclass
     class RuntimeConfig(BaseDocumentIndex.RuntimeConfig):
-        default_column_config: Dict[Type, Dict[str, Any]] = field(
+        default_column_config: Dict[Any, Dict[str, Any]] = field(
             default_factory=lambda: {
-                np.ndarray: {
-                    'type': 'dense_vector',
+                'dense_vector': {
                     'index': True,
                     'dims': 128,
                     'similarity': 'cosine',  # 'l2_norm', 'dot_product', 'cosine'
@@ -185,15 +180,16 @@ class RuntimeConfig(BaseDocumentIndex.RuntimeConfig):
                     'ef_construction': 100,
                     'num_candidates': 10000,
                 },
-                docarray.typing.ID: {'type': 'keyword'},
-                bool: {'type': 'boolean'},
-                int: {'type': 'integer'},
-                float: {'type': 'float'},
-                str: {'type': 'text'},
+                'keyword': {},
+                'boolean': {},
+                'integer': {},
+                'float': {},
+                'text': {},
                 # `None` is not a Type, but we allow it here anyway
                 None: {},  # type: ignore
             }
         )
+        chunk_size: int = 500
 
     ###############################################
     # Implementation of abstract methods          #
@@ -203,10 +199,18 @@ def python_type_to_db_type(self, python_type: Type) -> Any:
         """Map python type to database type."""
         for allowed_type in ELASTIC_PY_VEC_TYPES:
             if issubclass(python_type, allowed_type):
-                return np.ndarray
+                return 'dense_vector'
+
+        elastic_py_types = {
+            bool: 'boolean',
+            int: 'integer',
+            float: 'float',
+            str: 'text',
+            docarray.typing.ID: 'keyword',
+        }
 
-        if python_type in ELASTIC_PY_TYPES:
-            return python_type
+        if python_type in elastic_py_types:
+            return elastic_py_types[python_type]
 
         raise ValueError(f'Unsupported column type for {type(self)}: {python_type}')
 
@@ -214,6 +218,7 @@ def _index(
         self,
         column_to_data: Dict[str, Generator[Any, None, None]],
         refresh: bool = True,
+        chunk_size: Optional[int] = None,
     ):
 
         data = self._transpose_col_value_dict(column_to_data)  # type: ignore
@@ -225,14 +230,12 @@ def _index(
                 '_id': row['id'],
             }
             for col_name, col in self._column_infos.items():
-                if not col.config:
-                    continue
-                if col.db_type == np.ndarray and np.all(row[col_name] == 0):
+                if col.db_type == 'dense_vector' and np.all(row[col_name] == 0):
                     row[col_name] = row[col_name] + 1.0e-9
                 request[col_name] = row[col_name]
             requests.append(request)
 
-        _, warning_info = self._send_requests(requests)
+        _, warning_info = self._send_requests(requests, chunk_size)
         for info in warning_info:
             warnings.warn(str(info))
 
@@ -242,14 +245,18 @@ def _index(
     def num_docs(self) -> int:
         return self._client.count(index=self._index_name)['count']
 
-    def _del_items(self, doc_ids: Sequence[str]):
+    def _del_items(
+        self,
+        doc_ids: Sequence[str],
+        chunk_size: Optional[int] = None,
+    ):
         requests = []
         for _id in doc_ids:
             requests.append(
                 {'_op_type': 'delete', '_index': self._index_name, '_id': _id}
             )
 
-        _, warning_info = self._send_requests(requests)
+        _, warning_info = self._send_requests(requests, chunk_size)
 
         # raise warning if some ids are not found
         if warning_info:
@@ -280,17 +287,24 @@ def _get_items(self, doc_ids: Sequence[str]) -> Sequence[TSchema]:
 
         return accumulated_docs
 
+    def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
+        if args or kwargs:
+            raise ValueError(
+                f'args and kwargs not supported for `execute_query` on {type(self)}'
+            )
+
+        resp = self._client.search(index=self._index_name, **query)
+        docs, scores = self._format_response(resp)
+        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
+
     def _find(
-        self,
-        query: np.ndarray,
-        limit: int,
-        search_field: str = '',
+        self, query: np.ndarray, limit: int, search_field: str = ''
     ) -> _FindResult:
         knn_query = {
             'field': search_field,
             'query_vector': query,
             'k': limit,
-            'num_candidates': self._runtime_config.default_column_config[np.ndarray][
+            'num_candidates': self._runtime_config.default_column_config['dense_vector'][  # type: ignore
                 'num_candidates'
             ],
         }
@@ -353,9 +367,9 @@ def _text_search(
         search_field: str = '',
     ) -> _FindResult:
         search_query = {
-            "bool": {
-                "must": [
-                    {"match": {search_field: query}},
+            'bool': {
+                'must': [
+                    {'match': {search_field: query}},
                 ],
             }
         }
@@ -386,25 +400,17 @@ def _text_search_batched(
 
         return _FindResultBatched(documents=result_das, scores=np.array(result_scores, dtype=object))  # type: ignore
 
-    def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
-        if args or kwargs:
-            raise ValueError(
-                f'args and kwargs not supported for `execute_query` on {type(self)}'
-            )
-
-        resp = self._client.search(index=self._index_name, **query)
-        docs, scores = self._format_response(resp)
-        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
-
     ###############################################
     # Helpers                                     #
     ###############################################
 
     # ElasticSearch helpers
-    def _create_index(self, col: '_ColumnInfo') -> Dict[str, Any]:
+    def _create_index_mapping(self, col: '_ColumnInfo') -> Dict[str, Any]:
         """Create a new HNSW index for a column, and initialize it."""
-        index = dict((k, col.config[k]) for k in self._index_init_params)
-        if col.db_type == np.ndarray:
+
+        index = {'type': col.config['type'] if 'type' in col.config else col.db_type}
+
+        if col.db_type == 'dense_vector':
             for k in self._index_vector_params:
                 index[k] = col.config[k]
             if col.n_dim:
@@ -416,12 +422,13 @@ def _create_index(self, col: '_ColumnInfo') -> Dict[str, Any]:
         return index
 
     def _send_requests(
-        self, request: Iterable[Dict[str, Any]], **kwargs
+        self,
+        request: Iterable[Dict[str, Any]],
+        chunk_size: Optional[int] = None,
+        **kwargs,
     ) -> Tuple[List[Dict], List[Any]]:
         """Send bulk request to Elastic and gather the successful info"""
 
-        # TODO chunk_size
-
         accumulated_info = []
         warning_info = []
         for success, info in parallel_bulk(
@@ -429,6 +436,7 @@ def _send_requests(
             request,
             raise_on_error=False,
             raise_on_exception=False,
+            chunk_size=chunk_size if chunk_size else self._runtime_config.chunk_size,  # type: ignore
             **kwargs,
         ):
             if not success:
diff --git a/poetry.lock b/poetry.lock
index d1889e68e3c..5f38ec62978 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -293,7 +293,7 @@ files = [
 name = "certifi"
 version = "2022.9.24"
 description = "Python package for providing Mozilla's CA Bundle."
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=3.6"
 files = [
@@ -512,6 +512,44 @@ files = [
     {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"},
 ]
 
+[[package]]
+name = "elastic-transport"
+version = "8.4.0"
+description = "Transport classes and utilities shared among Python Elastic client libraries"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "elastic-transport-8.4.0.tar.gz", hash = "sha256:b9ad708ceb7fcdbc6b30a96f886609a109f042c0b9d9f2e44403b3133ba7ff10"},
+    {file = "elastic_transport-8.4.0-py3-none-any.whl", hash = "sha256:19db271ab79c9f70f8c43f8f5b5111408781a6176b54ab2e54d713b6d9ceb815"},
+]
+
+[package.dependencies]
+certifi = "*"
+urllib3 = ">=1.26.2,<2"
+
+[package.extras]
+develop = ["aiohttp", "mock", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "trustme"]
+
+[[package]]
+name = "elasticsearch"
+version = "8.6.2"
+description = "Python client for Elasticsearch"
+category = "main"
+optional = false
+python-versions = ">=3.6, <4"
+files = [
+    {file = "elasticsearch-8.6.2-py3-none-any.whl", hash = "sha256:8ccbebd9a0f6f523c7db67bb54863dde8bdb93daae4ff97f7c814e0500a73e84"},
+    {file = "elasticsearch-8.6.2.tar.gz", hash = "sha256:084458e84caa91e3ad807b68aa82c022e785bead853a3b125641a25e894a1d47"},
+]
+
+[package.dependencies]
+elastic-transport = ">=8,<9"
+
+[package.extras]
+async = ["aiohttp (>=3,<4)"]
+requests = ["requests (>=2.4.0,<3.0.0)"]
+
 [[package]]
 name = "entrypoints"
 version = "0.4"
@@ -3134,7 +3172,7 @@ typing-extensions = ">=3.7.4"
 name = "urllib3"
 version = "1.26.14"
 description = "HTTP library with thread-safe connection pooling, file post, and more."
-category = "dev"
+category = "main"
 optional = false
 python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*"
 files = [
@@ -3304,7 +3342,8 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
 
 [extras]
 audio = ["pydub"]
-common = ["protobuf", "lz4"]
+common = ["lz4", "protobuf"]
+elasticsearch = ["elastic-transport", "elasticsearch"]
 hnswlib = ["hnswlib"]
 image = ["pillow", "types-pillow"]
 mesh = ["trimesh"]
@@ -3315,5 +3354,5 @@ web = ["fastapi"]
 
 [metadata]
 lock-version = "2.0"
-python-versions = ">=3.7"
-content-hash = "60dc7dedebd775c6fe3f45ddd2869a07df2c28bbc83420e875eb61e118b064b2"
+python-versions = ">=3.7, <4"
+content-hash = "379b7b8c2142c02057a764e6adf06128da9b4917744fa30bbebf9a2c129abd03"
diff --git a/tests/doc_index/elastic/fixture.py b/tests/index/elastic/fixture.py
similarity index 100%
rename from tests/doc_index/elastic/fixture.py
rename to tests/index/elastic/fixture.py
diff --git a/tests/doc_index/elastic/v8/docker-compose.yml b/tests/index/elastic/v8/docker-compose.yml
similarity index 100%
rename from tests/doc_index/elastic/v8/docker-compose.yml
rename to tests/index/elastic/v8/docker-compose.yml
diff --git a/tests/doc_index/elastic/v8/test_find.py b/tests/index/elastic/v8/test_find.py
similarity index 91%
rename from tests/doc_index/elastic/v8/test_find.py
rename to tests/index/elastic/v8/test_find.py
index 944ed9887d7..14c5d5ef20a 100644
--- a/tests/doc_index/elastic/v8/test_find.py
+++ b/tests/index/elastic/v8/test_find.py
@@ -3,10 +3,10 @@
 from pydantic import Field
 
 from docarray import BaseDocument
-from docarray.doc_index.backends.elasticv8_doc_index import ElasticDocumentV8Index
+from docarray.index import ElasticDocumentV8Index
 from docarray.typing import NdArray
-from tests.doc_index.elastic.fixture import start_storage_v8  # noqa: F401
-from tests.doc_index.elastic.fixture import FlatDoc, SimpleDoc
+from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
+from tests.index.elastic.fixture import FlatDoc, SimpleDoc
 
 
 @pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
@@ -147,8 +147,6 @@ def test_find_batched():
 
 
 def test_filter():
-    import itertools
-
     class MyDoc(BaseDocument):
         A: bool
         B: int
@@ -156,13 +154,7 @@ class MyDoc(BaseDocument):
 
     store = ElasticDocumentV8Index[MyDoc]()
 
-    A_list = [True, False]
-    B_list = [1, 2]
-    C_list = [1.5, 2.5]
-
-    # cross product of all possible combinations
-    combinations = itertools.product(A_list, B_list, C_list)
-    index_docs = [MyDoc(A=A, B=B, C=C) for A, B, C in combinations]
+    index_docs = [MyDoc(id=f'{i}', A=(i % 2 == 0), B=i, C=i + 0.5) for i in range(10)]
     store.index(index_docs)
 
     filter_query = {'term': {'A': True}}
@@ -171,17 +163,16 @@ class MyDoc(BaseDocument):
     for doc in docs:
         assert doc.A
 
-    filter_query = {'term': {'B': 1}}
-    docs = store.filter(filter_query)
-    assert len(docs) > 0
-    for doc in docs:
-        assert doc.B == 1
-
-    filter_query = {'term': {'C': 1.5}}
+    filter_query = {
+        "bool": {
+            "filter": [
+                {"terms": {"B": [3, 4, 7, 8]}},
+                {"range": {"C": {"gte": 3, "lte": 5}}},
+            ]
+        }
+    }
     docs = store.filter(filter_query)
-    assert len(docs) > 0
-    for doc in docs:
-        assert doc.C == 1.5
+    assert [doc.id for doc in docs] == ['3', '4']
 
 
 def test_text_search():
diff --git a/tests/doc_index/elastic/v8/test_index_get_del.py b/tests/index/elastic/v8/test_index_get_del.py
similarity index 96%
rename from tests/doc_index/elastic/v8/test_index_get_del.py
rename to tests/index/elastic/v8/test_index_get_del.py
index c826ed344c4..5777a3bc977 100644
--- a/tests/doc_index/elastic/v8/test_index_get_del.py
+++ b/tests/index/elastic/v8/test_index_get_del.py
@@ -2,9 +2,9 @@
 import pytest
 
 from docarray import DocumentArray
-from docarray.doc_index.backends.elasticv8_doc_index import ElasticDocumentV8Index
-from tests.doc_index.elastic.fixture import start_storage_v8  # noqa: F401
-from tests.doc_index.elastic.fixture import DeepNestedDoc, FlatDoc, NestedDoc, SimpleDoc
+from docarray.index import ElasticDocumentV8Index
+from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
+from tests.index.elastic.fixture import DeepNestedDoc, FlatDoc, NestedDoc, SimpleDoc
 
 
 @pytest.fixture

From 9470e624cc17bfe30038702a4b4cad636379c85d Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 31 Mar 2023 11:03:56 +0800
Subject: [PATCH 03/14] refactor: elastic v7 inherits v8

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 docarray/index/__init__.py                   |   6 +-
 docarray/index/backends/elastic.py           | 205 +++----
 docarray/index/backends/elasticv7.py         | 133 +++++
 docarray/index/backends/elasticv8.py         | 461 ----------------
 tests/index/elastic/v8/test_find.py          | 538 +++++++++----------
 tests/index/elastic/v8/test_index_get_del.py | 424 +++++++--------
 6 files changed, 728 insertions(+), 1039 deletions(-)
 create mode 100644 docarray/index/backends/elasticv7.py
 delete mode 100644 docarray/index/backends/elasticv8.py

diff --git a/docarray/index/__init__.py b/docarray/index/__init__.py
index 1e4f3ad8f7c..a678a03a415 100644
--- a/docarray/index/__init__.py
+++ b/docarray/index/__init__.py
@@ -1,5 +1,5 @@
-from docarray.index.backends.elastic import ElasticV7DocIndex
-from docarray.index.backends.elasticv8 import ElasticDocumentIndex
+from docarray.index.backends.elastic import ElasticDocIndex
+from docarray.index.backends.elasticv7 import ElasticV7DocIndex
 from docarray.index.backends.hnswlib import HnswDocumentIndex
 
-__all__ = ['HnswDocumentIndex', 'ElasticDocumentIndex', 'ElasticV7DocIndex']
+__all__ = ['HnswDocumentIndex', 'ElasticDocIndex', 'ElasticV7DocIndex']
diff --git a/docarray/index/backends/elastic.py b/docarray/index/backends/elastic.py
index deefc3b2a86..462b3a56591 100644
--- a/docarray/index/backends/elastic.py
+++ b/docarray/index/backends/elastic.py
@@ -1,4 +1,4 @@
-import os
+# mypy: ignore-errors
 import uuid
 import warnings
 from collections import defaultdict
@@ -21,6 +21,7 @@
 )
 
 import numpy as np
+from elastic_transport import NodeConfig
 from elasticsearch import Elasticsearch
 from elasticsearch.helpers import parallel_bulk
 from pydantic import parse_obj_as
@@ -40,7 +41,7 @@
 from docarray.utils.find import _FindResult
 
 TSchema = TypeVar('TSchema', bound=BaseDoc)
-T = TypeVar('T', bound='ElasticV7DocIndex')
+T = TypeVar('T', bound='ElasticDocIndex')
 
 ELASTIC_PY_VEC_TYPES: List[Any] = [list, tuple, np.ndarray, AbstractTensor]
 
@@ -58,11 +59,15 @@
     ELASTIC_PY_VEC_TYPES.append(TensorFlowTensor)
 
 
-class ElasticV7DocIndex(BaseDocIndex, Generic[TSchema]):
+# Required pyproject.toml dependencies:
+# elastic-transport = "^8.4.0"
+# elasticsearch = "^8.6.2"
+class ElasticDocIndex(BaseDocIndex, Generic[TSchema]):
     def __init__(self, db_config=None, **kwargs):
         super().__init__(db_config=db_config, **kwargs)
-        self._db_config = cast(ElasticV7DocIndex.DBConfig, self._db_config)
+        self._db_config = cast(self.DBConfig, self._db_config)
 
+        # ElasticSearch client creation
         if self._db_config.index_name is None:
             id = uuid.uuid4().hex
             self._db_config.index_name = 'index__' + id
@@ -74,40 +79,32 @@ def __init__(self, db_config=None, **kwargs):
             **self._db_config.es_config,
         )
 
-        # compatibility
-        self._server_version = self._client.info()['version']['number']
-        if int(self._server_version.split('.')[0]) >= 8:
-            os.environ['ELASTIC_CLIENT_APIVERSIONING'] = '1'
+        # ElasticSearch index setup
+        self._index_vector_params = ('dims', 'similarity', 'index')
+        self._index_vector_options = ('m', 'ef_construction')
 
-        body: Dict[str, Any] = {
-            'mappings': {
-                'dynamic': True,
-                '_source': {'enabled': 'true'},
-                'properties': {},
-            }
+        mappings: Dict[str, Any] = {
+            'dynamic': True,
+            '_source': {'enabled': 'true'},
+            'properties': {},
         }
 
         for col_name, col in self._column_infos.items():
-            body['mappings']['properties'][col_name] = self._create_index_mapping(col)
+            mappings['properties'][col_name] = self._create_index_mapping(col)
 
         if self._client.indices.exists(index=self._index_name):
-            self._client.indices.put_mapping(
-                index=self._index_name, body=body['mappings']
-            )
+            self._client_put_mapping(mappings)
         else:
-            self._client.indices.create(index=self._index_name, body=body)
+            self._client_create(mappings)
 
         if len(self._db_config.index_settings):
-            self._client.indices.put_settings(
-                index=self._index_name, body=self._db_config.index_settings
-            )
+            self._client_put_settings(self._db_config.index_settings)
 
         self._refresh(self._index_name)
 
     ###############################################
     # Inner classes for query builder and configs #
     ###############################################
-
     class QueryBuilder(BaseDocIndex.QueryBuilder):
         def __init__(self, outer_instance, **kwargs):
             super().__init__()
@@ -117,16 +114,11 @@ def __init__(self, outer_instance, **kwargs):
             }
 
         def build(self, *args, **kwargs) -> Any:
-            if (
-                'script_score' in self._query['query']
-                and 'bool' in self._query['query']
-                and len(self._query['query']['bool']) > 0
-            ):
-                self._query['query']['script_score']['query'] = {}
-                self._query['query']['script_score']['query']['bool'] = self._query[
-                    'query'
-                ]['bool']
-                del self._query['query']['bool']
+            if len(self._query['query']) == 0:
+                del self._query['query']
+            elif 'knn' in self._query:
+                self._query['knn']['filter'] = self._query['query']
+                del self._query['query']
 
             return self._query
 
@@ -141,13 +133,19 @@ def find(
             else:
                 query_vec = query
             query_vec_np = BaseDocIndex._to_numpy(self._outer_instance, query_vec)
-            self._query['size'] = limit
-            self._query['query']['script_score'] = ElasticV7DocIndex._form_search_body(
-                query_vec_np, limit, search_field
-            )['query']['script_score']
+            self._query['knn'] = ElasticDocIndex._form_search_body(
+                query_vec_np,
+                limit,
+                search_field,
+                self._outer_instance._runtime_config.default_column_config[
+                    'dense_vector'
+                ]['num_candidates'],
+            )['knn']
 
             return self
 
+        # filter accepts Leaf/Compound query clauses
+        # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
         def filter(self, query: Dict[str, Any], limit: int = 10):
             self._query['size'] = limit
             self._query['query']['bool']['filter'].append(query)
@@ -172,15 +170,20 @@ def build_query(self, **kwargs) -> QueryBuilder:
 
     @dataclass
     class DBConfig(BaseDocIndex.DBConfig):
-        hosts: Union[str, List[str], None] = 'http://localhost:9200'
+        hosts: Union[
+            str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]], None
+        ] = 'http://localhost:9200'
         index_name: Optional[str] = None
         es_config: Dict[str, Any] = field(default_factory=dict)
         index_settings: Dict[str, Any] = field(default_factory=dict)
 
     @dataclass
     class RuntimeConfig(BaseDocIndex.RuntimeConfig):
-        default_column_config: Dict[Any, Dict[str, Any]] = field(
-            default_factory=lambda: {
+        default_column_config: Dict[Any, Dict[str, Any]] = field(default_factory=dict)
+        chunk_size: int = 500
+
+        def __post_init__(self):
+            self.default_column_config = {
                 'binary': {},
                 'boolean': {},
                 'keyword': {},
@@ -212,7 +215,6 @@ class RuntimeConfig(BaseDocIndex.RuntimeConfig):
                 'completion': {},
                 'search_as_you_type': {},
                 'token_count': {},
-                'dense_vector': {'dims': 128},
                 'sparse_vector': {},
                 'rank_feature': {},
                 'rank_features': {},
@@ -224,8 +226,19 @@ class RuntimeConfig(BaseDocIndex.RuntimeConfig):
                 # `None` is not a Type, but we allow it here anyway
                 None: {},  # type: ignore
             }
-        )
-        chunk_size: int = 500
+            self.default_column_config['dense_vector'] = self.dense_vector_config()
+
+        def dense_vector_config(self):
+            config = {
+                'index': True,
+                'dims': 128,
+                'similarity': 'cosine',  # 'l2_norm', 'dot_product', 'cosine'
+                'm': 16,
+                'ef_construction': 100,
+                'num_candidates': 10000,
+            }
+
+            return config
 
     ###############################################
     # Implementation of abstract methods          #
@@ -233,7 +246,6 @@ class RuntimeConfig(BaseDocIndex.RuntimeConfig):
 
     def python_type_to_db_type(self, python_type: Type) -> Any:
         """Map python type to database type."""
-
         for allowed_type in ELASTIC_PY_VEC_TYPES:
             if issubclass(python_type, allowed_type):
                 return 'dense_vector'
@@ -312,10 +324,7 @@ def _get_items(self, doc_ids: Sequence[str]) -> Sequence[TSchema]:
         accumulated_docs = []
         accumulated_docs_id_not_found = []
 
-        es_rows = self._client.mget(
-            index=self._index_name,
-            body={'ids': doc_ids},
-        )['docs']
+        es_rows = self._client_mget(doc_ids)['docs']
 
         for row in es_rows:
             if row['found']:
@@ -336,7 +345,7 @@ def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
                 f'args and kwargs not supported for `execute_query` on {type(self)}'
             )
 
-        resp = self._client.search(index=self._index_name, body=query)
+        resp = self._client.search(index=self._index_name, **query)
         docs, scores = self._format_response(resp)
 
         return _FindResult(documents=docs, scores=scores)
@@ -344,17 +353,9 @@ def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
     def _find(
         self, query: np.ndarray, limit: int, search_field: str = ''
     ) -> _FindResult:
-        if int(self._server_version.split('.')[0]) >= 8:
-            warnings.warn(
-                'You are using Elasticsearch 8.0+ and the current client is 7.10.1. HNSW based vector search is not supported and the find method has a default implementation using exhaustive KNN search with cosineSimilarity, which may result in slow performance.'
-            )
-
         body = self._form_search_body(query, limit, search_field)
 
-        resp = self._client.search(
-            index=self._index_name,
-            body=body,
-        )
+        resp = self._client_search(**body)
 
         docs, scores = self._format_response(resp)
 
@@ -372,7 +373,7 @@ def _find_batched(
             body = self._form_search_body(query, limit, search_field)
             request.extend([head, body])
 
-        responses = self._client.msearch(body=request)
+        responses = self._client_msearch(request)
 
         das, scores = zip(
             *[self._format_response(resp) for resp in responses['responses']]
@@ -384,15 +385,7 @@ def _filter(
         filter_query: Dict[str, Any],
         limit: int,
     ) -> List[Dict]:
-        body = {
-            'size': limit,
-            'query': filter_query,
-        }
-
-        resp = self._client.search(
-            index=self._index_name,
-            body=body,
-        )
+        resp = self._client_search(query=filter_query, size=limit)
 
         docs, _ = self._format_response(resp)
 
@@ -409,7 +402,7 @@ def _filter_batched(
             body = {'query': query, 'size': limit}
             request.extend([head, body])
 
-        responses = self._client.msearch(body=request)
+        responses = self._client_msearch(request)
         das, _ = zip(*[self._format_response(resp) for resp in responses['responses']])
 
         return list(das)
@@ -422,15 +415,11 @@ def _text_search(
     ) -> _FindResult:
 
         body = self._form_text_search_body(query, limit, search_field)
-
-        resp = self._client.search(
-            index=self._index_name,
-            body=body,
-        )
+        resp = self._client_search(**body)
 
         docs, scores = self._format_response(resp)
 
-        return _FindResult(documents=docs, scores=scores)
+        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
 
     def _text_search_batched(
         self,
@@ -444,28 +433,32 @@ def _text_search_batched(
             body = self._form_text_search_body(query, limit, search_field)
             request.extend([head, body])
 
-        responses = self._client.msearch(body=request)
-
+        responses = self._client_msearch(request)
         das, scores = zip(
             *[self._format_response(resp) for resp in responses['responses']]
         )
-        return _FindResultBatched(documents=list(das), scores=np.array(scores))
+        return _FindResultBatched(
+            documents=list(das), scores=np.array(scores, dtype=object)
+        )
 
     ###############################################
     # Helpers                                     #
     ###############################################
 
-    # ElasticSearch helpers
     def _create_index_mapping(self, col: '_ColumnInfo') -> Dict[str, Any]:
         """Create a new HNSW index for a column, and initialize it."""
 
-        index = col.config.copy()
-        if 'type' not in index:
-            index['type'] = col.db_type
-
-        if col.db_type == 'dense_vector' and col.n_dim:
-            index['dims'] = col.n_dim
+        index = {'type': col.config['type'] if 'type' in col.config else col.db_type}
 
+        if col.db_type == 'dense_vector':
+            for k in self._index_vector_params:
+                index[k] = col.config[k]
+            if col.n_dim:
+                index['dims'] = col.n_dim
+            index['index_options'] = dict(
+                (k, col.config[k]) for k in self._index_vector_options
+            )
+            index['index_options']['type'] = 'hnsw'
         return index
 
     def _send_requests(
@@ -495,18 +488,18 @@ def _send_requests(
 
     @staticmethod
     def _form_search_body(
-        query: np.ndarray, limit: int, search_field: str = ''
+        query: np.ndarray,
+        limit: int,
+        search_field: str = '',
+        num_candidates: int = 10000,
     ) -> Dict[str, Any]:
         body = {
             'size': limit,
-            'query': {
-                'script_score': {
-                    'query': {'match_all': {}},
-                    'script': {
-                        'source': f'cosineSimilarity(params.query_vector, \'{search_field}\') + 1.0',
-                        'params': {'query_vector': query},
-                    },
-                }
+            'knn': {
+                'field': search_field,
+                'query_vector': query,
+                'k': limit,
+                'num_candidates': num_candidates,
             },
         }
         return body
@@ -544,3 +537,27 @@ def _format_response(self, response: Any) -> Tuple[List[Dict], NdArray]:
 
     def _refresh(self, index_name: str):
         self._client.indices.refresh(index=index_name)
+
+    ###############################################
+    # API Wrappers                                #
+    ###############################################
+
+    def _client_put_mapping(self, mappings: Dict[str, Any]):
+        self._client.indices.put_mapping(
+            index=self._index_name, properties=mappings['properties']
+        )
+
+    def _client_create(self, mappings: Dict[str, Any]):
+        self._client.indices.create(index=self._index_name, mappings=mappings)
+
+    def _client_put_settings(self, settings: Dict[str, Any]):
+        self._client.indices.put_settings(index=self._index_name, settings=settings)
+
+    def _client_mget(self, ids: Sequence[str]):
+        return self._client.mget(index=self._index_name, ids=ids)
+
+    def _client_search(self, **kwargs):
+        return self._client.search(index=self._index_name, **kwargs)
+
+    def _client_msearch(self, request: List[Dict[str, Any]]):
+        return self._client.msearch(index=self._index_name, searches=request)
diff --git a/docarray/index/backends/elasticv7.py b/docarray/index/backends/elasticv7.py
new file mode 100644
index 00000000000..5f80379f85e
--- /dev/null
+++ b/docarray/index/backends/elasticv7.py
@@ -0,0 +1,133 @@
+from dataclasses import dataclass
+from typing import Any, Dict, List, Sequence, TypeVar, Union
+
+import numpy as np
+
+from docarray import BaseDoc
+from docarray.index import ElasticDocIndex
+from docarray.index.abstract import BaseDocIndex, _ColumnInfo
+from docarray.typing import AnyTensor
+from docarray.utils.find import _FindResult
+
+TSchema = TypeVar('TSchema', bound=BaseDoc)
+T = TypeVar('T', bound='ElasticV7DocIndex')
+
+
+class ElasticV7DocIndex(ElasticDocIndex):
+
+    ###############################################
+    # Inner classes for query builder and configs #
+    ###############################################
+
+    class QueryBuilder(ElasticDocIndex.QueryBuilder):
+        def build(self, *args, **kwargs) -> Any:
+            if (
+                'script_score' in self._query['query']
+                and 'bool' in self._query['query']
+                and len(self._query['query']['bool']) > 0
+            ):
+                self._query['query']['script_score']['query'] = {}
+                self._query['query']['script_score']['query']['bool'] = self._query[
+                    'query'
+                ]['bool']
+                del self._query['query']['bool']
+
+            return self._query
+
+        def find(
+            self,
+            query: Union[AnyTensor, BaseDoc],
+            search_field: str = 'embedding',
+            limit: int = 10,
+        ):
+            if isinstance(query, BaseDoc):
+                query_vec = BaseDocIndex._get_values_by_column([query], search_field)[0]
+            else:
+                query_vec = query
+            query_vec_np = BaseDocIndex._to_numpy(self._outer_instance, query_vec)
+            self._query['size'] = limit
+            self._query['query']['script_score'] = ElasticV7DocIndex._form_search_body(
+                query_vec_np, limit, search_field
+            )['query']['script_score']
+
+            return self
+
+    @dataclass
+    class DBConfig(ElasticDocIndex.DBConfig):
+        hosts: Union[str, List[str], None] = 'http://localhost:9200'  # type: ignore
+
+    @dataclass
+    class RuntimeConfig(ElasticDocIndex.RuntimeConfig):
+        def dense_vector_config(self):
+            return {'dims': 128}
+
+    ###############################################
+    # Implementation of abstract methods          #
+    ###############################################
+
+    def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
+        if args or kwargs:
+            raise ValueError(
+                f'args and kwargs not supported for `execute_query` on {type(self)}'
+            )
+
+        resp = self._client.search(index=self._index_name, body=query)
+        docs, scores = self._format_response(resp)
+
+        return _FindResult(documents=docs, scores=scores)
+
+    ###############################################
+    # Helpers                                     #
+    ###############################################
+
+    # ElasticSearch helpers
+    def _create_index_mapping(self, col: '_ColumnInfo') -> Dict[str, Any]:
+        """Create a new HNSW index for a column, and initialize it."""
+
+        index = col.config.copy()
+        if 'type' not in index:
+            index['type'] = col.db_type
+
+        if col.db_type == 'dense_vector' and col.n_dim:
+            index['dims'] = col.n_dim
+
+        return index
+
+    @staticmethod
+    def _form_search_body(query: np.ndarray, limit: int, search_field: str = '') -> Dict[str, Any]:  # type: ignore
+        body = {
+            'size': limit,
+            'query': {
+                'script_score': {
+                    'query': {'match_all': {}},
+                    'script': {
+                        'source': f'cosineSimilarity(params.query_vector, \'{search_field}\') + 1.0',
+                        'params': {'query_vector': query},
+                    },
+                }
+            },
+        }
+        return body
+
+    ###############################################
+    # API Wrappers                                #
+    ###############################################
+
+    def _client_put_mapping(self, mappings: Dict[str, Any]):
+        self._client.indices.put_mapping(index=self._index_name, body=mappings)
+
+    def _client_create(self, mappings: Dict[str, Any]):
+        body = {'mappings': mappings}
+        self._client.indices.create(index=self._index_name, body=body)
+
+    def _client_put_settings(self, settings: Dict[str, Any]):
+        self._client.indices.put_settings(index=self._index_name, body=settings)
+
+    def _client_mget(self, ids: Sequence[str]):
+        return self._client.mget(index=self._index_name, body={'ids': ids})
+
+    def _client_search(self, **kwargs):
+        return self._client.search(index=self._index_name, body=kwargs)
+
+    def _client_msearch(self, request: List[Dict[str, Any]]):
+        return self._client.msearch(index=self._index_name, body=request)
diff --git a/docarray/index/backends/elasticv8.py b/docarray/index/backends/elasticv8.py
deleted file mode 100644
index 2b84f640272..00000000000
--- a/docarray/index/backends/elasticv8.py
+++ /dev/null
@@ -1,461 +0,0 @@
-import uuid
-import warnings
-from collections import defaultdict
-from dataclasses import dataclass, field
-from typing import (
-    Any,
-    Dict,
-    Generator,
-    Generic,
-    Iterable,
-    List,
-    Mapping,
-    Optional,
-    Sequence,
-    Tuple,
-    Type,
-    TypeVar,
-    Union,
-    cast,
-)
-
-import numpy as np
-from elastic_transport import NodeConfig
-from elasticsearch import Elasticsearch
-from elasticsearch.helpers import parallel_bulk
-
-import docarray.typing
-from docarray import BaseDocument
-from docarray.index.abstract import (
-    BaseDocumentIndex,
-    _ColumnInfo,
-    _FindResultBatched,
-    _raise_not_composable,
-)
-from docarray.typing import AnyTensor
-from docarray.utils.find import _FindResult
-from docarray.utils.misc import torch_imported
-
-TSchema = TypeVar('TSchema', bound=BaseDocument)
-T = TypeVar('T', bound='ElasticDocumentV8Index')
-
-ELASTIC_PY_VEC_TYPES: List[Any] = [np.ndarray]
-if torch_imported:
-    import torch
-
-    ELASTIC_PY_VEC_TYPES.append(torch.Tensor)
-
-
-class ElasticDocumentV8Index(BaseDocumentIndex, Generic[TSchema]):
-    def __init__(self, db_config=None, **kwargs):
-        super().__init__(db_config=db_config, **kwargs)
-        self._db_config = cast(ElasticDocumentV8Index.DBConfig, self._db_config)
-
-        if self._db_config.index_name is None:
-            id = uuid.uuid4().hex
-            self._db_config.index_name = 'index__' + id
-
-        self._index_name = self._db_config.index_name
-
-        self._client = Elasticsearch(
-            hosts=self._db_config.hosts,
-            **self._db_config.es_config,
-        )
-
-        # ElasticSearh index setup
-        self._index_vector_params = ('dims', 'similarity', 'index')
-        self._index_vector_options = ('m', 'ef_construction')
-
-        mappings: Dict[str, Any] = {
-            'dynamic': True,
-            '_source': {'enabled': 'true'},
-            'properties': {},
-        }
-
-        for col_name, col in self._column_infos.items():
-            mappings['properties'][col_name] = self._create_index_mapping(col)
-
-        if self._client.indices.exists(index=self._index_name):
-            self._client.indices.put_mapping(
-                index=self._index_name, properties=mappings['properties']
-            )
-        else:
-            self._client.indices.create(index=self._index_name, mappings=mappings)
-
-        if len(self._db_config.index_settings):
-            self._client.indices.put_settings(
-                index=self._index_name, settings=self._db_config.index_settings
-            )
-
-        self._refresh(self._index_name)
-
-    ###############################################
-    # Inner classes for query builder and configs #
-    ###############################################
-    class QueryBuilder(BaseDocumentIndex.QueryBuilder):
-        def __init__(self, outer_instance, **kwargs):
-            super().__init__()
-            self._outer_instance = outer_instance
-            self._query: Dict[str, Any] = {
-                'query': defaultdict(lambda: defaultdict(list))
-            }
-
-        def build(self, *args, **kwargs) -> Any:
-            if len(self._query['query']) == 0:
-                del self._query['query']
-            elif 'knn' in self._query:
-                self._query['knn']['filter'] = self._query['query']
-                del self._query['query']
-
-            return self._query
-
-        def find(
-            self,
-            query: Union[AnyTensor, BaseDocument],
-            search_field: str = 'embedding',
-            limit: int = 10,
-        ):
-            if isinstance(query, BaseDocument):
-                query_vec = BaseDocumentIndex._get_values_by_column(
-                    [query], search_field
-                )[0]
-            else:
-                query_vec = query
-            query_vec_np = BaseDocumentIndex._to_numpy(self._outer_instance, query_vec)
-            self._query['knn'] = {
-                'field': search_field,
-                'query_vector': query_vec_np,
-                'k': limit,
-                'num_candidates': self._outer_instance._runtime_config.default_column_config[
-                    'dense_vector'
-                ][
-                    'num_candidates'
-                ],
-            }
-            return self
-
-        # filter accrpts Leaf/Compound query clauses
-        # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
-        def filter(self, query: Dict[str, Any], limit: int = 10):
-            self._query['size'] = limit
-            self._query['query']['bool']['filter'].append(query)
-            return self
-
-        def text_search(self, query: str, search_field: str = 'text', limit: int = 10):
-            self._query['size'] = limit
-            self._query['query']['bool']['must'].append(
-                {'match': {search_field: query}}
-            )
-            return self
-
-        find_batched = _raise_not_composable('find_batched')
-        filter_batched = _raise_not_composable('find_batched')
-        text_search_batched = _raise_not_composable('text_search')
-
-    def build_query(self, **kwargs) -> QueryBuilder:
-        """
-        Build a query for this DocumentIndex.
-        """
-        return self.QueryBuilder(self, **kwargs)
-
-    @dataclass
-    class DBConfig(BaseDocumentIndex.DBConfig):
-
-        hosts: Union[
-            str, List[Union[str, Mapping[str, Union[str, int]], NodeConfig]], None
-        ] = 'http://localhost:9200'
-        index_name: Optional[str] = None
-        es_config: Dict[str, Any] = field(default_factory=dict)
-        index_settings: Dict[str, Any] = field(default_factory=dict)
-
-    @dataclass
-    class RuntimeConfig(BaseDocumentIndex.RuntimeConfig):
-        default_column_config: Dict[Any, Dict[str, Any]] = field(
-            default_factory=lambda: {
-                'dense_vector': {
-                    'index': True,
-                    'dims': 128,
-                    'similarity': 'cosine',  # 'l2_norm', 'dot_product', 'cosine'
-                    'm': 16,
-                    'ef_construction': 100,
-                    'num_candidates': 10000,
-                },
-                'keyword': {},
-                'boolean': {},
-                'integer': {},
-                'float': {},
-                'text': {},
-                # `None` is not a Type, but we allow it here anyway
-                None: {},  # type: ignore
-            }
-        )
-        chunk_size: int = 500
-
-    ###############################################
-    # Implementation of abstract methods          #
-    ###############################################
-
-    def python_type_to_db_type(self, python_type: Type) -> Any:
-        """Map python type to database type."""
-        for allowed_type in ELASTIC_PY_VEC_TYPES:
-            if issubclass(python_type, allowed_type):
-                return 'dense_vector'
-
-        elastic_py_types = {
-            bool: 'boolean',
-            int: 'integer',
-            float: 'float',
-            str: 'text',
-            docarray.typing.ID: 'keyword',
-        }
-
-        if python_type in elastic_py_types:
-            return elastic_py_types[python_type]
-
-        raise ValueError(f'Unsupported column type for {type(self)}: {python_type}')
-
-    def _index(
-        self,
-        column_to_data: Dict[str, Generator[Any, None, None]],
-        refresh: bool = True,
-        chunk_size: Optional[int] = None,
-    ):
-
-        data = self._transpose_col_value_dict(column_to_data)  # type: ignore
-        requests = []
-
-        for row in data:
-            request = {
-                '_index': self._index_name,
-                '_id': row['id'],
-            }
-            for col_name, col in self._column_infos.items():
-                if col.db_type == 'dense_vector' and np.all(row[col_name] == 0):
-                    row[col_name] = row[col_name] + 1.0e-9
-                request[col_name] = row[col_name]
-            requests.append(request)
-
-        _, warning_info = self._send_requests(requests, chunk_size)
-        for info in warning_info:
-            warnings.warn(str(info))
-
-        if refresh:
-            self._refresh(self._index_name)
-
-    def num_docs(self) -> int:
-        return self._client.count(index=self._index_name)['count']
-
-    def _del_items(
-        self,
-        doc_ids: Sequence[str],
-        chunk_size: Optional[int] = None,
-    ):
-        requests = []
-        for _id in doc_ids:
-            requests.append(
-                {'_op_type': 'delete', '_index': self._index_name, '_id': _id}
-            )
-
-        _, warning_info = self._send_requests(requests, chunk_size)
-
-        # raise warning if some ids are not found
-        if warning_info:
-            ids = [info['delete']['_id'] for info in warning_info]
-            warnings.warn(f'No document with id {ids} found')
-
-        self._refresh(self._index_name)
-
-    def _get_items(self, doc_ids: Sequence[str]) -> Sequence[TSchema]:
-        accumulated_docs = []
-        accumulated_docs_id_not_found = []
-
-        es_rows = self._client.mget(
-            index=self._index_name,
-            ids=doc_ids,  # type: ignore
-        )['docs']
-
-        for row in es_rows:
-            if row['found']:
-                doc_dict = row['_source']
-                accumulated_docs.append(doc_dict)
-            else:
-                accumulated_docs_id_not_found.append(row['_id'])
-
-        # raise warning if some ids are not found
-        if accumulated_docs_id_not_found:
-            warnings.warn(f'No document with id {accumulated_docs_id_not_found} found')
-
-        return accumulated_docs
-
-    def execute_query(self, query: Dict[str, Any], *args, **kwargs) -> Any:
-        if args or kwargs:
-            raise ValueError(
-                f'args and kwargs not supported for `execute_query` on {type(self)}'
-            )
-
-        resp = self._client.search(index=self._index_name, **query)
-        docs, scores = self._format_response(resp)
-        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
-
-    def _find(
-        self, query: np.ndarray, limit: int, search_field: str = ''
-    ) -> _FindResult:
-        knn_query = {
-            'field': search_field,
-            'query_vector': query,
-            'k': limit,
-            'num_candidates': self._runtime_config.default_column_config['dense_vector'][  # type: ignore
-                'num_candidates'
-            ],
-        }
-
-        resp = self._client.search(
-            index=self._index_name,
-            knn=knn_query,
-            size=limit,
-        )
-
-        docs, scores = self._format_response(resp)
-
-        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
-
-    def _find_batched(
-        self,
-        queries: np.ndarray,
-        limit: int,
-        search_field: str = '',
-    ) -> _FindResultBatched:
-        result_das = []
-        result_scores = []
-
-        for query in queries:
-            documents, scores = self._find(query, limit, search_field)
-            result_das.append(documents)
-            result_scores.append(scores)
-
-        return _FindResultBatched(documents=result_das, scores=np.array(result_scores))  # type: ignore
-
-    def _filter(
-        self,
-        filter_query: Dict[str, Any],
-        limit: int,
-    ) -> List[Dict]:
-        resp = self._client.search(
-            index=self._index_name,
-            query=filter_query,
-            size=limit,
-        )
-
-        docs, _ = self._format_response(resp)
-
-        return docs
-
-    def _filter_batched(
-        self,
-        filter_queries: Any,
-        limit: int,
-    ) -> List[List[Dict]]:
-        result_das = []
-        for query in filter_queries:
-            result_das.append(self._filter(query, limit))
-        return result_das
-
-    def _text_search(
-        self,
-        query: str,
-        limit: int,
-        search_field: str = '',
-    ) -> _FindResult:
-        search_query = {
-            'bool': {
-                'must': [
-                    {'match': {search_field: query}},
-                ],
-            }
-        }
-
-        resp = self._client.search(
-            index=self._index_name,
-            query=search_query,
-            size=limit,
-        )
-
-        docs, scores = self._format_response(resp)
-
-        return _FindResult(documents=docs, scores=np.array(scores))  # type: ignore
-
-    def _text_search_batched(
-        self,
-        queries: Sequence[str],
-        limit: int,
-        search_field: str = '',
-    ) -> _FindResultBatched:
-        result_das = []
-        result_scores = []
-
-        for query in queries:
-            documents, scores = self._text_search(query, limit, search_field)
-            result_das.append(documents)
-            result_scores.append(scores)
-
-        return _FindResultBatched(documents=result_das, scores=np.array(result_scores, dtype=object))  # type: ignore
-
-    ###############################################
-    # Helpers                                     #
-    ###############################################
-
-    # ElasticSearch helpers
-    def _create_index_mapping(self, col: '_ColumnInfo') -> Dict[str, Any]:
-        """Create a new HNSW index for a column, and initialize it."""
-
-        index = {'type': col.config['type'] if 'type' in col.config else col.db_type}
-
-        if col.db_type == 'dense_vector':
-            for k in self._index_vector_params:
-                index[k] = col.config[k]
-            if col.n_dim:
-                index['dims'] = col.n_dim
-            index['index_options'] = dict(
-                (k, col.config[k]) for k in self._index_vector_options
-            )
-            index['index_options']['type'] = 'hnsw'
-        return index
-
-    def _send_requests(
-        self,
-        request: Iterable[Dict[str, Any]],
-        chunk_size: Optional[int] = None,
-        **kwargs,
-    ) -> Tuple[List[Dict], List[Any]]:
-        """Send bulk request to Elastic and gather the successful info"""
-
-        accumulated_info = []
-        warning_info = []
-        for success, info in parallel_bulk(
-            self._client,
-            request,
-            raise_on_error=False,
-            raise_on_exception=False,
-            chunk_size=chunk_size if chunk_size else self._runtime_config.chunk_size,  # type: ignore
-            **kwargs,
-        ):
-            if not success:
-                warning_info.append(info)
-            else:
-                accumulated_info.append(info)
-
-        return accumulated_info, warning_info
-
-    def _format_response(self, response: Any) -> Tuple[List[Dict], List[float]]:
-        docs = []
-        scores = []
-        for result in response['hits']['hits']:
-            doc_dict = result['_source']
-            doc_dict['id'] = result['_id']
-            docs.append(doc_dict)
-            scores.append(result['_score'])
-
-        return docs, scores
-
-    def _refresh(self, index_name: str):
-        self._client.indices.refresh(index=index_name)
diff --git a/tests/index/elastic/v8/test_find.py b/tests/index/elastic/v8/test_find.py
index 14c5d5ef20a..d61ae643ae0 100644
--- a/tests/index/elastic/v8/test_find.py
+++ b/tests/index/elastic/v8/test_find.py
@@ -1,269 +1,269 @@
-import numpy as np
-import pytest
-from pydantic import Field
-
-from docarray import BaseDocument
-from docarray.index import ElasticDocumentV8Index
-from docarray.typing import NdArray
-from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
-from tests.index.elastic.fixture import FlatDoc, SimpleDoc
-
-
-@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
-def test_find_simple_schema(similarity):
-    class SimpleSchema(BaseDocument):
-        tens: NdArray[10] = Field(similarity=similarity)
-
-    store = ElasticDocumentV8Index[SimpleSchema]()
-
-    index_docs = []
-    for _ in range(10):
-        vec = np.random.rand(10)
-        if similarity == 'dot_product':
-            vec = vec / np.linalg.norm(vec)
-        index_docs.append(SimpleDoc(tens=vec))
-    store.index(index_docs)
-
-    query = index_docs[-1]
-    docs, scores = store.find(query, search_field='tens', limit=5)
-
-    assert len(docs) == 5
-    assert len(scores) == 5
-    assert docs[0].id == index_docs[-1].id
-    assert np.allclose(docs[0].tens, index_docs[-1].tens)
-
-
-@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
-def test_find_flat_schema(similarity):
-    class FlatSchema(BaseDocument):
-        tens_one: NdArray = Field(dims=10, similarity=similarity)
-        tens_two: NdArray = Field(dims=50, similarity=similarity)
-
-    store = ElasticDocumentV8Index[FlatSchema]()
-
-    index_docs = []
-    for _ in range(10):
-        vec_one = np.random.rand(10)
-        vec_two = np.random.rand(50)
-        if similarity == 'dot_product':
-            vec_one = vec_one / np.linalg.norm(vec_one)
-            vec_two = vec_two / np.linalg.norm(vec_two)
-        index_docs.append(FlatDoc(tens_one=vec_one, tens_two=vec_two))
-
-    store.index(index_docs)
-
-    query = index_docs[-1]
-
-    # find on tens_one
-    docs, scores = store.find(query, search_field='tens_one', limit=5)
-    assert len(docs) == 5
-    assert len(scores) == 5
-    assert docs[0].id == index_docs[-1].id
-    assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
-    assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
-
-    # find on tens_two
-    docs, scores = store.find(query, search_field='tens_two', limit=5)
-    assert len(docs) == 5
-    assert len(scores) == 5
-    assert docs[0].id == index_docs[-1].id
-    assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
-    assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
-
-
-@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
-def test_find_nested_schema(similarity):
-    class SimpleDoc(BaseDocument):
-        tens: NdArray[10] = Field(similarity=similarity)
-
-    class NestedDoc(BaseDocument):
-        d: SimpleDoc
-        tens: NdArray[10] = Field(similarity=similarity)
-
-    class DeepNestedDoc(BaseDocument):
-        d: NestedDoc
-        tens: NdArray = Field(similarity=similarity, dims=10)
-
-    store = ElasticDocumentV8Index[DeepNestedDoc]()
-
-    index_docs = []
-    for _ in range(10):
-        vec_simple = np.random.rand(10)
-        vec_nested = np.random.rand(10)
-        vec_deep = np.random.rand(10)
-        if similarity == 'dot_product':
-            vec_simple = vec_simple / np.linalg.norm(vec_simple)
-            vec_nested = vec_nested / np.linalg.norm(vec_nested)
-            vec_deep = vec_deep / np.linalg.norm(vec_deep)
-        index_docs.append(
-            DeepNestedDoc(
-                d=NestedDoc(d=SimpleDoc(tens=vec_simple), tens=vec_nested),
-                tens=vec_deep,
-            )
-        )
-
-    store.index(index_docs)
-
-    query = index_docs[-1]
-
-    # find on root level
-    docs, scores = store.find(query, search_field='tens', limit=5)
-    assert len(docs) == 5
-    assert len(scores) == 5
-    assert docs[0].id == index_docs[-1].id
-    assert np.allclose(docs[0].tens, index_docs[-1].tens)
-
-    # find on first nesting level
-    docs, scores = store.find(query, search_field='d__tens', limit=5)
-    assert len(docs) == 5
-    assert len(scores) == 5
-    assert docs[0].id == index_docs[-1].id
-    assert np.allclose(docs[0].d.tens, index_docs[-1].d.tens)
-
-    # find on second nesting level
-    docs, scores = store.find(query, search_field='d__d__tens', limit=5)
-    assert len(docs) == 5
-    assert len(scores) == 5
-    assert docs[0].id == index_docs[-1].id
-    assert np.allclose(docs[0].d.d.tens, index_docs[-1].d.d.tens)
-
-
-def test_find_batched():
-    store = ElasticDocumentV8Index[SimpleDoc]()
-
-    index_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(10)]
-    store.index(index_docs)
-
-    queries = index_docs[-2:]
-    docs_batched, scores_batched = store.find_batched(
-        queries, search_field='tens', limit=5
-    )
-
-    for docs, scores, query in zip(docs_batched, scores_batched, queries):
-        assert len(docs) == 5
-        assert len(scores) == 5
-        assert docs[0].id == query.id
-        assert np.allclose(docs[0].tens, query.tens)
-
-
-def test_filter():
-    class MyDoc(BaseDocument):
-        A: bool
-        B: int
-        C: float
-
-    store = ElasticDocumentV8Index[MyDoc]()
-
-    index_docs = [MyDoc(id=f'{i}', A=(i % 2 == 0), B=i, C=i + 0.5) for i in range(10)]
-    store.index(index_docs)
-
-    filter_query = {'term': {'A': True}}
-    docs = store.filter(filter_query)
-    assert len(docs) > 0
-    for doc in docs:
-        assert doc.A
-
-    filter_query = {
-        "bool": {
-            "filter": [
-                {"terms": {"B": [3, 4, 7, 8]}},
-                {"range": {"C": {"gte": 3, "lte": 5}}},
-            ]
-        }
-    }
-    docs = store.filter(filter_query)
-    assert [doc.id for doc in docs] == ['3', '4']
-
-
-def test_text_search():
-    class MyDoc(BaseDocument):
-        text: str
-
-    store = ElasticDocumentV8Index[MyDoc]()
-    index_docs = [
-        MyDoc(text='hello world'),
-        MyDoc(text='never gonna give you up'),
-        MyDoc(text='we are the world'),
-    ]
-    store.index(index_docs)
-
-    query = 'world'
-    docs, scores = store.text_search(query, search_field='text')
-
-    assert len(docs) == 2
-    assert len(scores) == 2
-    assert docs[0].text.index(query) >= 0
-    assert docs[1].text.index(query) >= 0
-
-    queries = ['world', 'never']
-    docs, scores = store.text_search_batched(queries, search_field='text')
-    for query, da, score in zip(queries, docs, scores):
-        assert len(da) > 0
-        assert len(score) > 0
-        for doc in da:
-            assert doc.text.index(query) >= 0
-
-
-def test_query_builder():
-    class MyDoc(BaseDocument):
-        tens: NdArray[10] = Field(similarity='l2_norm')
-        num: int
-        text: str
-
-    store = ElasticDocumentV8Index[MyDoc]()
-    index_docs = [
-        MyDoc(id=f'{i}', tens=np.ones(10) * i, num=int(i / 2), text=f'text {int(i/2)}')
-        for i in range(10)
-    ]
-    store.index(index_docs)
-
-    # build_query
-    q = store.build_query()
-    assert isinstance(q, store.QueryBuilder)
-
-    # filter
-    q = store.build_query().filter({'term': {'num': 0}}).build()
-    docs, _ = store.execute_query(q)
-    assert [doc['id'] for doc in docs] == ['0', '1']
-
-    # find
-    q = store.build_query().find(index_docs[-1], search_field='tens', limit=3).build()
-    docs, _ = store.execute_query(q)
-    assert [doc['id'] for doc in docs] == ['9', '8', '7']
-
-    # text_search
-    q = store.build_query().text_search('0', search_field='text').build()
-    docs, _ = store.execute_query(q)
-    assert [doc['id'] for doc in docs] == ['0', '1']
-
-    # combination
-    q = (
-        store.build_query()
-        .filter({'range': {'num': {'lte': 3}}})
-        .find(index_docs[-1], search_field='tens')
-        .text_search('0', search_field='text')
-        .build()
-    )
-    docs, _ = store.execute_query(q)
-    assert [doc['id'] for doc in docs] == ['1', '0']
-
-    # direct
-    query = {
-        'knn': {
-            'field': 'tens',
-            'query_vector': [9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0],
-            'k': 10,
-            'num_candidates': 10000,
-            'filter': {
-                'bool': {
-                    'filter': [
-                        {'range': {'num': {'gte': 2}}},
-                        {'range': {'num': {'lte': 3}}},
-                    ]
-                }
-            },
-        },
-    }
-
-    docs, _ = store.execute_query(query)
-    assert [doc['id'] for doc in docs] == ['7', '6', '5', '4']
+# import numpy as np
+# import pytest
+# from pydantic import Field
+
+# from docarray import BaseDoc
+# from docarray.index import ElasticDocIndex
+# from docarray.typing import NdArray
+# from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
+# from tests.index.elastic.fixture import FlatDoc, SimpleDoc
+
+
+# @pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+# def test_find_simple_schema(similarity):
+#     class SimpleSchema(BaseDoc):
+#         tens: NdArray[10] = Field(similarity=similarity)
+
+#     store = ElasticDocIndex[SimpleSchema]()
+
+#     index_docs = []
+#     for _ in range(10):
+#         vec = np.random.rand(10)
+#         if similarity == 'dot_product':
+#             vec = vec / np.linalg.norm(vec)
+#         index_docs.append(SimpleDoc(tens=vec))
+#     store.index(index_docs)
+
+#     query = index_docs[-1]
+#     docs, scores = store.find(query, search_field='tens', limit=5)
+
+#     assert len(docs) == 5
+#     assert len(scores) == 5
+#     assert docs[0].id == index_docs[-1].id
+#     assert np.allclose(docs[0].tens, index_docs[-1].tens)
+
+
+# @pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+# def test_find_flat_schema(similarity):
+#     class FlatSchema(BaseDoc):
+#         tens_one: NdArray = Field(dims=10, similarity=similarity)
+#         tens_two: NdArray = Field(dims=50, similarity=similarity)
+
+#     store = ElasticDocIndex[FlatSchema]()
+
+#     index_docs = []
+#     for _ in range(10):
+#         vec_one = np.random.rand(10)
+#         vec_two = np.random.rand(50)
+#         if similarity == 'dot_product':
+#             vec_one = vec_one / np.linalg.norm(vec_one)
+#             vec_two = vec_two / np.linalg.norm(vec_two)
+#         index_docs.append(FlatDoc(tens_one=vec_one, tens_two=vec_two))
+
+#     store.index(index_docs)
+
+#     query = index_docs[-1]
+
+#     # find on tens_one
+#     docs, scores = store.find(query, search_field='tens_one', limit=5)
+#     assert len(docs) == 5
+#     assert len(scores) == 5
+#     assert docs[0].id == index_docs[-1].id
+#     assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
+#     assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
+
+#     # find on tens_two
+#     docs, scores = store.find(query, search_field='tens_two', limit=5)
+#     assert len(docs) == 5
+#     assert len(scores) == 5
+#     assert docs[0].id == index_docs[-1].id
+#     assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
+#     assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
+
+
+# @pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+# def test_find_nested_schema(similarity):
+#     class SimpleDoc(BaseDoc):
+#         tens: NdArray[10] = Field(similarity=similarity)
+
+#     class NestedDoc(BaseDoc):
+#         d: SimpleDoc
+#         tens: NdArray[10] = Field(similarity=similarity)
+
+#     class DeepNestedDoc(BaseDoc):
+#         d: NestedDoc
+#         tens: NdArray = Field(similarity=similarity, dims=10)
+
+#     store = ElasticDocIndex[DeepNestedDoc]()
+
+#     index_docs = []
+#     for _ in range(10):
+#         vec_simple = np.random.rand(10)
+#         vec_nested = np.random.rand(10)
+#         vec_deep = np.random.rand(10)
+#         if similarity == 'dot_product':
+#             vec_simple = vec_simple / np.linalg.norm(vec_simple)
+#             vec_nested = vec_nested / np.linalg.norm(vec_nested)
+#             vec_deep = vec_deep / np.linalg.norm(vec_deep)
+#         index_docs.append(
+#             DeepNestedDoc(
+#                 d=NestedDoc(d=SimpleDoc(tens=vec_simple), tens=vec_nested),
+#                 tens=vec_deep,
+#             )
+#         )
+
+#     store.index(index_docs)
+
+#     query = index_docs[-1]
+
+#     # find on root level
+#     docs, scores = store.find(query, search_field='tens', limit=5)
+#     assert len(docs) == 5
+#     assert len(scores) == 5
+#     assert docs[0].id == index_docs[-1].id
+#     assert np.allclose(docs[0].tens, index_docs[-1].tens)
+
+#     # find on first nesting level
+#     docs, scores = store.find(query, search_field='d__tens', limit=5)
+#     assert len(docs) == 5
+#     assert len(scores) == 5
+#     assert docs[0].id == index_docs[-1].id
+#     assert np.allclose(docs[0].d.tens, index_docs[-1].d.tens)
+
+#     # find on second nesting level
+#     docs, scores = store.find(query, search_field='d__d__tens', limit=5)
+#     assert len(docs) == 5
+#     assert len(scores) == 5
+#     assert docs[0].id == index_docs[-1].id
+#     assert np.allclose(docs[0].d.d.tens, index_docs[-1].d.d.tens)
+
+
+# def test_find_batched():
+#     store = ElasticDocIndex[SimpleDoc]()
+
+#     index_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(10)]
+#     store.index(index_docs)
+
+#     queries = index_docs[-2:]
+#     docs_batched, scores_batched = store.find_batched(
+#         queries, search_field='tens', limit=5
+#     )
+
+#     for docs, scores, query in zip(docs_batched, scores_batched, queries):
+#         assert len(docs) == 5
+#         assert len(scores) == 5
+#         assert docs[0].id == query.id
+#         assert np.allclose(docs[0].tens, query.tens)
+
+
+# def test_filter():
+#     class MyDoc(BaseDoc):
+#         A: bool
+#         B: int
+#         C: float
+
+#     store = ElasticDocIndex[MyDoc]()
+
+#     index_docs = [MyDoc(id=f'{i}', A=(i % 2 == 0), B=i, C=i + 0.5) for i in range(10)]
+#     store.index(index_docs)
+
+#     filter_query = {'term': {'A': True}}
+#     docs = store.filter(filter_query)
+#     assert len(docs) > 0
+#     for doc in docs:
+#         assert doc.A
+
+#     filter_query = {
+#         "bool": {
+#             "filter": [
+#                 {"terms": {"B": [3, 4, 7, 8]}},
+#                 {"range": {"C": {"gte": 3, "lte": 5}}},
+#             ]
+#         }
+#     }
+#     docs = store.filter(filter_query)
+#     assert [doc.id for doc in docs] == ['3', '4']
+
+
+# def test_text_search():
+#     class MyDoc(BaseDoc):
+#         text: str
+
+#     store = ElasticDocIndex[MyDoc]()
+#     index_docs = [
+#         MyDoc(text='hello world'),
+#         MyDoc(text='never gonna give you up'),
+#         MyDoc(text='we are the world'),
+#     ]
+#     store.index(index_docs)
+
+#     query = 'world'
+#     docs, scores = store.text_search(query, search_field='text')
+
+#     assert len(docs) == 2
+#     assert len(scores) == 2
+#     assert docs[0].text.index(query) >= 0
+#     assert docs[1].text.index(query) >= 0
+
+#     queries = ['world', 'never']
+#     docs, scores = store.text_search_batched(queries, search_field='text')
+#     for query, da, score in zip(queries, docs, scores):
+#         assert len(da) > 0
+#         assert len(score) > 0
+#         for doc in da:
+#             assert doc.text.index(query) >= 0
+
+
+# def test_query_builder():
+#     class MyDoc(BaseDoc):
+#         tens: NdArray[10] = Field(similarity='l2_norm')
+#         num: int
+#         text: str
+
+#     store = ElasticDocIndex[MyDoc]()
+#     index_docs = [
+#         MyDoc(id=f'{i}', tens=np.ones(10) * i, num=int(i / 2), text=f'text {int(i/2)}')
+#         for i in range(10)
+#     ]
+#     store.index(index_docs)
+
+#     # build_query
+#     q = store.build_query()
+#     assert isinstance(q, store.QueryBuilder)
+
+#     # filter
+#     q = store.build_query().filter({'term': {'num': 0}}).build()
+#     docs, _ = store.execute_query(q)
+#     assert [doc['id'] for doc in docs] == ['0', '1']
+
+#     # find
+#     q = store.build_query().find(index_docs[-1], search_field='tens', limit=3).build()
+#     docs, _ = store.execute_query(q)
+#     assert [doc['id'] for doc in docs] == ['9', '8', '7']
+
+#     # text_search
+#     q = store.build_query().text_search('0', search_field='text').build()
+#     docs, _ = store.execute_query(q)
+#     assert [doc['id'] for doc in docs] == ['0', '1']
+
+#     # combination
+#     q = (
+#         store.build_query()
+#         .filter({'range': {'num': {'lte': 3}}})
+#         .find(index_docs[-1], search_field='tens')
+#         .text_search('0', search_field='text')
+#         .build()
+#     )
+#     docs, _ = store.execute_query(q)
+#     assert [doc['id'] for doc in docs] == ['1', '0']
+
+#     # direct
+#     query = {
+#         'knn': {
+#             'field': 'tens',
+#             'query_vector': [9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0],
+#             'k': 10,
+#             'num_candidates': 10000,
+#             'filter': {
+#                 'bool': {
+#                     'filter': [
+#                         {'range': {'num': {'gte': 2}}},
+#                         {'range': {'num': {'lte': 3}}},
+#                     ]
+#                 }
+#             },
+#         },
+#     }
+
+#     docs, _ = store.execute_query(query)
+#     assert [doc['id'] for doc in docs] == ['7', '6', '5', '4']
diff --git a/tests/index/elastic/v8/test_index_get_del.py b/tests/index/elastic/v8/test_index_get_del.py
index 5777a3bc977..b3e8b27c162 100644
--- a/tests/index/elastic/v8/test_index_get_del.py
+++ b/tests/index/elastic/v8/test_index_get_del.py
@@ -1,232 +1,232 @@
-import numpy as np
-import pytest
+# import numpy as np
+# import pytest
 
-from docarray import DocumentArray
-from docarray.index import ElasticDocumentV8Index
-from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
-from tests.index.elastic.fixture import DeepNestedDoc, FlatDoc, NestedDoc, SimpleDoc
+# from docarray import DocArray
+# from docarray.index import ElasticDocIndex
+# from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
+# from tests.index.elastic.fixture import DeepNestedDoc, FlatDoc, NestedDoc, SimpleDoc
 
 
-@pytest.fixture
-def ten_simple_docs():
-    return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)]
+# @pytest.fixture
+# def ten_simple_docs():
+#     return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)]
 
 
-@pytest.fixture
-def ten_flat_docs():
-    return [
-        FlatDoc(tens_one=np.random.randn(10), tens_two=np.random.randn(50))
-        for _ in range(10)
-    ]
+# @pytest.fixture
+# def ten_flat_docs():
+#     return [
+#         FlatDoc(tens_one=np.random.randn(10), tens_two=np.random.randn(50))
+#         for _ in range(10)
+#     ]
 
 
-@pytest.fixture
-def ten_nested_docs():
-    return [NestedDoc(d=SimpleDoc(tens=np.random.randn(10))) for _ in range(10)]
+# @pytest.fixture
+# def ten_nested_docs():
+#     return [NestedDoc(d=SimpleDoc(tens=np.random.randn(10))) for _ in range(10)]
 
 
-@pytest.fixture
-def ten_deep_nested_docs():
-    return [
-        DeepNestedDoc(d=NestedDoc(d=SimpleDoc(tens=np.random.randn(10))))
-        for _ in range(10)
-    ]
+# @pytest.fixture
+# def ten_deep_nested_docs():
+#     return [
+#         DeepNestedDoc(d=NestedDoc(d=SimpleDoc(tens=np.random.randn(10))))
+#         for _ in range(10)
+#     ]
 
 
-@pytest.mark.parametrize('use_docarray', [True, False])
-def test_index_simple_schema(ten_simple_docs, use_docarray):
-    store = ElasticDocumentV8Index[SimpleDoc]()
-    if use_docarray:
-        ten_simple_docs = DocumentArray[SimpleDoc](ten_simple_docs)
+# @pytest.mark.parametrize('use_docarray', [True, False])
+# def test_index_simple_schema(ten_simple_docs, use_docarray):
+#     store = ElasticDocIndex[SimpleDoc]()
+#     if use_docarray:
+#         ten_simple_docs = DocArray[SimpleDoc](ten_simple_docs)
 
-    store.index(ten_simple_docs)
-    assert store.num_docs() == 10
+#     store.index(ten_simple_docs)
+#     assert store.num_docs() == 10
 
 
-@pytest.mark.parametrize('use_docarray', [True, False])
-def test_index_flat_schema(ten_flat_docs, use_docarray):
-    store = ElasticDocumentV8Index[FlatDoc]()
-    if use_docarray:
-        ten_flat_docs = DocumentArray[FlatDoc](ten_flat_docs)
+# @pytest.mark.parametrize('use_docarray', [True, False])
+# def test_index_flat_schema(ten_flat_docs, use_docarray):
+#     store = ElasticDocIndex[FlatDoc]()
+#     if use_docarray:
+#         ten_flat_docs = DocArray[FlatDoc](ten_flat_docs)
 
-    store.index(ten_flat_docs)
-    assert store.num_docs() == 10
+#     store.index(ten_flat_docs)
+#     assert store.num_docs() == 10
 
 
-@pytest.mark.parametrize('use_docarray', [True, False])
-def test_index_nested_schema(ten_nested_docs, use_docarray):
-    store = ElasticDocumentV8Index[NestedDoc]()
-    if use_docarray:
-        ten_nested_docs = DocumentArray[NestedDoc](ten_nested_docs)
+# @pytest.mark.parametrize('use_docarray', [True, False])
+# def test_index_nested_schema(ten_nested_docs, use_docarray):
+#     store = ElasticDocIndex[NestedDoc]()
+#     if use_docarray:
+#         ten_nested_docs = DocArray[NestedDoc](ten_nested_docs)
 
-    store.index(ten_nested_docs)
-    assert store.num_docs() == 10
+#     store.index(ten_nested_docs)
+#     assert store.num_docs() == 10
 
 
-@pytest.mark.parametrize('use_docarray', [True, False])
-def test_index_deep_nested_schema(ten_deep_nested_docs, use_docarray):
-    store = ElasticDocumentV8Index[DeepNestedDoc]()
-    if use_docarray:
-        ten_deep_nested_docs = DocumentArray[DeepNestedDoc](ten_deep_nested_docs)
-
-    store.index(ten_deep_nested_docs)
-    assert store.num_docs() == 10
-
-
-def test_get_single(ten_simple_docs, ten_flat_docs, ten_nested_docs):
-    # simple
-    store = ElasticDocumentV8Index[SimpleDoc]()
-    store.index(ten_simple_docs)
-
-    assert store.num_docs() == 10
-    for d in ten_simple_docs:
-        id_ = d.id
-        assert store[id_].id == id_
-        assert np.all(store[id_].tens == d.tens)
-
-    # flat
-    store = ElasticDocumentV8Index[FlatDoc]()
-    store.index(ten_flat_docs)
-
-    assert store.num_docs() == 10
-    for d in ten_flat_docs:
-        id_ = d.id
-        assert store[id_].id == id_
-        assert np.all(store[id_].tens_one == d.tens_one)
-        assert np.all(store[id_].tens_two == d.tens_two)
-
-    # nested
-    store = ElasticDocumentV8Index[NestedDoc]()
-    store.index(ten_nested_docs)
-
-    assert store.num_docs() == 10
-    for d in ten_nested_docs:
-        id_ = d.id
-        assert store[id_].id == id_
-        assert store[id_].d.id == d.d.id
-        assert np.all(store[id_].d.tens == d.d.tens)
-
-
-def test_get_multiple(ten_simple_docs, ten_flat_docs, ten_nested_docs):
-    docs_to_get_idx = [0, 2, 4, 6, 8]
-
-    # simple
-    store = ElasticDocumentV8Index[SimpleDoc]()
-    store.index(ten_simple_docs)
-
-    assert store.num_docs() == 10
-    docs_to_get = [ten_simple_docs[i] for i in docs_to_get_idx]
-    ids_to_get = [d.id for d in docs_to_get]
-    retrieved_docs = store[ids_to_get]
-    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
-        assert d_out.id == id_
-        assert np.all(d_out.tens == d_in.tens)
-
-    # flat
-    store = ElasticDocumentV8Index[FlatDoc]()
-    store.index(ten_flat_docs)
-
-    assert store.num_docs() == 10
-    docs_to_get = [ten_flat_docs[i] for i in docs_to_get_idx]
-    ids_to_get = [d.id for d in docs_to_get]
-    retrieved_docs = store[ids_to_get]
-    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
-        assert d_out.id == id_
-        assert np.all(d_out.tens_one == d_in.tens_one)
-        assert np.all(d_out.tens_two == d_in.tens_two)
-
-    # nested
-    store = ElasticDocumentV8Index[NestedDoc]()
-    store.index(ten_nested_docs)
-
-    assert store.num_docs() == 10
-    docs_to_get = [ten_nested_docs[i] for i in docs_to_get_idx]
-    ids_to_get = [d.id for d in docs_to_get]
-    retrieved_docs = store[ids_to_get]
-    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
-        assert d_out.id == id_
-        assert d_out.d.id == d_in.d.id
-        assert np.all(d_out.d.tens == d_in.d.tens)
-
-
-def test_get_key_error(ten_simple_docs):
-    store = ElasticDocumentV8Index[SimpleDoc]()
-    store.index(ten_simple_docs)
-
-    with pytest.raises(KeyError):
-        store['not_a_real_id']
-
-
-def test_del_single(ten_simple_docs):
-    store = ElasticDocumentV8Index[SimpleDoc]()
-    store.index(ten_simple_docs)
-    # delete once
-    assert store.num_docs() == 10
-    del store[ten_simple_docs[0].id]
-    assert store.num_docs() == 9
-    for i, d in enumerate(ten_simple_docs):
-        id_ = d.id
-        if i == 0:  # deleted
-            with pytest.raises(KeyError):
-                store[id_]
-        else:
-            assert store[id_].id == id_
-            assert np.all(store[id_].tens == d.tens)
-    # delete again
-    del store[ten_simple_docs[3].id]
-    assert store.num_docs() == 8
-    for i, d in enumerate(ten_simple_docs):
-        id_ = d.id
-        if i in (0, 3):  # deleted
-            with pytest.raises(KeyError):
-                store[id_]
-        else:
-            assert store[id_].id == id_
-            assert np.all(store[id_].tens == d.tens)
-
-
-def test_del_multiple(ten_simple_docs):
-    docs_to_del_idx = [0, 2, 4, 6, 8]
-
-    store = ElasticDocumentV8Index[SimpleDoc]()
-    store.index(ten_simple_docs)
-
-    assert store.num_docs() == 10
-    docs_to_del = [ten_simple_docs[i] for i in docs_to_del_idx]
-    ids_to_del = [d.id for d in docs_to_del]
-    del store[ids_to_del]
-    for i, doc in enumerate(ten_simple_docs):
-        if i in docs_to_del_idx:
-            with pytest.raises(KeyError):
-                store[doc.id]
-        else:
-            assert store[doc.id].id == doc.id
-            assert np.all(store[doc.id].tens == doc.tens)
-
-
-def test_del_key_error(ten_simple_docs):
-    store = ElasticDocumentV8Index[SimpleDoc]()
-    store.index(ten_simple_docs)
-
-    with pytest.warns(UserWarning):
-        del store['not_a_real_id']
-
-
-def test_num_docs(ten_simple_docs):
-    store = ElasticDocumentV8Index[SimpleDoc]()
-    store.index(ten_simple_docs)
-
-    assert store.num_docs() == 10
-
-    del store[ten_simple_docs[0].id]
-    assert store.num_docs() == 9
-
-    del store[ten_simple_docs[3].id, ten_simple_docs[5].id]
-    assert store.num_docs() == 7
-
-    more_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(5)]
-    store.index(more_docs)
-    assert store.num_docs() == 12
-
-    del store[more_docs[2].id, ten_simple_docs[7].id]
-    assert store.num_docs() == 10
+# @pytest.mark.parametrize('use_docarray', [True, False])
+# def test_index_deep_nested_schema(ten_deep_nested_docs, use_docarray):
+#     store = ElasticDocIndex[DeepNestedDoc]()
+#     if use_docarray:
+#         ten_deep_nested_docs = DocArray[DeepNestedDoc](ten_deep_nested_docs)
+
+#     store.index(ten_deep_nested_docs)
+#     assert store.num_docs() == 10
+
+
+# def test_get_single(ten_simple_docs, ten_flat_docs, ten_nested_docs):
+#     # simple
+#     store = ElasticDocIndex[SimpleDoc]()
+#     store.index(ten_simple_docs)
+
+#     assert store.num_docs() == 10
+#     for d in ten_simple_docs:
+#         id_ = d.id
+#         assert store[id_].id == id_
+#         assert np.all(store[id_].tens == d.tens)
+
+#     # flat
+#     store = ElasticDocIndex[FlatDoc]()
+#     store.index(ten_flat_docs)
+
+#     assert store.num_docs() == 10
+#     for d in ten_flat_docs:
+#         id_ = d.id
+#         assert store[id_].id == id_
+#         assert np.all(store[id_].tens_one == d.tens_one)
+#         assert np.all(store[id_].tens_two == d.tens_two)
+
+#     # nested
+#     store = ElasticDocIndex[NestedDoc]()
+#     store.index(ten_nested_docs)
+
+#     assert store.num_docs() == 10
+#     for d in ten_nested_docs:
+#         id_ = d.id
+#         assert store[id_].id == id_
+#         assert store[id_].d.id == d.d.id
+#         assert np.all(store[id_].d.tens == d.d.tens)
+
+
+# def test_get_multiple(ten_simple_docs, ten_flat_docs, ten_nested_docs):
+#     docs_to_get_idx = [0, 2, 4, 6, 8]
+
+#     # simple
+#     store = ElasticDocIndex[SimpleDoc]()
+#     store.index(ten_simple_docs)
+
+#     assert store.num_docs() == 10
+#     docs_to_get = [ten_simple_docs[i] for i in docs_to_get_idx]
+#     ids_to_get = [d.id for d in docs_to_get]
+#     retrieved_docs = store[ids_to_get]
+#     for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+#         assert d_out.id == id_
+#         assert np.all(d_out.tens == d_in.tens)
+
+#     # flat
+#     store = ElasticDocIndex[FlatDoc]()
+#     store.index(ten_flat_docs)
+
+#     assert store.num_docs() == 10
+#     docs_to_get = [ten_flat_docs[i] for i in docs_to_get_idx]
+#     ids_to_get = [d.id for d in docs_to_get]
+#     retrieved_docs = store[ids_to_get]
+#     for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+#         assert d_out.id == id_
+#         assert np.all(d_out.tens_one == d_in.tens_one)
+#         assert np.all(d_out.tens_two == d_in.tens_two)
+
+#     # nested
+#     store = ElasticDocIndex[NestedDoc]()
+#     store.index(ten_nested_docs)
+
+#     assert store.num_docs() == 10
+#     docs_to_get = [ten_nested_docs[i] for i in docs_to_get_idx]
+#     ids_to_get = [d.id for d in docs_to_get]
+#     retrieved_docs = store[ids_to_get]
+#     for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+#         assert d_out.id == id_
+#         assert d_out.d.id == d_in.d.id
+#         assert np.all(d_out.d.tens == d_in.d.tens)
+
+
+# def test_get_key_error(ten_simple_docs):
+#     store = ElasticDocIndex[SimpleDoc]()
+#     store.index(ten_simple_docs)
+
+#     with pytest.raises(KeyError):
+#         store['not_a_real_id']
+
+
+# def test_del_single(ten_simple_docs):
+#     store = ElasticDocIndex[SimpleDoc]()
+#     store.index(ten_simple_docs)
+#     # delete once
+#     assert store.num_docs() == 10
+#     del store[ten_simple_docs[0].id]
+#     assert store.num_docs() == 9
+#     for i, d in enumerate(ten_simple_docs):
+#         id_ = d.id
+#         if i == 0:  # deleted
+#             with pytest.raises(KeyError):
+#                 store[id_]
+#         else:
+#             assert store[id_].id == id_
+#             assert np.all(store[id_].tens == d.tens)
+#     # delete again
+#     del store[ten_simple_docs[3].id]
+#     assert store.num_docs() == 8
+#     for i, d in enumerate(ten_simple_docs):
+#         id_ = d.id
+#         if i in (0, 3):  # deleted
+#             with pytest.raises(KeyError):
+#                 store[id_]
+#         else:
+#             assert store[id_].id == id_
+#             assert np.all(store[id_].tens == d.tens)
+
+
+# def test_del_multiple(ten_simple_docs):
+#     docs_to_del_idx = [0, 2, 4, 6, 8]
+
+#     store = ElasticDocIndex[SimpleDoc]()
+#     store.index(ten_simple_docs)
+
+#     assert store.num_docs() == 10
+#     docs_to_del = [ten_simple_docs[i] for i in docs_to_del_idx]
+#     ids_to_del = [d.id for d in docs_to_del]
+#     del store[ids_to_del]
+#     for i, doc in enumerate(ten_simple_docs):
+#         if i in docs_to_del_idx:
+#             with pytest.raises(KeyError):
+#                 store[doc.id]
+#         else:
+#             assert store[doc.id].id == doc.id
+#             assert np.all(store[doc.id].tens == doc.tens)
+
+
+# def test_del_key_error(ten_simple_docs):
+#     store = ElasticDocIndex[SimpleDoc]()
+#     store.index(ten_simple_docs)
+
+#     with pytest.warns(UserWarning):
+#         del store['not_a_real_id']
+
+
+# def test_num_docs(ten_simple_docs):
+#     store = ElasticDocIndex[SimpleDoc]()
+#     store.index(ten_simple_docs)
+
+#     assert store.num_docs() == 10
+
+#     del store[ten_simple_docs[0].id]
+#     assert store.num_docs() == 9
+
+#     del store[ten_simple_docs[3].id, ten_simple_docs[5].id]
+#     assert store.num_docs() == 7
+
+#     more_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(5)]
+#     store.index(more_docs)
+#     assert store.num_docs() == 12
+
+#     del store[more_docs[2].id, ten_simple_docs[7].id]
+#     assert store.num_docs() == 10

From da5380554087edf5f90b3e18f2c76b3115456a98 Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 31 Mar 2023 11:17:07 +0800
Subject: [PATCH 04/14] feat: add elasticdoc v8

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 docarray/index/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/docarray/index/__init__.py b/docarray/index/__init__.py
index aefe7fa6bfe..df0d133d29c 100644
--- a/docarray/index/__init__.py
+++ b/docarray/index/__init__.py
@@ -20,11 +20,11 @@ def __getattr__(name: str):
         import_library('hnswlib', raise_error=True)
         import docarray.index.backends.hnswlib as lib
     elif name == 'ElasticDocIndex':
-        import_library('elasticsearch==8.6.2', raise_error=True)
-        import docarray.index.backends.elasticv7 as lib
-    elif name == 'ElasticV7DocIndex':
-        import_library('elasticsearch==7.10.1', raise_error=True)
+        import_library('elasticsearch', raise_error=True)
         import docarray.index.backends.elastic as lib
+    elif name == 'ElasticV7DocIndex':
+        import_library('elasticsearch', raise_error=True)
+        import docarray.index.backends.elasticv7 as lib
     else:
         raise ImportError(
             f'cannot import name \'{name}\' from \'{_get_path_from_docarray_root_level(__file__)}\''

From c0a3c5d6ee165cb7e965411cf070cfdc61447787 Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 31 Mar 2023 14:35:58 +0800
Subject: [PATCH 05/14] fix: update poetry

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 poetry.lock    | 25 ++++++++++++++++++++++---
 pyproject.toml |  1 +
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index a9bc680af7f..dbbe7aeae8f 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -803,6 +803,25 @@ six = ">=1.9.0"
 gmpy = ["gmpy"]
 gmpy2 = ["gmpy2"]
 
+[[package]]
+name = "elastic-transport"
+version = "8.4.0"
+description = "Transport classes and utilities shared among Python Elastic client libraries"
+category = "main"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "elastic-transport-8.4.0.tar.gz", hash = "sha256:b9ad708ceb7fcdbc6b30a96f886609a109f042c0b9d9f2e44403b3133ba7ff10"},
+    {file = "elastic_transport-8.4.0-py3-none-any.whl", hash = "sha256:19db271ab79c9f70f8c43f8f5b5111408781a6176b54ab2e54d713b6d9ceb815"},
+]
+
+[package.dependencies]
+certifi = "*"
+urllib3 = ">=1.26.2,<2"
+
+[package.extras]
+develop = ["aiohttp", "mock", "pytest", "pytest-asyncio", "pytest-cov", "pytest-httpserver", "pytest-mock", "requests", "trustme"]
+
 [[package]]
 name = "elasticsearch"
 version = "7.10.1"
@@ -4590,9 +4609,9 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
 [extras]
 audio = ["pydub"]
 aws = ["smart-open"]
-common = ["protobuf", "lz4"]
+common = ["lz4", "protobuf"]
 elasticsearch = ["elasticsearch"]
-full = ["protobuf", "lz4", "pandas", "pillow", "types-pillow", "av", "pydub", "trimesh"]
+full = ["av", "lz4", "pandas", "pillow", "protobuf", "pydub", "trimesh", "types-pillow"]
 hnswlib = ["hnswlib"]
 image = ["pillow", "types-pillow"]
 jac = ["jina-hubble-sdk"]
@@ -4605,4 +4624,4 @@ web = ["fastapi"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.7,<4.0"
-content-hash = "821f6cd00f78c456f6146f39c14f0704e4f2d113c35db00c58462d8cfbe3a538"
+content-hash = "49f70eda2036ec961a1ed06e9364c56710c91f152d030ddf566519b443b52f93"
diff --git a/pyproject.toml b/pyproject.toml
index 3114ff8dc61..229151108d1 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -27,6 +27,7 @@ pandas = {version = ">=1.1.0", optional = true }
 elasticsearch = {version = "7.10.1", optional = true }
 smart-open = {version = ">=6.3.0", extras = ["s3"], optional = true}
 jina-hubble-sdk = {version = ">=0.34.0", optional = true}
+elastic-transport = "^8.4.0"
 
 [tool.poetry.extras]
 common = ["protobuf", "lz4"]

From 82a76815b2da6f64fadc5013fdcd8c115efa9e15 Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 31 Mar 2023 20:34:15 +0800
Subject: [PATCH 06/14] refactor: adjust folder structure

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 tests/{index => integrations/doc_index}/hnswlib/__init__.py       | 0
 tests/{index => integrations/doc_index}/hnswlib/test_find.py      | 0
 .../doc_index}/hnswlib/test_index_get_del.py                      | 0
 .../doc_index}/hnswlib/test_persist_data.py                       | 0
 4 files changed, 0 insertions(+), 0 deletions(-)
 rename tests/{index => integrations/doc_index}/hnswlib/__init__.py (100%)
 rename tests/{index => integrations/doc_index}/hnswlib/test_find.py (100%)
 rename tests/{index => integrations/doc_index}/hnswlib/test_index_get_del.py (100%)
 rename tests/{index => integrations/doc_index}/hnswlib/test_persist_data.py (100%)

diff --git a/tests/index/hnswlib/__init__.py b/tests/integrations/doc_index/hnswlib/__init__.py
similarity index 100%
rename from tests/index/hnswlib/__init__.py
rename to tests/integrations/doc_index/hnswlib/__init__.py
diff --git a/tests/index/hnswlib/test_find.py b/tests/integrations/doc_index/hnswlib/test_find.py
similarity index 100%
rename from tests/index/hnswlib/test_find.py
rename to tests/integrations/doc_index/hnswlib/test_find.py
diff --git a/tests/index/hnswlib/test_index_get_del.py b/tests/integrations/doc_index/hnswlib/test_index_get_del.py
similarity index 100%
rename from tests/index/hnswlib/test_index_get_del.py
rename to tests/integrations/doc_index/hnswlib/test_index_get_del.py
diff --git a/tests/index/hnswlib/test_persist_data.py b/tests/integrations/doc_index/hnswlib/test_persist_data.py
similarity index 100%
rename from tests/index/hnswlib/test_persist_data.py
rename to tests/integrations/doc_index/hnswlib/test_persist_data.py

From f757a2791e8e9014890cc96fa0efd8ed31970383 Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 31 Mar 2023 20:44:51 +0800
Subject: [PATCH 07/14] test: elastic v8 tests

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 docarray/index/backends/elastic.py            |   3 -
 tests/index/elastic/v8/test_find.py           | 269 --------------
 tests/index/elastic/v8/test_index_get_del.py  | 232 ------------
 .../integrations/doc_index/elastic/fixture.py |  62 +++-
 .../elastic/v7/test_index_get_del.py          |  58 +--
 .../doc_index}/elastic/v8/docker-compose.yml  |   0
 .../elastic/v8/test_column_config.py          | 131 +++++++
 .../doc_index/elastic/v8/test_find.py         | 329 ++++++++++++++++++
 .../elastic/v8/test_index_get_del.py          | 270 ++++++++++++++
 9 files changed, 792 insertions(+), 562 deletions(-)
 delete mode 100644 tests/index/elastic/v8/test_find.py
 delete mode 100644 tests/index/elastic/v8/test_index_get_del.py
 rename tests/{index => integrations/doc_index}/elastic/v8/docker-compose.yml (100%)
 create mode 100644 tests/integrations/doc_index/elastic/v8/test_column_config.py
 create mode 100644 tests/integrations/doc_index/elastic/v8/test_find.py
 create mode 100644 tests/integrations/doc_index/elastic/v8/test_index_get_del.py

diff --git a/docarray/index/backends/elastic.py b/docarray/index/backends/elastic.py
index 462b3a56591..c003e9d4095 100644
--- a/docarray/index/backends/elastic.py
+++ b/docarray/index/backends/elastic.py
@@ -59,9 +59,6 @@
     ELASTIC_PY_VEC_TYPES.append(TensorFlowTensor)
 
 
-# toml
-# elastic-transport = "^8.4.0"
-# elasticsearch = "^8.6.2"
 class ElasticDocIndex(BaseDocIndex, Generic[TSchema]):
     def __init__(self, db_config=None, **kwargs):
         super().__init__(db_config=db_config, **kwargs)
diff --git a/tests/index/elastic/v8/test_find.py b/tests/index/elastic/v8/test_find.py
deleted file mode 100644
index d61ae643ae0..00000000000
--- a/tests/index/elastic/v8/test_find.py
+++ /dev/null
@@ -1,269 +0,0 @@
-# import numpy as np
-# import pytest
-# from pydantic import Field
-
-# from docarray import BaseDoc
-# from docarray.index import ElasticDocIndex
-# from docarray.typing import NdArray
-# from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
-# from tests.index.elastic.fixture import FlatDoc, SimpleDoc
-
-
-# @pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
-# def test_find_simple_schema(similarity):
-#     class SimpleSchema(BaseDoc):
-#         tens: NdArray[10] = Field(similarity=similarity)
-
-#     store = ElasticDocIndex[SimpleSchema]()
-
-#     index_docs = []
-#     for _ in range(10):
-#         vec = np.random.rand(10)
-#         if similarity == 'dot_product':
-#             vec = vec / np.linalg.norm(vec)
-#         index_docs.append(SimpleDoc(tens=vec))
-#     store.index(index_docs)
-
-#     query = index_docs[-1]
-#     docs, scores = store.find(query, search_field='tens', limit=5)
-
-#     assert len(docs) == 5
-#     assert len(scores) == 5
-#     assert docs[0].id == index_docs[-1].id
-#     assert np.allclose(docs[0].tens, index_docs[-1].tens)
-
-
-# @pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
-# def test_find_flat_schema(similarity):
-#     class FlatSchema(BaseDoc):
-#         tens_one: NdArray = Field(dims=10, similarity=similarity)
-#         tens_two: NdArray = Field(dims=50, similarity=similarity)
-
-#     store = ElasticDocIndex[FlatSchema]()
-
-#     index_docs = []
-#     for _ in range(10):
-#         vec_one = np.random.rand(10)
-#         vec_two = np.random.rand(50)
-#         if similarity == 'dot_product':
-#             vec_one = vec_one / np.linalg.norm(vec_one)
-#             vec_two = vec_two / np.linalg.norm(vec_two)
-#         index_docs.append(FlatDoc(tens_one=vec_one, tens_two=vec_two))
-
-#     store.index(index_docs)
-
-#     query = index_docs[-1]
-
-#     # find on tens_one
-#     docs, scores = store.find(query, search_field='tens_one', limit=5)
-#     assert len(docs) == 5
-#     assert len(scores) == 5
-#     assert docs[0].id == index_docs[-1].id
-#     assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
-#     assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
-
-#     # find on tens_two
-#     docs, scores = store.find(query, search_field='tens_two', limit=5)
-#     assert len(docs) == 5
-#     assert len(scores) == 5
-#     assert docs[0].id == index_docs[-1].id
-#     assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
-#     assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
-
-
-# @pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
-# def test_find_nested_schema(similarity):
-#     class SimpleDoc(BaseDoc):
-#         tens: NdArray[10] = Field(similarity=similarity)
-
-#     class NestedDoc(BaseDoc):
-#         d: SimpleDoc
-#         tens: NdArray[10] = Field(similarity=similarity)
-
-#     class DeepNestedDoc(BaseDoc):
-#         d: NestedDoc
-#         tens: NdArray = Field(similarity=similarity, dims=10)
-
-#     store = ElasticDocIndex[DeepNestedDoc]()
-
-#     index_docs = []
-#     for _ in range(10):
-#         vec_simple = np.random.rand(10)
-#         vec_nested = np.random.rand(10)
-#         vec_deep = np.random.rand(10)
-#         if similarity == 'dot_product':
-#             vec_simple = vec_simple / np.linalg.norm(vec_simple)
-#             vec_nested = vec_nested / np.linalg.norm(vec_nested)
-#             vec_deep = vec_deep / np.linalg.norm(vec_deep)
-#         index_docs.append(
-#             DeepNestedDoc(
-#                 d=NestedDoc(d=SimpleDoc(tens=vec_simple), tens=vec_nested),
-#                 tens=vec_deep,
-#             )
-#         )
-
-#     store.index(index_docs)
-
-#     query = index_docs[-1]
-
-#     # find on root level
-#     docs, scores = store.find(query, search_field='tens', limit=5)
-#     assert len(docs) == 5
-#     assert len(scores) == 5
-#     assert docs[0].id == index_docs[-1].id
-#     assert np.allclose(docs[0].tens, index_docs[-1].tens)
-
-#     # find on first nesting level
-#     docs, scores = store.find(query, search_field='d__tens', limit=5)
-#     assert len(docs) == 5
-#     assert len(scores) == 5
-#     assert docs[0].id == index_docs[-1].id
-#     assert np.allclose(docs[0].d.tens, index_docs[-1].d.tens)
-
-#     # find on second nesting level
-#     docs, scores = store.find(query, search_field='d__d__tens', limit=5)
-#     assert len(docs) == 5
-#     assert len(scores) == 5
-#     assert docs[0].id == index_docs[-1].id
-#     assert np.allclose(docs[0].d.d.tens, index_docs[-1].d.d.tens)
-
-
-# def test_find_batched():
-#     store = ElasticDocIndex[SimpleDoc]()
-
-#     index_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(10)]
-#     store.index(index_docs)
-
-#     queries = index_docs[-2:]
-#     docs_batched, scores_batched = store.find_batched(
-#         queries, search_field='tens', limit=5
-#     )
-
-#     for docs, scores, query in zip(docs_batched, scores_batched, queries):
-#         assert len(docs) == 5
-#         assert len(scores) == 5
-#         assert docs[0].id == query.id
-#         assert np.allclose(docs[0].tens, query.tens)
-
-
-# def test_filter():
-#     class MyDoc(BaseDoc):
-#         A: bool
-#         B: int
-#         C: float
-
-#     store = ElasticDocIndex[MyDoc]()
-
-#     index_docs = [MyDoc(id=f'{i}', A=(i % 2 == 0), B=i, C=i + 0.5) for i in range(10)]
-#     store.index(index_docs)
-
-#     filter_query = {'term': {'A': True}}
-#     docs = store.filter(filter_query)
-#     assert len(docs) > 0
-#     for doc in docs:
-#         assert doc.A
-
-#     filter_query = {
-#         "bool": {
-#             "filter": [
-#                 {"terms": {"B": [3, 4, 7, 8]}},
-#                 {"range": {"C": {"gte": 3, "lte": 5}}},
-#             ]
-#         }
-#     }
-#     docs = store.filter(filter_query)
-#     assert [doc.id for doc in docs] == ['3', '4']
-
-
-# def test_text_search():
-#     class MyDoc(BaseDoc):
-#         text: str
-
-#     store = ElasticDocIndex[MyDoc]()
-#     index_docs = [
-#         MyDoc(text='hello world'),
-#         MyDoc(text='never gonna give you up'),
-#         MyDoc(text='we are the world'),
-#     ]
-#     store.index(index_docs)
-
-#     query = 'world'
-#     docs, scores = store.text_search(query, search_field='text')
-
-#     assert len(docs) == 2
-#     assert len(scores) == 2
-#     assert docs[0].text.index(query) >= 0
-#     assert docs[1].text.index(query) >= 0
-
-#     queries = ['world', 'never']
-#     docs, scores = store.text_search_batched(queries, search_field='text')
-#     for query, da, score in zip(queries, docs, scores):
-#         assert len(da) > 0
-#         assert len(score) > 0
-#         for doc in da:
-#             assert doc.text.index(query) >= 0
-
-
-# def test_query_builder():
-#     class MyDoc(BaseDoc):
-#         tens: NdArray[10] = Field(similarity='l2_norm')
-#         num: int
-#         text: str
-
-#     store = ElasticDocIndex[MyDoc]()
-#     index_docs = [
-#         MyDoc(id=f'{i}', tens=np.ones(10) * i, num=int(i / 2), text=f'text {int(i/2)}')
-#         for i in range(10)
-#     ]
-#     store.index(index_docs)
-
-#     # build_query
-#     q = store.build_query()
-#     assert isinstance(q, store.QueryBuilder)
-
-#     # filter
-#     q = store.build_query().filter({'term': {'num': 0}}).build()
-#     docs, _ = store.execute_query(q)
-#     assert [doc['id'] for doc in docs] == ['0', '1']
-
-#     # find
-#     q = store.build_query().find(index_docs[-1], search_field='tens', limit=3).build()
-#     docs, _ = store.execute_query(q)
-#     assert [doc['id'] for doc in docs] == ['9', '8', '7']
-
-#     # text_search
-#     q = store.build_query().text_search('0', search_field='text').build()
-#     docs, _ = store.execute_query(q)
-#     assert [doc['id'] for doc in docs] == ['0', '1']
-
-#     # combination
-#     q = (
-#         store.build_query()
-#         .filter({'range': {'num': {'lte': 3}}})
-#         .find(index_docs[-1], search_field='tens')
-#         .text_search('0', search_field='text')
-#         .build()
-#     )
-#     docs, _ = store.execute_query(q)
-#     assert [doc['id'] for doc in docs] == ['1', '0']
-
-#     # direct
-#     query = {
-#         'knn': {
-#             'field': 'tens',
-#             'query_vector': [9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0],
-#             'k': 10,
-#             'num_candidates': 10000,
-#             'filter': {
-#                 'bool': {
-#                     'filter': [
-#                         {'range': {'num': {'gte': 2}}},
-#                         {'range': {'num': {'lte': 3}}},
-#                     ]
-#                 }
-#             },
-#         },
-#     }
-
-#     docs, _ = store.execute_query(query)
-#     assert [doc['id'] for doc in docs] == ['7', '6', '5', '4']
diff --git a/tests/index/elastic/v8/test_index_get_del.py b/tests/index/elastic/v8/test_index_get_del.py
deleted file mode 100644
index b3e8b27c162..00000000000
--- a/tests/index/elastic/v8/test_index_get_del.py
+++ /dev/null
@@ -1,232 +0,0 @@
-# import numpy as np
-# import pytest
-
-# from docarray import DocArray
-# from docarray.index import ElasticDocIndex
-# from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
-# from tests.index.elastic.fixture import DeepNestedDoc, FlatDoc, NestedDoc, SimpleDoc
-
-
-# @pytest.fixture
-# def ten_simple_docs():
-#     return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)]
-
-
-# @pytest.fixture
-# def ten_flat_docs():
-#     return [
-#         FlatDoc(tens_one=np.random.randn(10), tens_two=np.random.randn(50))
-#         for _ in range(10)
-#     ]
-
-
-# @pytest.fixture
-# def ten_nested_docs():
-#     return [NestedDoc(d=SimpleDoc(tens=np.random.randn(10))) for _ in range(10)]
-
-
-# @pytest.fixture
-# def ten_deep_nested_docs():
-#     return [
-#         DeepNestedDoc(d=NestedDoc(d=SimpleDoc(tens=np.random.randn(10))))
-#         for _ in range(10)
-#     ]
-
-
-# @pytest.mark.parametrize('use_docarray', [True, False])
-# def test_index_simple_schema(ten_simple_docs, use_docarray):
-#     store = ElasticDocIndex[SimpleDoc]()
-#     if use_docarray:
-#         ten_simple_docs = DocArray[SimpleDoc](ten_simple_docs)
-
-#     store.index(ten_simple_docs)
-#     assert store.num_docs() == 10
-
-
-# @pytest.mark.parametrize('use_docarray', [True, False])
-# def test_index_flat_schema(ten_flat_docs, use_docarray):
-#     store = ElasticDocIndex[FlatDoc]()
-#     if use_docarray:
-#         ten_flat_docs = DocArray[FlatDoc](ten_flat_docs)
-
-#     store.index(ten_flat_docs)
-#     assert store.num_docs() == 10
-
-
-# @pytest.mark.parametrize('use_docarray', [True, False])
-# def test_index_nested_schema(ten_nested_docs, use_docarray):
-#     store = ElasticDocIndex[NestedDoc]()
-#     if use_docarray:
-#         ten_nested_docs = DocArray[NestedDoc](ten_nested_docs)
-
-#     store.index(ten_nested_docs)
-#     assert store.num_docs() == 10
-
-
-# @pytest.mark.parametrize('use_docarray', [True, False])
-# def test_index_deep_nested_schema(ten_deep_nested_docs, use_docarray):
-#     store = ElasticDocIndex[DeepNestedDoc]()
-#     if use_docarray:
-#         ten_deep_nested_docs = DocArray[DeepNestedDoc](ten_deep_nested_docs)
-
-#     store.index(ten_deep_nested_docs)
-#     assert store.num_docs() == 10
-
-
-# def test_get_single(ten_simple_docs, ten_flat_docs, ten_nested_docs):
-#     # simple
-#     store = ElasticDocIndex[SimpleDoc]()
-#     store.index(ten_simple_docs)
-
-#     assert store.num_docs() == 10
-#     for d in ten_simple_docs:
-#         id_ = d.id
-#         assert store[id_].id == id_
-#         assert np.all(store[id_].tens == d.tens)
-
-#     # flat
-#     store = ElasticDocIndex[FlatDoc]()
-#     store.index(ten_flat_docs)
-
-#     assert store.num_docs() == 10
-#     for d in ten_flat_docs:
-#         id_ = d.id
-#         assert store[id_].id == id_
-#         assert np.all(store[id_].tens_one == d.tens_one)
-#         assert np.all(store[id_].tens_two == d.tens_two)
-
-#     # nested
-#     store = ElasticDocIndex[NestedDoc]()
-#     store.index(ten_nested_docs)
-
-#     assert store.num_docs() == 10
-#     for d in ten_nested_docs:
-#         id_ = d.id
-#         assert store[id_].id == id_
-#         assert store[id_].d.id == d.d.id
-#         assert np.all(store[id_].d.tens == d.d.tens)
-
-
-# def test_get_multiple(ten_simple_docs, ten_flat_docs, ten_nested_docs):
-#     docs_to_get_idx = [0, 2, 4, 6, 8]
-
-#     # simple
-#     store = ElasticDocIndex[SimpleDoc]()
-#     store.index(ten_simple_docs)
-
-#     assert store.num_docs() == 10
-#     docs_to_get = [ten_simple_docs[i] for i in docs_to_get_idx]
-#     ids_to_get = [d.id for d in docs_to_get]
-#     retrieved_docs = store[ids_to_get]
-#     for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
-#         assert d_out.id == id_
-#         assert np.all(d_out.tens == d_in.tens)
-
-#     # flat
-#     store = ElasticDocIndex[FlatDoc]()
-#     store.index(ten_flat_docs)
-
-#     assert store.num_docs() == 10
-#     docs_to_get = [ten_flat_docs[i] for i in docs_to_get_idx]
-#     ids_to_get = [d.id for d in docs_to_get]
-#     retrieved_docs = store[ids_to_get]
-#     for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
-#         assert d_out.id == id_
-#         assert np.all(d_out.tens_one == d_in.tens_one)
-#         assert np.all(d_out.tens_two == d_in.tens_two)
-
-#     # nested
-#     store = ElasticDocIndex[NestedDoc]()
-#     store.index(ten_nested_docs)
-
-#     assert store.num_docs() == 10
-#     docs_to_get = [ten_nested_docs[i] for i in docs_to_get_idx]
-#     ids_to_get = [d.id for d in docs_to_get]
-#     retrieved_docs = store[ids_to_get]
-#     for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
-#         assert d_out.id == id_
-#         assert d_out.d.id == d_in.d.id
-#         assert np.all(d_out.d.tens == d_in.d.tens)
-
-
-# def test_get_key_error(ten_simple_docs):
-#     store = ElasticDocIndex[SimpleDoc]()
-#     store.index(ten_simple_docs)
-
-#     with pytest.raises(KeyError):
-#         store['not_a_real_id']
-
-
-# def test_del_single(ten_simple_docs):
-#     store = ElasticDocIndex[SimpleDoc]()
-#     store.index(ten_simple_docs)
-#     # delete once
-#     assert store.num_docs() == 10
-#     del store[ten_simple_docs[0].id]
-#     assert store.num_docs() == 9
-#     for i, d in enumerate(ten_simple_docs):
-#         id_ = d.id
-#         if i == 0:  # deleted
-#             with pytest.raises(KeyError):
-#                 store[id_]
-#         else:
-#             assert store[id_].id == id_
-#             assert np.all(store[id_].tens == d.tens)
-#     # delete again
-#     del store[ten_simple_docs[3].id]
-#     assert store.num_docs() == 8
-#     for i, d in enumerate(ten_simple_docs):
-#         id_ = d.id
-#         if i in (0, 3):  # deleted
-#             with pytest.raises(KeyError):
-#                 store[id_]
-#         else:
-#             assert store[id_].id == id_
-#             assert np.all(store[id_].tens == d.tens)
-
-
-# def test_del_multiple(ten_simple_docs):
-#     docs_to_del_idx = [0, 2, 4, 6, 8]
-
-#     store = ElasticDocIndex[SimpleDoc]()
-#     store.index(ten_simple_docs)
-
-#     assert store.num_docs() == 10
-#     docs_to_del = [ten_simple_docs[i] for i in docs_to_del_idx]
-#     ids_to_del = [d.id for d in docs_to_del]
-#     del store[ids_to_del]
-#     for i, doc in enumerate(ten_simple_docs):
-#         if i in docs_to_del_idx:
-#             with pytest.raises(KeyError):
-#                 store[doc.id]
-#         else:
-#             assert store[doc.id].id == doc.id
-#             assert np.all(store[doc.id].tens == doc.tens)
-
-
-# def test_del_key_error(ten_simple_docs):
-#     store = ElasticDocIndex[SimpleDoc]()
-#     store.index(ten_simple_docs)
-
-#     with pytest.warns(UserWarning):
-#         del store['not_a_real_id']
-
-
-# def test_num_docs(ten_simple_docs):
-#     store = ElasticDocIndex[SimpleDoc]()
-#     store.index(ten_simple_docs)
-
-#     assert store.num_docs() == 10
-
-#     del store[ten_simple_docs[0].id]
-#     assert store.num_docs() == 9
-
-#     del store[ten_simple_docs[3].id, ten_simple_docs[5].id]
-#     assert store.num_docs() == 7
-
-#     more_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(5)]
-#     store.index(more_docs)
-#     assert store.num_docs() == 12
-
-#     del store[more_docs[2].id, ten_simple_docs[7].id]
-#     assert store.num_docs() == 10
diff --git a/tests/integrations/doc_index/elastic/fixture.py b/tests/integrations/doc_index/elastic/fixture.py
index 1caa31da2a6..4f047b63587 100644
--- a/tests/integrations/doc_index/elastic/fixture.py
+++ b/tests/integrations/doc_index/elastic/fixture.py
@@ -1,6 +1,7 @@
 import os
 import time
 
+import numpy as np
 import pytest
 from pydantic import Field
 
@@ -9,24 +10,6 @@
 
 pytestmark = [pytest.mark.slow, pytest.mark.doc_index]
 
-
-class SimpleDoc(BaseDoc):
-    tens: NdArray[10] = Field(dims=1000)
-
-
-class FlatDoc(BaseDoc):
-    tens_one: NdArray = Field(dims=10)
-    tens_two: NdArray = Field(dims=50)
-
-
-class NestedDoc(BaseDoc):
-    d: SimpleDoc
-
-
-class DeepNestedDoc(BaseDoc):
-    d: NestedDoc
-
-
 cur_dir = os.path.dirname(os.path.abspath(__file__))
 compose_yml_v7 = os.path.abspath(os.path.join(cur_dir, 'v7/docker-compose.yml'))
 compose_yml_v8 = os.path.abspath(os.path.join(cur_dir, 'v8/docker-compose.yml'))
@@ -56,3 +39,46 @@ def _wait_for_es():
     es = Elasticsearch(hosts='http://localhost:9200/')
     while not es.ping():
         time.sleep(0.5)
+
+
+class SimpleDoc(BaseDoc):
+    tens: NdArray[10] = Field(dims=1000)
+
+
+class FlatDoc(BaseDoc):
+    tens_one: NdArray = Field(dims=10)
+    tens_two: NdArray = Field(dims=50)
+
+
+class NestedDoc(BaseDoc):
+    d: SimpleDoc
+
+
+class DeepNestedDoc(BaseDoc):
+    d: NestedDoc
+
+
+@pytest.fixture(scope='function')
+def ten_simple_docs():
+    return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)]
+
+
+@pytest.fixture(scope='function')
+def ten_flat_docs():
+    return [
+        FlatDoc(tens_one=np.random.randn(10), tens_two=np.random.randn(50))
+        for _ in range(10)
+    ]
+
+
+@pytest.fixture(scope='function')
+def ten_nested_docs():
+    return [NestedDoc(d=SimpleDoc(tens=np.random.randn(10))) for _ in range(10)]
+
+
+@pytest.fixture(scope='function')
+def ten_deep_nested_docs():
+    return [
+        DeepNestedDoc(d=NestedDoc(d=SimpleDoc(tens=np.random.randn(10))))
+        for _ in range(10)
+    ]
diff --git a/tests/integrations/doc_index/elastic/v7/test_index_get_del.py b/tests/integrations/doc_index/elastic/v7/test_index_get_del.py
index d5394a7925b..5c0655b8538 100644
--- a/tests/integrations/doc_index/elastic/v7/test_index_get_del.py
+++ b/tests/integrations/doc_index/elastic/v7/test_index_get_del.py
@@ -7,45 +7,23 @@
 from docarray.documents import ImageDoc, TextDoc
 from docarray.index import ElasticV7DocIndex
 from docarray.typing import NdArray
-from tests.integrations.doc_index.elastic.fixture import start_storage_v7  # noqa: F401
-from tests.integrations.doc_index.elastic.fixture import (
+from tests.integrations.doc_index.elastic.fixture import (  # noqa: F401
     DeepNestedDoc,
     FlatDoc,
     NestedDoc,
     SimpleDoc,
+    start_storage_v7,
+    ten_deep_nested_docs,
+    ten_flat_docs,
+    ten_nested_docs,
+    ten_simple_docs,
 )
 
 pytestmark = [pytest.mark.slow, pytest.mark.index]
 
 
-@pytest.fixture
-def ten_simple_docs():
-    return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)]
-
-
-@pytest.fixture
-def ten_flat_docs():
-    return [
-        FlatDoc(tens_one=np.random.randn(10), tens_two=np.random.randn(50))
-        for _ in range(10)
-    ]
-
-
-@pytest.fixture
-def ten_nested_docs():
-    return [NestedDoc(d=SimpleDoc(tens=np.random.randn(10))) for _ in range(10)]
-
-
-@pytest.fixture
-def ten_deep_nested_docs():
-    return [
-        DeepNestedDoc(d=NestedDoc(d=SimpleDoc(tens=np.random.randn(10))))
-        for _ in range(10)
-    ]
-
-
 @pytest.mark.parametrize('use_docarray', [True, False])
-def test_index_simple_schema(ten_simple_docs, use_docarray):
+def test_index_simple_schema(ten_simple_docs, use_docarray):  # noqa: F811
     store = ElasticV7DocIndex[SimpleDoc]()
     if use_docarray:
         ten_simple_docs = DocArray[SimpleDoc](ten_simple_docs)
@@ -55,7 +33,7 @@ def test_index_simple_schema(ten_simple_docs, use_docarray):
 
 
 @pytest.mark.parametrize('use_docarray', [True, False])
-def test_index_flat_schema(ten_flat_docs, use_docarray):
+def test_index_flat_schema(ten_flat_docs, use_docarray):  # noqa: F811
     store = ElasticV7DocIndex[FlatDoc]()
     if use_docarray:
         ten_flat_docs = DocArray[FlatDoc](ten_flat_docs)
@@ -65,7 +43,7 @@ def test_index_flat_schema(ten_flat_docs, use_docarray):
 
 
 @pytest.mark.parametrize('use_docarray', [True, False])
-def test_index_nested_schema(ten_nested_docs, use_docarray):
+def test_index_nested_schema(ten_nested_docs, use_docarray):  # noqa: F811
     store = ElasticV7DocIndex[NestedDoc]()
     if use_docarray:
         ten_nested_docs = DocArray[NestedDoc](ten_nested_docs)
@@ -75,7 +53,7 @@ def test_index_nested_schema(ten_nested_docs, use_docarray):
 
 
 @pytest.mark.parametrize('use_docarray', [True, False])
-def test_index_deep_nested_schema(ten_deep_nested_docs, use_docarray):
+def test_index_deep_nested_schema(ten_deep_nested_docs, use_docarray):  # noqa: F811
     store = ElasticV7DocIndex[DeepNestedDoc]()
     if use_docarray:
         ten_deep_nested_docs = DocArray[DeepNestedDoc](ten_deep_nested_docs)
@@ -84,7 +62,7 @@ def test_index_deep_nested_schema(ten_deep_nested_docs, use_docarray):
     assert store.num_docs() == 10
 
 
-def test_get_single(ten_simple_docs, ten_flat_docs, ten_nested_docs):
+def test_get_single(ten_simple_docs, ten_flat_docs, ten_nested_docs):  # noqa: F811
     # simple
     store = ElasticV7DocIndex[SimpleDoc]()
     store.index(ten_simple_docs)
@@ -118,7 +96,7 @@ def test_get_single(ten_simple_docs, ten_flat_docs, ten_nested_docs):
         assert np.all(store[id_].d.tens == d.d.tens)
 
 
-def test_get_multiple(ten_simple_docs, ten_flat_docs, ten_nested_docs):
+def test_get_multiple(ten_simple_docs, ten_flat_docs, ten_nested_docs):  # noqa: F811
     docs_to_get_idx = [0, 2, 4, 6, 8]
 
     # simple
@@ -160,7 +138,7 @@ def test_get_multiple(ten_simple_docs, ten_flat_docs, ten_nested_docs):
         assert np.all(d_out.d.tens == d_in.d.tens)
 
 
-def test_get_key_error(ten_simple_docs):
+def test_get_key_error(ten_simple_docs):  # noqa: F811
     store = ElasticV7DocIndex[SimpleDoc]()
     store.index(ten_simple_docs)
 
@@ -168,7 +146,7 @@ def test_get_key_error(ten_simple_docs):
         store['not_a_real_id']
 
 
-def test_persisting(ten_simple_docs):
+def test_persisting(ten_simple_docs):  # noqa: F811
     store = ElasticV7DocIndex[SimpleDoc](index_name='test_persisting')
     store.index(ten_simple_docs)
 
@@ -176,7 +154,7 @@ def test_persisting(ten_simple_docs):
     assert store2.num_docs() == 10
 
 
-def test_del_single(ten_simple_docs):
+def test_del_single(ten_simple_docs):  # noqa: F811
     store = ElasticV7DocIndex[SimpleDoc]()
     store.index(ten_simple_docs)
     # delete once
@@ -204,7 +182,7 @@ def test_del_single(ten_simple_docs):
             assert np.all(store[id_].tens == d.tens)
 
 
-def test_del_multiple(ten_simple_docs):
+def test_del_multiple(ten_simple_docs):  # noqa: F811
     docs_to_del_idx = [0, 2, 4, 6, 8]
 
     store = ElasticV7DocIndex[SimpleDoc]()
@@ -223,7 +201,7 @@ def test_del_multiple(ten_simple_docs):
             assert np.all(store[doc.id].tens == doc.tens)
 
 
-def test_del_key_error(ten_simple_docs):
+def test_del_key_error(ten_simple_docs):  # noqa: F811
     store = ElasticV7DocIndex[SimpleDoc]()
     store.index(ten_simple_docs)
 
@@ -231,7 +209,7 @@ def test_del_key_error(ten_simple_docs):
         del store['not_a_real_id']
 
 
-def test_num_docs(ten_simple_docs):
+def test_num_docs(ten_simple_docs):  # noqa: F811
     store = ElasticV7DocIndex[SimpleDoc]()
     store.index(ten_simple_docs)
 
diff --git a/tests/index/elastic/v8/docker-compose.yml b/tests/integrations/doc_index/elastic/v8/docker-compose.yml
similarity index 100%
rename from tests/index/elastic/v8/docker-compose.yml
rename to tests/integrations/doc_index/elastic/v8/docker-compose.yml
diff --git a/tests/integrations/doc_index/elastic/v8/test_column_config.py b/tests/integrations/doc_index/elastic/v8/test_column_config.py
new file mode 100644
index 00000000000..6e1ad6cf88b
--- /dev/null
+++ b/tests/integrations/doc_index/elastic/v8/test_column_config.py
@@ -0,0 +1,131 @@
+import pytest
+from pydantic import Field
+
+from docarray import BaseDoc
+from docarray.index import ElasticDocIndex
+from tests.integrations.doc_index.elastic.fixture import start_storage_v8  # noqa: F401
+
+pytestmark = [pytest.mark.slow, pytest.mark.index, pytest.mark.elasticv8]
+
+
+def test_column_config():
+    class MyDoc(BaseDoc):
+        text: str
+        color: str = Field(col_type='keyword')
+
+    store = ElasticDocIndex[MyDoc]()
+    index_docs = [
+        MyDoc(id='0', text='hello world', color='red'),
+        MyDoc(id='1', text='never gonna give you up', color='blue'),
+        MyDoc(id='2', text='we are the world', color='green'),
+    ]
+    store.index(index_docs)
+
+    query = 'world'
+    docs, _ = store.text_search(query, search_field='text')
+    assert [doc.id for doc in docs] == ['0', '2']
+
+    filter_query = {'terms': {'color': ['red', 'blue']}}
+    docs = store.filter(filter_query)
+    assert [doc.id for doc in docs] == ['0', '1']
+
+
+def test_field_object():
+    class MyDoc(BaseDoc):
+        manager: dict = Field(
+            properties={
+                'age': {'type': 'integer'},
+                'name': {
+                    'properties': {
+                        'first': {'type': 'keyword'},
+                        'last': {'type': 'keyword'},
+                    }
+                },
+            }
+        )
+
+    store = ElasticDocIndex[MyDoc]()
+    doc = [
+        MyDoc(manager={'age': 25, 'name': {'first': 'Rachel', 'last': 'Green'}}),
+        MyDoc(manager={'age': 30, 'name': {'first': 'Monica', 'last': 'Geller'}}),
+        MyDoc(manager={'age': 35, 'name': {'first': 'Phoebe', 'last': 'Buffay'}}),
+    ]
+    store.index(doc)
+    id_ = doc[0].id
+    assert store[id_].id == id_
+    assert store[id_].manager == doc[0].manager
+
+    filter_query = {'range': {'manager.age': {'gte': 30}}}
+    docs = store.filter(filter_query)
+    assert [doc.id for doc in docs] == [doc[1].id, doc[2].id]
+
+
+def test_field_geo_point():
+    class MyDoc(BaseDoc):
+        location: dict = Field(col_type='geo_point')
+
+    store = ElasticDocIndex[MyDoc]()
+    doc = [
+        MyDoc(location={'lat': 40.12, 'lon': -72.34}),
+        MyDoc(location={'lat': 41.12, 'lon': -73.34}),
+        MyDoc(location={'lat': 42.12, 'lon': -74.34}),
+    ]
+    store.index(doc)
+
+    query = {
+        'query': {
+            'geo_bounding_box': {
+                'location': {
+                    'top_left': {'lat': 42, 'lon': -74},
+                    'bottom_right': {'lat': 40, 'lon': -72},
+                }
+            }
+        },
+    }
+
+    docs, _ = store.execute_query(query)
+    assert [doc['id'] for doc in docs] == [doc[0].id, doc[1].id]
+
+
+def test_field_range():
+    class MyDoc(BaseDoc):
+        expected_attendees: dict = Field(col_type='integer_range')
+        time_frame: dict = Field(col_type='date_range', format='yyyy-MM-dd')
+
+    store = ElasticDocIndex[MyDoc]()
+    doc = [
+        MyDoc(
+            expected_attendees={'gte': 10, 'lt': 20},
+            time_frame={'gte': '2023-01-01', 'lt': '2023-02-01'},
+        ),
+        MyDoc(
+            expected_attendees={'gte': 20, 'lt': 30},
+            time_frame={'gte': '2023-02-01', 'lt': '2023-03-01'},
+        ),
+        MyDoc(
+            expected_attendees={'gte': 30, 'lt': 40},
+            time_frame={'gte': '2023-03-01', 'lt': '2023-04-01'},
+        ),
+    ]
+    store.index(doc)
+
+    query = {
+        'query': {
+            'bool': {
+                'should': [
+                    {'term': {'expected_attendees': {'value': 15}}},
+                    {
+                        'range': {
+                            'time_frame': {
+                                'gte': '2023-02-05',
+                                'lt': '2023-02-10',
+                                'relation': 'contains',
+                            }
+                        }
+                    },
+                ]
+            }
+        },
+    }
+    docs, _ = store.execute_query(query)
+    assert [doc['id'] for doc in docs] == [doc[0].id, doc[1].id]
diff --git a/tests/integrations/doc_index/elastic/v8/test_find.py b/tests/integrations/doc_index/elastic/v8/test_find.py
new file mode 100644
index 00000000000..90292a772bd
--- /dev/null
+++ b/tests/integrations/doc_index/elastic/v8/test_find.py
@@ -0,0 +1,329 @@
+import numpy as np
+import pytest
+import torch
+from pydantic import Field
+
+from docarray import BaseDoc
+from docarray.index import ElasticDocIndex
+from docarray.typing import NdArray, TorchTensor
+from tests.integrations.doc_index.elastic.fixture import start_storage_v8  # noqa: F401
+from tests.integrations.doc_index.elastic.fixture import FlatDoc, SimpleDoc
+
+pytestmark = [pytest.mark.slow, pytest.mark.index, pytest.mark.elasticv8]
+
+
+@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+def test_find_simple_schema(similarity):
+    class SimpleSchema(BaseDoc):
+        tens: NdArray[10] = Field(similarity=similarity)
+
+    store = ElasticDocIndex[SimpleSchema]()
+
+    index_docs = []
+    for _ in range(10):
+        vec = np.random.rand(10)
+        if similarity == 'dot_product':
+            vec = vec / np.linalg.norm(vec)
+        index_docs.append(SimpleDoc(tens=vec))
+    store.index(index_docs)
+
+    query = index_docs[-1]
+    docs, scores = store.find(query, search_field='tens', limit=5)
+
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].tens, index_docs[-1].tens)
+
+
+@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+def test_find_flat_schema(similarity):
+    class FlatSchema(BaseDoc):
+        tens_one: NdArray = Field(dims=10, similarity=similarity)
+        tens_two: NdArray = Field(dims=50, similarity=similarity)
+
+    store = ElasticDocIndex[FlatSchema]()
+
+    index_docs = []
+    for _ in range(10):
+        vec_one = np.random.rand(10)
+        vec_two = np.random.rand(50)
+        if similarity == 'dot_product':
+            vec_one = vec_one / np.linalg.norm(vec_one)
+            vec_two = vec_two / np.linalg.norm(vec_two)
+        index_docs.append(FlatDoc(tens_one=vec_one, tens_two=vec_two))
+
+    store.index(index_docs)
+
+    query = index_docs[-1]
+
+    # find on tens_one
+    docs, scores = store.find(query, search_field='tens_one', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
+    assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
+
+    # find on tens_two
+    docs, scores = store.find(query, search_field='tens_two', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].tens_one, index_docs[-1].tens_one)
+    assert np.allclose(docs[0].tens_two, index_docs[-1].tens_two)
+
+
+@pytest.mark.parametrize('similarity', ['cosine', 'l2_norm', 'dot_product'])
+def test_find_nested_schema(similarity):
+    class SimpleDoc(BaseDoc):
+        tens: NdArray[10] = Field(similarity=similarity)
+
+    class NestedDoc(BaseDoc):
+        d: SimpleDoc
+        tens: NdArray[10] = Field(similarity=similarity)
+
+    class DeepNestedDoc(BaseDoc):
+        d: NestedDoc
+        tens: NdArray = Field(similarity=similarity, dims=10)
+
+    store = ElasticDocIndex[DeepNestedDoc]()
+
+    index_docs = []
+    for _ in range(10):
+        vec_simple = np.random.rand(10)
+        vec_nested = np.random.rand(10)
+        vec_deep = np.random.rand(10)
+        if similarity == 'dot_product':
+            vec_simple = vec_simple / np.linalg.norm(vec_simple)
+            vec_nested = vec_nested / np.linalg.norm(vec_nested)
+            vec_deep = vec_deep / np.linalg.norm(vec_deep)
+        index_docs.append(
+            DeepNestedDoc(
+                d=NestedDoc(d=SimpleDoc(tens=vec_simple), tens=vec_nested),
+                tens=vec_deep,
+            )
+        )
+
+    store.index(index_docs)
+
+    query = index_docs[-1]
+
+    # find on root level
+    docs, scores = store.find(query, search_field='tens', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].tens, index_docs[-1].tens)
+
+    # find on first nesting level
+    docs, scores = store.find(query, search_field='d__tens', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].d.tens, index_docs[-1].d.tens)
+
+    # find on second nesting level
+    docs, scores = store.find(query, search_field='d__d__tens', limit=5)
+    assert len(docs) == 5
+    assert len(scores) == 5
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(docs[0].d.d.tens, index_docs[-1].d.d.tens)
+
+
+def test_find_torch():
+    class TorchDoc(BaseDoc):
+        tens: TorchTensor[10]
+
+    store = ElasticDocIndex[TorchDoc]()
+
+    # A dense_vector field stores dense vectors of float values.
+    index_docs = [
+        TorchDoc(tens=np.random.rand(10).astype(dtype=np.float32)) for _ in range(10)
+    ]
+    store.index(index_docs)
+
+    for doc in index_docs:
+        assert isinstance(doc.tens, TorchTensor)
+
+    query = index_docs[-1]
+    docs, scores = store.find(query, search_field='tens', limit=5)
+
+    assert len(docs) == 5
+    assert len(scores) == 5
+    for doc in docs:
+        assert isinstance(doc.tens, TorchTensor)
+
+    assert docs[0].id == index_docs[-1].id
+    assert torch.allclose(docs[0].tens, index_docs[-1].tens)
+
+
+def test_find_tensorflow():
+    from docarray.typing import TensorFlowTensor
+
+    class TfDoc(BaseDoc):
+        tens: TensorFlowTensor[10]
+
+    store = ElasticDocIndex[TfDoc]()
+
+    index_docs = [
+        TfDoc(tens=np.random.rand(10).astype(dtype=np.float32)) for _ in range(10)
+    ]
+    store.index(index_docs)
+
+    for doc in index_docs:
+        assert isinstance(doc.tens, TensorFlowTensor)
+
+    query = index_docs[-1]
+    docs, scores = store.find(query, search_field='tens', limit=5)
+
+    assert len(docs) == 5
+    assert len(scores) == 5
+    for doc in docs:
+        assert isinstance(doc.tens, TensorFlowTensor)
+
+    assert docs[0].id == index_docs[-1].id
+    assert np.allclose(
+        docs[0].tens.unwrap().numpy(), index_docs[-1].tens.unwrap().numpy()
+    )
+
+
+def test_find_batched():
+    store = ElasticDocIndex[SimpleDoc]()
+
+    index_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(10)]
+    store.index(index_docs)
+
+    queries = index_docs[-2:]
+    docs_batched, scores_batched = store.find_batched(
+        queries, search_field='tens', limit=5
+    )
+
+    for docs, scores, query in zip(docs_batched, scores_batched, queries):
+        assert len(docs) == 5
+        assert len(scores) == 5
+        assert docs[0].id == query.id
+        assert np.allclose(docs[0].tens, query.tens)
+
+
+def test_filter():
+    class MyDoc(BaseDoc):
+        A: bool
+        B: int
+        C: float
+
+    store = ElasticDocIndex[MyDoc]()
+
+    index_docs = [MyDoc(id=f'{i}', A=(i % 2 == 0), B=i, C=i + 0.5) for i in range(10)]
+    store.index(index_docs)
+
+    filter_query = {'term': {'A': True}}
+    docs = store.filter(filter_query)
+    assert len(docs) > 0
+    for doc in docs:
+        assert doc.A
+
+    filter_query = {
+        "bool": {
+            "filter": [
+                {"terms": {"B": [3, 4, 7, 8]}},
+                {"range": {"C": {"gte": 3, "lte": 5}}},
+            ]
+        }
+    }
+    docs = store.filter(filter_query)
+    assert [doc.id for doc in docs] == ['3', '4']
+
+
+def test_text_search():
+    class MyDoc(BaseDoc):
+        text: str
+
+    store = ElasticDocIndex[MyDoc]()
+    index_docs = [
+        MyDoc(text='hello world'),
+        MyDoc(text='never gonna give you up'),
+        MyDoc(text='we are the world'),
+    ]
+    store.index(index_docs)
+
+    query = 'world'
+    docs, scores = store.text_search(query, search_field='text')
+
+    assert len(docs) == 2
+    assert len(scores) == 2
+    assert docs[0].text.index(query) >= 0
+    assert docs[1].text.index(query) >= 0
+
+    queries = ['world', 'never']
+    docs, scores = store.text_search_batched(queries, search_field='text')
+    for query, da, score in zip(queries, docs, scores):
+        assert len(da) > 0
+        assert len(score) > 0
+        for doc in da:
+            assert doc.text.index(query) >= 0
+
+
+def test_query_builder():
+    class MyDoc(BaseDoc):
+        tens: NdArray[10] = Field(similarity='l2_norm')
+        num: int
+        text: str
+
+    store = ElasticDocIndex[MyDoc]()
+    index_docs = [
+        MyDoc(id=f'{i}', tens=np.ones(10) * i, num=int(i / 2), text=f'text {int(i/2)}')
+        for i in range(10)
+    ]
+    store.index(index_docs)
+
+    # build_query
+    q = store.build_query()
+    assert isinstance(q, store.QueryBuilder)
+
+    # filter
+    q = store.build_query().filter({'term': {'num': 0}}).build()
+    docs, _ = store.execute_query(q)
+    assert [doc['id'] for doc in docs] == ['0', '1']
+
+    # find
+    q = store.build_query().find(index_docs[-1], search_field='tens', limit=3).build()
+    docs, _ = store.execute_query(q)
+    assert [doc['id'] for doc in docs] == ['9', '8', '7']
+
+    # text_search
+    q = store.build_query().text_search('0', search_field='text').build()
+    docs, _ = store.execute_query(q)
+    assert [doc['id'] for doc in docs] == ['0', '1']
+
+    # combination
+    q = (
+        store.build_query()
+        .filter({'range': {'num': {'lte': 3}}})
+        .find(index_docs[-1], search_field='tens')
+        .text_search('0', search_field='text')
+        .build()
+    )
+    docs, _ = store.execute_query(q)
+    assert [doc['id'] for doc in docs] == ['1', '0']
+
+    # direct
+    query = {
+        'knn': {
+            'field': 'tens',
+            'query_vector': [9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0],
+            'k': 10,
+            'num_candidates': 10000,
+            'filter': {
+                'bool': {
+                    'filter': [
+                        {'range': {'num': {'gte': 2}}},
+                        {'range': {'num': {'lte': 3}}},
+                    ]
+                }
+            },
+        },
+    }
+
+    docs, _ = store.execute_query(query)
+    assert [doc['id'] for doc in docs] == ['7', '6', '5', '4']
diff --git a/tests/integrations/doc_index/elastic/v8/test_index_get_del.py b/tests/integrations/doc_index/elastic/v8/test_index_get_del.py
new file mode 100644
index 00000000000..e51b683fa16
--- /dev/null
+++ b/tests/integrations/doc_index/elastic/v8/test_index_get_del.py
@@ -0,0 +1,270 @@
+from typing import Union
+
+import numpy as np
+import pytest
+
+from docarray import BaseDoc, DocArray
+from docarray.documents import ImageDoc, TextDoc
+from docarray.index import ElasticDocIndex
+from docarray.typing import NdArray
+from tests.integrations.doc_index.elastic.fixture import (  # noqa: F401
+    DeepNestedDoc,
+    FlatDoc,
+    NestedDoc,
+    SimpleDoc,
+    start_storage_v8,
+    ten_deep_nested_docs,
+    ten_flat_docs,
+    ten_nested_docs,
+    ten_simple_docs,
+)
+
+pytestmark = [pytest.mark.slow, pytest.mark.index, pytest.mark.elasticv8]
+
+
+@pytest.mark.parametrize('use_docarray', [True, False])
+def test_index_simple_schema(ten_simple_docs, use_docarray):  # noqa: F811
+    store = ElasticDocIndex[SimpleDoc]()
+    if use_docarray:
+        ten_simple_docs = DocArray[SimpleDoc](ten_simple_docs)
+
+    store.index(ten_simple_docs)
+    assert store.num_docs() == 10
+
+
+@pytest.mark.parametrize('use_docarray', [True, False])
+def test_index_flat_schema(ten_flat_docs, use_docarray):  # noqa: F811
+    store = ElasticDocIndex[FlatDoc]()
+    if use_docarray:
+        ten_flat_docs = DocArray[FlatDoc](ten_flat_docs)
+
+    store.index(ten_flat_docs)
+    assert store.num_docs() == 10
+
+
+@pytest.mark.parametrize('use_docarray', [True, False])
+def test_index_nested_schema(ten_nested_docs, use_docarray):  # noqa: F811
+    store = ElasticDocIndex[NestedDoc]()
+    if use_docarray:
+        ten_nested_docs = DocArray[NestedDoc](ten_nested_docs)
+
+    store.index(ten_nested_docs)
+    assert store.num_docs() == 10
+
+
+@pytest.mark.parametrize('use_docarray', [True, False])
+def test_index_deep_nested_schema(ten_deep_nested_docs, use_docarray):  # noqa: F811
+    store = ElasticDocIndex[DeepNestedDoc]()
+    if use_docarray:
+        ten_deep_nested_docs = DocArray[DeepNestedDoc](ten_deep_nested_docs)
+
+    store.index(ten_deep_nested_docs)
+    assert store.num_docs() == 10
+
+
+def test_get_single(ten_simple_docs, ten_flat_docs, ten_nested_docs):  # noqa: F811
+    # simple
+    store = ElasticDocIndex[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    assert store.num_docs() == 10
+    for d in ten_simple_docs:
+        id_ = d.id
+        assert store[id_].id == id_
+        assert np.all(store[id_].tens == d.tens)
+
+    # flat
+    store = ElasticDocIndex[FlatDoc]()
+    store.index(ten_flat_docs)
+
+    assert store.num_docs() == 10
+    for d in ten_flat_docs:
+        id_ = d.id
+        assert store[id_].id == id_
+        assert np.all(store[id_].tens_one == d.tens_one)
+        assert np.all(store[id_].tens_two == d.tens_two)
+
+    # nested
+    store = ElasticDocIndex[NestedDoc]()
+    store.index(ten_nested_docs)
+
+    assert store.num_docs() == 10
+    for d in ten_nested_docs:
+        id_ = d.id
+        assert store[id_].id == id_
+        assert store[id_].d.id == d.d.id
+        assert np.all(store[id_].d.tens == d.d.tens)
+
+
+def test_get_multiple(ten_simple_docs, ten_flat_docs, ten_nested_docs):  # noqa: F811
+    docs_to_get_idx = [0, 2, 4, 6, 8]
+
+    # simple
+    store = ElasticDocIndex[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    assert store.num_docs() == 10
+    docs_to_get = [ten_simple_docs[i] for i in docs_to_get_idx]
+    ids_to_get = [d.id for d in docs_to_get]
+    retrieved_docs = store[ids_to_get]
+    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+        assert d_out.id == id_
+        assert np.all(d_out.tens == d_in.tens)
+
+    # flat
+    store = ElasticDocIndex[FlatDoc]()
+    store.index(ten_flat_docs)
+
+    assert store.num_docs() == 10
+    docs_to_get = [ten_flat_docs[i] for i in docs_to_get_idx]
+    ids_to_get = [d.id for d in docs_to_get]
+    retrieved_docs = store[ids_to_get]
+    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+        assert d_out.id == id_
+        assert np.all(d_out.tens_one == d_in.tens_one)
+        assert np.all(d_out.tens_two == d_in.tens_two)
+
+    # nested
+    store = ElasticDocIndex[NestedDoc]()
+    store.index(ten_nested_docs)
+
+    assert store.num_docs() == 10
+    docs_to_get = [ten_nested_docs[i] for i in docs_to_get_idx]
+    ids_to_get = [d.id for d in docs_to_get]
+    retrieved_docs = store[ids_to_get]
+    for id_, d_in, d_out in zip(ids_to_get, docs_to_get, retrieved_docs):
+        assert d_out.id == id_
+        assert d_out.d.id == d_in.d.id
+        assert np.all(d_out.d.tens == d_in.d.tens)
+
+
+def test_get_key_error(ten_simple_docs):  # noqa: F811
+    store = ElasticDocIndex[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    with pytest.raises(KeyError):
+        store['not_a_real_id']
+
+
+def test_persisting(ten_simple_docs):  # noqa: F811
+    store = ElasticDocIndex[SimpleDoc](index_name='test_persisting')
+    store.index(ten_simple_docs)
+
+    store2 = ElasticDocIndex[SimpleDoc](index_name='test_persisting')
+    assert store2.num_docs() == 10
+
+
+def test_del_single(ten_simple_docs):  # noqa: F811
+    store = ElasticDocIndex[SimpleDoc]()
+    store.index(ten_simple_docs)
+    # delete once
+    assert store.num_docs() == 10
+    del store[ten_simple_docs[0].id]
+    assert store.num_docs() == 9
+    for i, d in enumerate(ten_simple_docs):
+        id_ = d.id
+        if i == 0:  # deleted
+            with pytest.raises(KeyError):
+                store[id_]
+        else:
+            assert store[id_].id == id_
+            assert np.all(store[id_].tens == d.tens)
+    # delete again
+    del store[ten_simple_docs[3].id]
+    assert store.num_docs() == 8
+    for i, d in enumerate(ten_simple_docs):
+        id_ = d.id
+        if i in (0, 3):  # deleted
+            with pytest.raises(KeyError):
+                store[id_]
+        else:
+            assert store[id_].id == id_
+            assert np.all(store[id_].tens == d.tens)
+
+
+def test_del_multiple(ten_simple_docs):  # noqa: F811
+    docs_to_del_idx = [0, 2, 4, 6, 8]
+
+    store = ElasticDocIndex[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    assert store.num_docs() == 10
+    docs_to_del = [ten_simple_docs[i] for i in docs_to_del_idx]
+    ids_to_del = [d.id for d in docs_to_del]
+    del store[ids_to_del]
+    for i, doc in enumerate(ten_simple_docs):
+        if i in docs_to_del_idx:
+            with pytest.raises(KeyError):
+                store[doc.id]
+        else:
+            assert store[doc.id].id == doc.id
+            assert np.all(store[doc.id].tens == doc.tens)
+
+
+def test_del_key_error(ten_simple_docs):  # noqa: F811
+    store = ElasticDocIndex[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    with pytest.warns(UserWarning):
+        del store['not_a_real_id']
+
+
+def test_num_docs(ten_simple_docs):  # noqa: F811
+    store = ElasticDocIndex[SimpleDoc]()
+    store.index(ten_simple_docs)
+
+    assert store.num_docs() == 10
+
+    del store[ten_simple_docs[0].id]
+    assert store.num_docs() == 9
+
+    del store[ten_simple_docs[3].id, ten_simple_docs[5].id]
+    assert store.num_docs() == 7
+
+    more_docs = [SimpleDoc(tens=np.random.rand(10)) for _ in range(5)]
+    store.index(more_docs)
+    assert store.num_docs() == 12
+
+    del store[more_docs[2].id, ten_simple_docs[7].id]
+    assert store.num_docs() == 10
+
+
+def test_index_union_doc():  # noqa: F811
+    class MyDoc(BaseDoc):
+        tensor: Union[NdArray, str]
+
+    class MySchema(BaseDoc):
+        tensor: NdArray
+
+    store = ElasticDocIndex[MySchema]()
+    doc = [MyDoc(tensor=np.random.randn(128))]
+    store.index(doc)
+
+    id_ = doc[0].id
+    assert store[id_].id == id_
+    assert np.all(store[id_].tensor == doc[0].tensor)
+
+
+def test_index_multi_modal_doc():
+    class MyMultiModalDoc(BaseDoc):
+        image: ImageDoc
+        text: TextDoc
+
+    store = ElasticDocIndex[MyMultiModalDoc]()
+
+    doc = [
+        MyMultiModalDoc(
+            image=ImageDoc(embedding=np.random.randn(128)), text=TextDoc(text='hello')
+        )
+    ]
+    store.index(doc)
+
+    id_ = doc[0].id
+    assert store[id_].id == id_
+    assert np.all(store[id_].image.embedding == doc[0].image.embedding)
+    assert store[id_].text.text == doc[0].text.text
+
+
+def test_elasticv7_version_check():
+    with pytest.raises(ImportError):
+        from docarray.index import ElasticV7DocIndex  # noqa: F401

From 8b14182864bf1fa0b06d588ed0ad57fe8fb04f7e Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 31 Mar 2023 20:46:00 +0800
Subject: [PATCH 08/14] fix: elasticversion in init, ci and toml

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 .github/workflows/ci.yml   | 35 +++++++++++++++++++++++++++++++++--
 docarray/index/__init__.py |  8 ++++++++
 pyproject.toml             |  5 +++--
 3 files changed, 44 insertions(+), 4 deletions(-)

diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index 130e72de9dd..cab0d20a625 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -225,7 +225,38 @@ jobs:
       - name: Test
         id: test
         run: |
-          poetry run pytest -m 'index' tests
+          poetry run pytest -m 'index and not elasticv8' tests
+        timeout-minutes: 30
+
+
+  docarray-elastic-v8:
+    needs: [lint-ruff, check-black, import-test]
+    runs-on: ubuntu-latest
+    strategy:
+      fail-fast: false
+      matrix:
+        python-version: [3.7]
+    steps:
+      - uses: actions/checkout@v2.5.0
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Prepare environment
+        run: |
+          python -m pip install --upgrade pip
+          python -m pip install poetry
+          poetry install --all-extras
+          poetry run pip install protobuf==3.19.0
+          poetry run pip install tensorflow==2.11.0
+          poetry run pip install elasticsearch==8.6.2
+          sudo apt-get update
+          sudo apt-get install --no-install-recommends ffmpeg
+
+      - name: Test
+        id: test
+        run: |
+          poetry run pytest -m 'index and elasticv8' tests
         timeout-minutes: 30
 
   docarray-test-tensorflow:
@@ -284,7 +315,7 @@ jobs:
 
   # just for blocking the merge until all parallel core-test are successful
   success-all-test:
-    needs: [docarray-test, docarray-test-proto3, docarray-doc-index, docarray-test-tensorflow, docarray-test-benchmarks, import-test, check-black, check-mypy, lint-ruff]
+    needs: [docarray-test, docarray-test-proto3, docarray-doc-index, docarray-elastic-v8, docarray-test-tensorflow, docarray-test-benchmarks, import-test, check-black, check-mypy, lint-ruff]
     if: always()
     runs-on: ubuntu-latest
     steps:
diff --git a/docarray/index/__init__.py b/docarray/index/__init__.py
index df0d133d29c..5147cb8113c 100644
--- a/docarray/index/__init__.py
+++ b/docarray/index/__init__.py
@@ -24,7 +24,15 @@ def __getattr__(name: str):
         import docarray.index.backends.elastic as lib
     elif name == 'ElasticV7DocIndex':
         import_library('elasticsearch', raise_error=True)
+        from elasticsearch import __version__ as __es__version__
+
         import docarray.index.backends.elasticv7 as lib
+
+        if __es__version__[0] > 7:
+            raise ImportError(
+                'ElasticV7DocIndex requires the elasticsearch library to be version 7.10.1'
+            )
+
     else:
         raise ImportError(
             f'cannot import name \'{name}\' from \'{_get_path_from_docarray_root_level(__file__)}\''
diff --git a/pyproject.toml b/pyproject.toml
index 229151108d1..cc0ce99f8b9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -24,10 +24,10 @@ hnswlib = {version = ">=0.6.2", optional = true }
 lz4 = {version= ">=1.0.0", optional = true}
 pydub = {version = "^0.25.1", optional = true }
 pandas = {version = ">=1.1.0", optional = true }
-elasticsearch = {version = "7.10.1", optional = true }
+elasticsearch = {version = ">=7.10.1", optional = true }
 smart-open = {version = ">=6.3.0", extras = ["s3"], optional = true}
 jina-hubble-sdk = {version = ">=0.34.0", optional = true}
-elastic-transport = "^8.4.0"
+elastic-transport = {version ="^8.4.0", optional = true }
 
 [tool.poetry.extras]
 common = ["protobuf", "lz4"]
@@ -115,4 +115,5 @@ markers = [
     "tensorflow: marks test using tensorflow and proto 3",
     "index: marks test using a document index",
     "benchmark: marks slow benchmarking tests",
+    "elasticv8: marks test that run with ElasticSearch v8",
 ]

From b66d55011bb271558c1becbd998896be15fac00f Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 31 Mar 2023 20:54:43 +0800
Subject: [PATCH 09/14] fix: update poetry extras

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 poetry.lock    | 8 ++++----
 pyproject.toml | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index dbbe7aeae8f..f9b068861f2 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.4.1 and should not be changed by hand.
 
 [[package]]
 name = "aiohttp"
@@ -808,7 +808,7 @@ name = "elastic-transport"
 version = "8.4.0"
 description = "Transport classes and utilities shared among Python Elastic client libraries"
 category = "main"
-optional = false
+optional = true
 python-versions = ">=3.6"
 files = [
     {file = "elastic-transport-8.4.0.tar.gz", hash = "sha256:b9ad708ceb7fcdbc6b30a96f886609a109f042c0b9d9f2e44403b3133ba7ff10"},
@@ -4610,7 +4610,7 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools"
 audio = ["pydub"]
 aws = ["smart-open"]
 common = ["lz4", "protobuf"]
-elasticsearch = ["elasticsearch"]
+elasticsearch = ["elastic-transport", "elasticsearch"]
 full = ["av", "lz4", "pandas", "pillow", "protobuf", "pydub", "trimesh", "types-pillow"]
 hnswlib = ["hnswlib"]
 image = ["pillow", "types-pillow"]
@@ -4624,4 +4624,4 @@ web = ["fastapi"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.7,<4.0"
-content-hash = "49f70eda2036ec961a1ed06e9364c56710c91f152d030ddf566519b443b52f93"
+content-hash = "fe116769811f4f45c7b48f72ad5c9dc58e4a31586656f4c2318462cf42492049"
diff --git a/pyproject.toml b/pyproject.toml
index cc0ce99f8b9..22c8d9f53c7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ video = ["av"]
 audio = ["pydub"]
 mesh = ["trimesh"]
 hnswlib = ["hnswlib"]
-elasticsearch = ["elasticsearch"]
+elasticsearch = ["elasticsearch", "elastic-transport"]
 jac = ["jina-hubble-sdk"]
 aws = ["smart-open"]
 torch = ["torch"]

From 4aba76649c156a43cd21f30a8df5a5b98c19c3d0 Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Fri, 31 Mar 2023 21:50:30 +0800
Subject: [PATCH 10/14] fix: raise error when init ElasticV7DocIndex

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 docarray/index/__init__.py                               | 7 -------
 docarray/index/backends/elasticv7.py                     | 9 +++++++++
 .../doc_index/elastic/v8/test_index_get_del.py           | 4 +++-
 3 files changed, 12 insertions(+), 8 deletions(-)

diff --git a/docarray/index/__init__.py b/docarray/index/__init__.py
index 5147cb8113c..2c724030fe7 100644
--- a/docarray/index/__init__.py
+++ b/docarray/index/__init__.py
@@ -24,15 +24,8 @@ def __getattr__(name: str):
         import docarray.index.backends.elastic as lib
     elif name == 'ElasticV7DocIndex':
         import_library('elasticsearch', raise_error=True)
-        from elasticsearch import __version__ as __es__version__
-
         import docarray.index.backends.elasticv7 as lib
 
-        if __es__version__[0] > 7:
-            raise ImportError(
-                'ElasticV7DocIndex requires the elasticsearch library to be version 7.10.1'
-            )
-
     else:
         raise ImportError(
             f'cannot import name \'{name}\' from \'{_get_path_from_docarray_root_level(__file__)}\''
diff --git a/docarray/index/backends/elasticv7.py b/docarray/index/backends/elasticv7.py
index 5f80379f85e..0013a766df6 100644
--- a/docarray/index/backends/elasticv7.py
+++ b/docarray/index/backends/elasticv7.py
@@ -14,6 +14,15 @@
 
 
 class ElasticV7DocIndex(ElasticDocIndex):
+    def __init__(self, db_config=None, **kwargs):
+        from elasticsearch import __version__ as __es__version__
+
+        if __es__version__[0] > 7:
+            raise ImportError(
+                'ElasticV7DocIndex requires the elasticsearch library to be version 7.10.1'
+            )
+
+        super().__init__(db_config, **kwargs)
 
     ###############################################
     # Inner classes for query builder and configs #
diff --git a/tests/integrations/doc_index/elastic/v8/test_index_get_del.py b/tests/integrations/doc_index/elastic/v8/test_index_get_del.py
index e51b683fa16..0736ed8ce8d 100644
--- a/tests/integrations/doc_index/elastic/v8/test_index_get_del.py
+++ b/tests/integrations/doc_index/elastic/v8/test_index_get_del.py
@@ -267,4 +267,6 @@ class MyMultiModalDoc(BaseDoc):
 
 def test_elasticv7_version_check():
     with pytest.raises(ImportError):
-        from docarray.index import ElasticV7DocIndex  # noqa: F401
+        from docarray.index import ElasticV7DocIndex
+
+        _ = ElasticV7DocIndex[SimpleDoc]()

From 5f17684e51a370e4ae1f4b2f3fd871113d5e053d Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Tue, 11 Apr 2023 22:24:34 +0800
Subject: [PATCH 11/14] fix: add index_mappings and num_candidates options

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 docarray/index/backends/elastic.py   | 26 +++++++++++++++-----------
 docarray/index/backends/elasticv7.py | 20 +++++++++++++++-----
 2 files changed, 30 insertions(+), 16 deletions(-)

diff --git a/docarray/index/backends/elastic.py b/docarray/index/backends/elastic.py
index c003e9d4095..1a345195cca 100644
--- a/docarray/index/backends/elastic.py
+++ b/docarray/index/backends/elastic.py
@@ -85,6 +85,7 @@ def __init__(self, db_config=None, **kwargs):
             '_source': {'enabled': 'true'},
             'properties': {},
         }
+        mappings.update(self._db_config.index_mappings)
 
         for col_name, col in self._column_infos.items():
             mappings['properties'][col_name] = self._create_index_mapping(col)
@@ -124,24 +125,23 @@ def find(
             query: Union[AnyTensor, BaseDoc],
             search_field: str = 'embedding',
             limit: int = 10,
+            num_candidates: Optional[int] = None,
         ):
             if isinstance(query, BaseDoc):
                 query_vec = BaseDocIndex._get_values_by_column([query], search_field)[0]
             else:
                 query_vec = query
             query_vec_np = BaseDocIndex._to_numpy(self._outer_instance, query_vec)
-            self._query['knn'] = ElasticDocIndex._form_search_body(
+            self._query['knn'] = self._outer_instance._form_search_body(
                 query_vec_np,
                 limit,
                 search_field,
-                self._outer_instance._runtime_config.default_column_config[
-                    'dense_vector'
-                ]['num_candidates'],
+                num_candidates,
             )['knn']
 
             return self
 
-        # filter accrpts Leaf/Compound query clauses
+        # filter accepts Leaf/Compound query clauses
         # https://www.elastic.co/guide/en/elasticsearch/reference/current/query-dsl.html
         def filter(self, query: Dict[str, Any], limit: int = 10):
             self._query['size'] = limit
@@ -156,8 +156,8 @@ def text_search(self, query: str, search_field: str = 'text', limit: int = 10):
             return self
 
         find_batched = _raise_not_composable('find_batched')
-        filter_batched = _raise_not_composable('find_batched')
-        text_search_batched = _raise_not_composable('text_search')
+        filter_batched = _raise_not_composable('filter_batched')
+        text_search_batched = _raise_not_composable('text_search_batched')
 
     def build_query(self, **kwargs) -> QueryBuilder:
         """
@@ -173,6 +173,7 @@ class DBConfig(BaseDocIndex.DBConfig):
         index_name: Optional[str] = None
         es_config: Dict[str, Any] = field(default_factory=dict)
         index_settings: Dict[str, Any] = field(default_factory=dict)
+        index_mappings: Dict[str, Any] = field(default_factory=dict)
 
     @dataclass
     class RuntimeConfig(BaseDocIndex.RuntimeConfig):
@@ -483,13 +484,17 @@ def _send_requests(
 
         return accumulated_info, warning_info
 
-    @staticmethod
     def _form_search_body(
+        self,
         query: np.ndarray,
         limit: int,
         search_field: str = '',
-        num_candidates: int = 10000,
+        num_candidates: Optional[int] = None,
     ) -> Dict[str, Any]:
+        if not num_candidates:
+            num_candidates = self._runtime_config.default_column_config['dense_vector'][
+                'num_candidates'
+            ]
         body = {
             'size': limit,
             'knn': {
@@ -501,9 +506,8 @@ def _form_search_body(
         }
         return body
 
-    @staticmethod
     def _form_text_search_body(
-        query: str, limit: int, search_field: str = ''
+        self, query: str, limit: int, search_field: str = ''
     ) -> Dict[str, Any]:
         body = {
             'size': limit,
diff --git a/docarray/index/backends/elasticv7.py b/docarray/index/backends/elasticv7.py
index 0013a766df6..e77aedfc2b4 100644
--- a/docarray/index/backends/elasticv7.py
+++ b/docarray/index/backends/elasticv7.py
@@ -1,5 +1,6 @@
+import warnings
 from dataclasses import dataclass
-from typing import Any, Dict, List, Sequence, TypeVar, Union
+from typing import Any, Dict, List, Optional, Sequence, TypeVar, Union
 
 import numpy as np
 
@@ -48,16 +49,26 @@ def find(
             query: Union[AnyTensor, BaseDoc],
             search_field: str = 'embedding',
             limit: int = 10,
+            num_candidates: Optional[int] = None,
         ):
+            if num_candidates:
+                warnings.warn('`num_candidates` is not supported in ElasticV7DocIndex')
+
             if isinstance(query, BaseDoc):
                 query_vec = BaseDocIndex._get_values_by_column([query], search_field)[0]
             else:
                 query_vec = query
             query_vec_np = BaseDocIndex._to_numpy(self._outer_instance, query_vec)
             self._query['size'] = limit
-            self._query['query']['script_score'] = ElasticV7DocIndex._form_search_body(
+            self._query['query'][
+                'script_score'
+            ] = self._outer_instance._form_search_body(
                 query_vec_np, limit, search_field
-            )['query']['script_score']
+            )[
+                'query'
+            ][
+                'script_score'
+            ]
 
             return self
 
@@ -102,8 +113,7 @@ def _create_index_mapping(self, col: '_ColumnInfo') -> Dict[str, Any]:
 
         return index
 
-    @staticmethod
-    def _form_search_body(query: np.ndarray, limit: int, search_field: str = '') -> Dict[str, Any]:  # type: ignore
+    def _form_search_body(self, query: np.ndarray, limit: int, search_field: str = '') -> Dict[str, Any]:  # type: ignore
         body = {
             'size': limit,
             'query': {

From ae6c15b041240f291ab712739550046d9b35c794 Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Wed, 12 Apr 2023 11:23:03 +0800
Subject: [PATCH 12/14] refactor: move index tests

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 tests/{integrations/doc_index => index/elastic}/__init__.py   | 0
 tests/{integrations/doc_index => index}/elastic/fixture.py    | 0
 .../doc_index/elastic => index/elastic/v7}/__init__.py        | 0
 .../doc_index => index}/elastic/v7/docker-compose.yml         | 0
 .../doc_index => index}/elastic/v7/test_column_config.py      | 2 +-
 .../{integrations/doc_index => index}/elastic/v7/test_find.py | 4 ++--
 .../doc_index => index}/elastic/v7/test_index_get_del.py      | 2 +-
 .../doc_index => index}/elastic/v8/docker-compose.yml         | 0
 .../doc_index => index}/elastic/v8/test_column_config.py      | 2 +-
 .../{integrations/doc_index => index}/elastic/v8/test_find.py | 4 ++--
 .../doc_index => index}/elastic/v8/test_index_get_del.py      | 2 +-
 .../doc_index/elastic/v7 => index/hnswlib}/__init__.py        | 0
 tests/{integrations/doc_index => index}/hnswlib/test_find.py  | 0
 .../doc_index => index}/hnswlib/test_index_get_del.py         | 0
 .../doc_index => index}/hnswlib/test_persist_data.py          | 0
 tests/integrations/doc_index/hnswlib/__init__.py              | 0
 16 files changed, 8 insertions(+), 8 deletions(-)
 rename tests/{integrations/doc_index => index/elastic}/__init__.py (100%)
 rename tests/{integrations/doc_index => index}/elastic/fixture.py (100%)
 rename tests/{integrations/doc_index/elastic => index/elastic/v7}/__init__.py (100%)
 rename tests/{integrations/doc_index => index}/elastic/v7/docker-compose.yml (100%)
 rename tests/{integrations/doc_index => index}/elastic/v7/test_column_config.py (97%)
 rename tests/{integrations/doc_index => index}/elastic/v7/test_find.py (98%)
 rename tests/{integrations/doc_index => index}/elastic/v7/test_index_get_del.py (99%)
 rename tests/{integrations/doc_index => index}/elastic/v8/docker-compose.yml (100%)
 rename tests/{integrations/doc_index => index}/elastic/v8/test_column_config.py (97%)
 rename tests/{integrations/doc_index => index}/elastic/v8/test_find.py (98%)
 rename tests/{integrations/doc_index => index}/elastic/v8/test_index_get_del.py (99%)
 rename tests/{integrations/doc_index/elastic/v7 => index/hnswlib}/__init__.py (100%)
 rename tests/{integrations/doc_index => index}/hnswlib/test_find.py (100%)
 rename tests/{integrations/doc_index => index}/hnswlib/test_index_get_del.py (100%)
 rename tests/{integrations/doc_index => index}/hnswlib/test_persist_data.py (100%)
 delete mode 100644 tests/integrations/doc_index/hnswlib/__init__.py

diff --git a/tests/integrations/doc_index/__init__.py b/tests/index/elastic/__init__.py
similarity index 100%
rename from tests/integrations/doc_index/__init__.py
rename to tests/index/elastic/__init__.py
diff --git a/tests/integrations/doc_index/elastic/fixture.py b/tests/index/elastic/fixture.py
similarity index 100%
rename from tests/integrations/doc_index/elastic/fixture.py
rename to tests/index/elastic/fixture.py
diff --git a/tests/integrations/doc_index/elastic/__init__.py b/tests/index/elastic/v7/__init__.py
similarity index 100%
rename from tests/integrations/doc_index/elastic/__init__.py
rename to tests/index/elastic/v7/__init__.py
diff --git a/tests/integrations/doc_index/elastic/v7/docker-compose.yml b/tests/index/elastic/v7/docker-compose.yml
similarity index 100%
rename from tests/integrations/doc_index/elastic/v7/docker-compose.yml
rename to tests/index/elastic/v7/docker-compose.yml
diff --git a/tests/integrations/doc_index/elastic/v7/test_column_config.py b/tests/index/elastic/v7/test_column_config.py
similarity index 97%
rename from tests/integrations/doc_index/elastic/v7/test_column_config.py
rename to tests/index/elastic/v7/test_column_config.py
index df927a2c2de..a0d4aa4dec9 100644
--- a/tests/integrations/doc_index/elastic/v7/test_column_config.py
+++ b/tests/index/elastic/v7/test_column_config.py
@@ -3,7 +3,7 @@
 
 from docarray import BaseDoc
 from docarray.index import ElasticV7DocIndex
-from tests.integrations.doc_index.elastic.fixture import start_storage_v7  # noqa: F401
+from tests.index.elastic.fixture import start_storage_v7  # noqa: F401
 
 pytestmark = [pytest.mark.slow, pytest.mark.index]
 
diff --git a/tests/integrations/doc_index/elastic/v7/test_find.py b/tests/index/elastic/v7/test_find.py
similarity index 98%
rename from tests/integrations/doc_index/elastic/v7/test_find.py
rename to tests/index/elastic/v7/test_find.py
index 1a0503711a7..6665c8b2b60 100644
--- a/tests/integrations/doc_index/elastic/v7/test_find.py
+++ b/tests/index/elastic/v7/test_find.py
@@ -6,8 +6,8 @@
 from docarray import BaseDoc
 from docarray.index import ElasticV7DocIndex
 from docarray.typing import NdArray, TorchTensor
-from tests.integrations.doc_index.elastic.fixture import start_storage_v7  # noqa: F401
-from tests.integrations.doc_index.elastic.fixture import FlatDoc, SimpleDoc
+from tests.index.elastic.fixture import start_storage_v7  # noqa: F401
+from tests.index.elastic.fixture import FlatDoc, SimpleDoc
 
 pytestmark = [pytest.mark.slow, pytest.mark.index]
 
diff --git a/tests/integrations/doc_index/elastic/v7/test_index_get_del.py b/tests/index/elastic/v7/test_index_get_del.py
similarity index 99%
rename from tests/integrations/doc_index/elastic/v7/test_index_get_del.py
rename to tests/index/elastic/v7/test_index_get_del.py
index 7b34a4a7e46..7124d5d61bd 100644
--- a/tests/integrations/doc_index/elastic/v7/test_index_get_del.py
+++ b/tests/index/elastic/v7/test_index_get_del.py
@@ -7,7 +7,7 @@
 from docarray.documents import ImageDoc, TextDoc
 from docarray.index import ElasticV7DocIndex
 from docarray.typing import NdArray
-from tests.integrations.doc_index.elastic.fixture import (  # noqa: F401
+from tests.index.elastic.fixture import (  # noqa: F401
     DeepNestedDoc,
     FlatDoc,
     NestedDoc,
diff --git a/tests/integrations/doc_index/elastic/v8/docker-compose.yml b/tests/index/elastic/v8/docker-compose.yml
similarity index 100%
rename from tests/integrations/doc_index/elastic/v8/docker-compose.yml
rename to tests/index/elastic/v8/docker-compose.yml
diff --git a/tests/integrations/doc_index/elastic/v8/test_column_config.py b/tests/index/elastic/v8/test_column_config.py
similarity index 97%
rename from tests/integrations/doc_index/elastic/v8/test_column_config.py
rename to tests/index/elastic/v8/test_column_config.py
index 6e1ad6cf88b..2b3bbcee0f8 100644
--- a/tests/integrations/doc_index/elastic/v8/test_column_config.py
+++ b/tests/index/elastic/v8/test_column_config.py
@@ -3,7 +3,7 @@
 
 from docarray import BaseDoc
 from docarray.index import ElasticDocIndex
-from tests.integrations.doc_index.elastic.fixture import start_storage_v8  # noqa: F401
+from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
 
 pytestmark = [pytest.mark.slow, pytest.mark.index, pytest.mark.elasticv8]
 
diff --git a/tests/integrations/doc_index/elastic/v8/test_find.py b/tests/index/elastic/v8/test_find.py
similarity index 98%
rename from tests/integrations/doc_index/elastic/v8/test_find.py
rename to tests/index/elastic/v8/test_find.py
index 90292a772bd..dcc4097eb7d 100644
--- a/tests/integrations/doc_index/elastic/v8/test_find.py
+++ b/tests/index/elastic/v8/test_find.py
@@ -6,8 +6,8 @@
 from docarray import BaseDoc
 from docarray.index import ElasticDocIndex
 from docarray.typing import NdArray, TorchTensor
-from tests.integrations.doc_index.elastic.fixture import start_storage_v8  # noqa: F401
-from tests.integrations.doc_index.elastic.fixture import FlatDoc, SimpleDoc
+from tests.index.elastic.fixture import start_storage_v8  # noqa: F401
+from tests.index.elastic.fixture import FlatDoc, SimpleDoc
 
 pytestmark = [pytest.mark.slow, pytest.mark.index, pytest.mark.elasticv8]
 
diff --git a/tests/integrations/doc_index/elastic/v8/test_index_get_del.py b/tests/index/elastic/v8/test_index_get_del.py
similarity index 99%
rename from tests/integrations/doc_index/elastic/v8/test_index_get_del.py
rename to tests/index/elastic/v8/test_index_get_del.py
index c3b87ac705a..db2df925ebb 100644
--- a/tests/integrations/doc_index/elastic/v8/test_index_get_del.py
+++ b/tests/index/elastic/v8/test_index_get_del.py
@@ -7,7 +7,7 @@
 from docarray.documents import ImageDoc, TextDoc
 from docarray.index import ElasticDocIndex
 from docarray.typing import NdArray
-from tests.integrations.doc_index.elastic.fixture import (  # noqa: F401
+from tests.index.elastic.fixture import (  # noqa: F401
     DeepNestedDoc,
     FlatDoc,
     NestedDoc,
diff --git a/tests/integrations/doc_index/elastic/v7/__init__.py b/tests/index/hnswlib/__init__.py
similarity index 100%
rename from tests/integrations/doc_index/elastic/v7/__init__.py
rename to tests/index/hnswlib/__init__.py
diff --git a/tests/integrations/doc_index/hnswlib/test_find.py b/tests/index/hnswlib/test_find.py
similarity index 100%
rename from tests/integrations/doc_index/hnswlib/test_find.py
rename to tests/index/hnswlib/test_find.py
diff --git a/tests/integrations/doc_index/hnswlib/test_index_get_del.py b/tests/index/hnswlib/test_index_get_del.py
similarity index 100%
rename from tests/integrations/doc_index/hnswlib/test_index_get_del.py
rename to tests/index/hnswlib/test_index_get_del.py
diff --git a/tests/integrations/doc_index/hnswlib/test_persist_data.py b/tests/index/hnswlib/test_persist_data.py
similarity index 100%
rename from tests/integrations/doc_index/hnswlib/test_persist_data.py
rename to tests/index/hnswlib/test_persist_data.py
diff --git a/tests/integrations/doc_index/hnswlib/__init__.py b/tests/integrations/doc_index/hnswlib/__init__.py
deleted file mode 100644
index e69de29bb2d..00000000000

From 3dc76941ed0f905fad6d73d7326da74b2db1c46c Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Wed, 12 Apr 2023 16:24:58 +0800
Subject: [PATCH 13/14] refactor: use single quotes in v8 test_find filter query

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 tests/index/elastic/v8/test_find.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/index/elastic/v8/test_find.py b/tests/index/elastic/v8/test_find.py
index dcc4097eb7d..5ee0956bb87 100644
--- a/tests/index/elastic/v8/test_find.py
+++ b/tests/index/elastic/v8/test_find.py
@@ -224,10 +224,10 @@ class MyDoc(BaseDoc):
         assert doc.A
 
     filter_query = {
-        "bool": {
-            "filter": [
-                {"terms": {"B": [3, 4, 7, 8]}},
-                {"range": {"C": {"gte": 3, "lte": 5}}},
+        'bool': {
+            'filter': [
+                {'terms': {'B': [3, 4, 7, 8]}},
+                {'range': {'C': {'gte': 3, 'lte': 5}}},
             ]
         }
     }

From 0cc9e64e6c17ceec1b0685ba3fc00d5f083a5ada Mon Sep 17 00:00:00 2001
From: AnneY <evangeline-lun@foxmail.com>
Date: Wed, 12 Apr 2023 20:25:15 +0800
Subject: [PATCH 14/14] feat: add ip_range

Signed-off-by: AnneY <evangeline-lun@foxmail.com>
---
 docarray/index/backends/elastic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/docarray/index/backends/elastic.py b/docarray/index/backends/elastic.py
index b2d13d164af..c2c1c6646a2 100644
--- a/docarray/index/backends/elastic.py
+++ b/docarray/index/backends/elastic.py
@@ -207,6 +207,7 @@ def __post_init__(self):
                 'long_range': {},
                 'double_range': {},
                 'date_range': {},
+                'ip_range': {},
                 'ip': {},
                 'version': {},
                 'histogram': {},