From 7edbb6b544ce1bf617ee952630657dfcef207775 Mon Sep 17 00:00:00 2001 From: nan-wang Date: Sun, 9 Apr 2023 11:46:59 +0200 Subject: [PATCH 01/20] docs: add storing with file Signed-off-by: nan-wang Signed-off-by: anna-charlotte --- docs/user_guide/storing/store_file.md | 53 +++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 docs/user_guide/storing/store_file.md diff --git a/docs/user_guide/storing/store_file.md b/docs/user_guide/storing/store_file.md new file mode 100644 index 00000000000..03711342ce4 --- /dev/null +++ b/docs/user_guide/storing/store_file.md @@ -0,0 +1,53 @@ +# Store +[DocList][docarray.array.doc_list.doc_list.DocList] can be persisted using `push()` and `pull()` functions. Under the hood, +[DocStore][docarray.store.abstract_doc_store.AbstractDocStore] is used to persist a `DocList`. You can store your `Doc` on-disk. Alternatively, you can upload to [AWS S3](https://aws.amazon.com/s3/) or [Jina AI Cloud](https://cloud.jina.ai/user/storage). + +# Store on-disk +When you want to use your `DocList` in another place, you can use the `push()` function to push the `DocList` to one place and later use the `pull()` function to pull its content back. + +## Push & pull +To use the store locally, you need to pass a local file path to the function starting with `file://`. + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +store_docs = [SimpleDoc(text=f'doc {i}') for i in range(8)] + +dl = DocList[SimpleDoc]() +dl.extend([SimpleDoc(text=f'doc {i}') for i in range(8)]) +dl.push('file:///Users/docarray/tmp/simple_dl') + +dl_pull = DocList[SimpleDoc].pull('file:///Users/docarray/tmp/simple_dl') +``` + +Under `/Users/docarray/tmp/`, there is a file with the name of `simple_dl.docs` being created to store the `DocList`. +```output +tmp +└── simple_dl.docs +``` + +## Push & Pull with streaming +When you have a large amount of `Doc` to push and pull, you could use the streaming function. 
`push_stream()` and `pull_stream()` can help you to stream the `DocList` in order to save the memory usage. You set multiple `DocList` to pull from the same source as well. + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +store_docs = [SimpleDoc(text=f'doc {i}') for i in range(8)] + +DocList[SimpleDoc].push_stream(iter(store_docs), 'file:///Users/docarray/tmp/dl_stream') +dl_pull_stream_1 = DocList[SimpleDoc].pull_stream('file:///Users/docarray/tmp/dl_stream') +dl_pull_stream_2 = DocList[SimpleDoc].pull_stream('file:///Users/docarray/tmp/dl_stream') +for d1, d2 in zip(dl_pull_stream_1, dl_pull_stream_2): + print(f'get {d1}, get {d2}') +``` + From 80c3b2944d5717fd6c200a1946729276732a370e Mon Sep 17 00:00:00 2001 From: nan-wang Date: Mon, 10 Apr 2023 11:10:35 +0200 Subject: [PATCH 02/20] docs: add docs for the S3 store Signed-off-by: nan-wang Signed-off-by: anna-charlotte --- docs/user_guide/storing/store_file.md | 2 +- docs/user_guide/storing/store_s3.md | 87 +++++++++++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 docs/user_guide/storing/store_s3.md diff --git a/docs/user_guide/storing/store_file.md b/docs/user_guide/storing/store_file.md index 03711342ce4..b3337371558 100644 --- a/docs/user_guide/storing/store_file.md +++ b/docs/user_guide/storing/store_file.md @@ -1,6 +1,6 @@ # Store [DocList][docarray.array.doc_list.doc_list.DocList] can be persisted using `push()` and `pull()` functions. Under the hood, -[DocStore][docarray.store.abstract_doc_store.AbstractDocStore] is used to persist a `DocList`. You can store your `Doc` on-disk. Alternatively, you can upload to [AWS S3](https://aws.amazon.com/s3/) or [Jina AI Cloud](https://cloud.jina.ai/user/storage). +[DocStore][docarray.store.abstract_doc_store.AbstractDocStore] is used to persist a `DocList`. You can store your `Doc` on-disk. 
Alternatively, you can upload to [AWS S3](https://aws.amazon.com/s3/), [minio](https://min.io) or [Jina AI Cloud](https://cloud.jina.ai/user/storage). # Store on-disk When you want to use your `DocList` in another place, you can use the `push()` function to push the `DocList` to one place and later use the `pull()` function to pull its content back. diff --git a/docs/user_guide/storing/store_s3.md b/docs/user_guide/storing/store_s3.md new file mode 100644 index 00000000000..ce849807720 --- /dev/null +++ b/docs/user_guide/storing/store_s3.md @@ -0,0 +1,87 @@ +# Store on S3 +When you want to use your `DocList` in another place, you can use the `push()` function to push the `DocList` to S3 and later use the `pull()` function to pull its content back. + +!!! note + To store on S3, you need to install the extra dependency with the following line + ```bash + pip install "docarray[aws]" + ``` + +## Push & pull +To use the store `DocList` on S3, you need to pass an S3 path to the function starting with `s3://`. + +In the following demo, we use `MinIO` as a local S3 service. You could use the following docker-compose file to start the service in a Docker container. 
+ +```yaml +version: "3" +services: + minio: + container_name: minio + image: "minio/minio:RELEASE.2023-03-13T19-46-17Z" + ports: + - "9005:9000" + command: server /data +``` +Save the above file as `dock-compose.yml` and run the following line in the same folder as the file, +```bash +docker-compose up +``` + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +if __name__ == '__main__': + import boto3 + from botocore.client import Config + + BUCKET = 'tmp_bucket' + my_session = boto3.session.Session() + s3 = my_session.resource( + service_name='s3', + region_name="us-east-1", + use_ssl=False, + endpoint_url="http://localhost:9005", + aws_access_key_id="minioadmin", + aws_secret_access_key="minioadmin", + config=Config(signature_version="s3v4"), + ) + # make a bucket + s3.create_bucket(Bucket=BUCKET) + + store_docs = [SimpleDoc(text=f'doc {i}') for i in range(8)] + dl = DocList[SimpleDoc]() + dl.extend([SimpleDoc(text=f'doc {i}') for i in range(8)]) + + # .push() and .pull() use the default boto3 client + boto3.Session.client.__defaults__ = ( + "us-east-1", + None, + False, + None, + "http://localhost:9005", + "minioadmin", + "minioadmin", + None, + Config(signature_version="s3v4"), + ) + dl.push(f's3://{BUCKET}/simple_dl') + dl_pull = DocList[SimpleDoc].pull(f's3://{BUCKET}/simple_dl') + + # delete the bucket + s3.Bucket(BUCKET).objects.all().delete() + s3.Bucket(BUCKET).delete() +``` + +Under the bucket `tmp_bucket`, there is a file with the name of `simple_dl.docs` being created to store the `DocList`. + +!!! note + When using `.push()` and `.pull()`, `DocList` calls the default boto3 client. Be sure your default session is correctly set up. + + +## Push & Pull with streaming +When you have a large amount of `Doc` to push and pull, you could use the streaming function. `push_stream()` and `pull_stream()` can help you to stream the `DocList` in order to save the memory usage. 
You set multiple `DocList` to pull from the same source as well. The usage is the same as using streaming with local files. Please refer to [Push & Pull with streaming with local files][TODO_add_internal_link] From 0c8481058316bf8e26273e80b3abbe3bd1115016 Mon Sep 17 00:00:00 2001 From: nan-wang Date: Mon, 10 Apr 2023 11:35:05 +0200 Subject: [PATCH 03/20] docs: add docs for jac store Signed-off-by: nan-wang Signed-off-by: anna-charlotte --- docs/user_guide/storing/store_file.md | 5 +-- docs/user_guide/storing/store_jac.md | 49 +++++++++++++++++++++++++++ docs/user_guide/storing/store_s3.md | 10 ++++++ 3 files changed, 60 insertions(+), 4 deletions(-) create mode 100644 docs/user_guide/storing/store_jac.md diff --git a/docs/user_guide/storing/store_file.md b/docs/user_guide/storing/store_file.md index b3337371558..b8c899aeaba 100644 --- a/docs/user_guide/storing/store_file.md +++ b/docs/user_guide/storing/store_file.md @@ -16,10 +16,7 @@ class SimpleDoc(BaseDoc): text: str -store_docs = [SimpleDoc(text=f'doc {i}') for i in range(8)] - -dl = DocList[SimpleDoc]() -dl.extend([SimpleDoc(text=f'doc {i}') for i in range(8)]) +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(8)]) dl.push('file:///Users/docarray/tmp/simple_dl') dl_pull = DocList[SimpleDoc].pull('file:///Users/docarray/tmp/simple_dl') diff --git a/docs/user_guide/storing/store_jac.md b/docs/user_guide/storing/store_jac.md new file mode 100644 index 00000000000..d44e8f22bdd --- /dev/null +++ b/docs/user_guide/storing/store_jac.md @@ -0,0 +1,49 @@ +# Store on Jina AI Cloud +When you want to use your `DocList` in another place, you can use the `push()` function to push the `DocList` to S3 and later use the `pull()` function to pull its content back. + +!!! 
note + To store on Jina AI Cloud, you need to install the extra dependency with the following line + ```bash + pip install "docarray[jac]" + ``` + +## Push & pull +To use the store `DocList` on Jina AI Cloud, you need to pass a Jina AI Cloud path to the function starting with `jac://`. + +Before getting started, you need to have an account at [Jina AI Cloud](http://cloud.jina.ai/) and created a [Personal Access Token (PAT)](https://cloud.jina.ai/settings/tokens). + +```python +from docarray import BaseDoc, DocList +import os + + +class SimpleDoc(BaseDoc): + text: str + + +os.environ['JINA_AUTH_TOKEN'] = 'YOUR_PAT' +DL_NAME = 'simple-dl' +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(8)]) +# push to Jina AI Cloud +dl.push(f'jac://{DL_NAME}') +# pull from Jina AI Cloud +dl_pull = DocList[SimpleDoc].pull(f'jac://{DL_NAME}') +``` + + +!!! note + When using `.push()` and `.pull()`, `DocList` calls the default boto3 client. Be sure your default session is correctly set up. + + +## Push & Pull with streaming +When you have a large amount of `Doc` to push and pull, you could use the streaming function. `push_stream()` and `pull_stream()` can help you to stream the `DocList` in order to save the memory usage. You set multiple `DocList` to pull from the same source as well. The usage is the same as using streaming with local files. Please refer to [Push & Pull with streaming with local files][TODO_add_internal_link] + + +## Delete +To delete the store, you need to use the static method `delete()` of `JACDocStore` class. 
+ +```python +from docarray.store import JACDocStore + +JACDocStore.delete(f'jac://{DL_NAME}') +``` \ No newline at end of file diff --git a/docs/user_guide/storing/store_s3.md b/docs/user_guide/storing/store_s3.md index ce849807720..68e21444eaa 100644 --- a/docs/user_guide/storing/store_s3.md +++ b/docs/user_guide/storing/store_s3.md @@ -85,3 +85,13 @@ Under the bucket `tmp_bucket`, there is a file with the name of `simple_dl.docs` ## Push & Pull with streaming When you have a large amount of `Doc` to push and pull, you could use the streaming function. `push_stream()` and `pull_stream()` can help you to stream the `DocList` in order to save the memory usage. You set multiple `DocList` to pull from the same source as well. The usage is the same as using streaming with local files. Please refer to [Push & Pull with streaming with local files][TODO_add_internal_link] + + +## Delete +To delete the store, you need to use the static method `delete()` of `JACDocStore` class. + +```python +from docarray.store import S3DocStore + +success = S3DocStore.delete(f's3://{BUCKET}/simple_dl') +``` From ed35d20a43a01f1f1dc63e6e9bfa1838f9f0ff06 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 12:46:06 +0200 Subject: [PATCH 04/20] docs: store section in user guide Signed-off-by: anna-charlotte --- docs/user_guide/storing/first_step.md | 11 +++++- docs/user_guide/storing/store_file.md | 54 +++++++++++++++++++++------ docs/user_guide/storing/store_jac.md | 22 ++++++++--- docs/user_guide/storing/store_s3.md | 19 ++++++---- mkdocs.yml | 6 ++- 5 files changed, 85 insertions(+), 27 deletions(-) diff --git a/docs/user_guide/storing/first_step.md b/docs/user_guide/storing/first_step.md index 5be8b39165b..f58c4ba4e36 100644 --- a/docs/user_guide/storing/first_step.md +++ b/docs/user_guide/storing/first_step.md @@ -1 +1,10 @@ -# Storing +# Intro + +In the previous sections we saw how to use [`BaseDoc`][docarray.base_doc.doc.BaseDoc], 
[`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] to represent multi-modal data and send it over the wire. +In this section we will see how to store and persist this data. + +This section is divided into three parts: + +- [Store](store_file.md) of [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] on-disk +- [Store on Jina AI Cloud](store_jac.md) +- [Store on S3](store_s3.md) \ No newline at end of file diff --git a/docs/user_guide/storing/store_file.md b/docs/user_guide/storing/store_file.md index b8c899aeaba..973e6999775 100644 --- a/docs/user_guide/storing/store_file.md +++ b/docs/user_guide/storing/store_file.md @@ -1,12 +1,18 @@ # Store -[DocList][docarray.array.doc_list.doc_list.DocList] can be persisted using `push()` and `pull()` functions. Under the hood, -[DocStore][docarray.store.abstract_doc_store.AbstractDocStore] is used to persist a `DocList`. You can store your `Doc` on-disk. Alternatively, you can upload to [AWS S3](https://aws.amazon.com/s3/), [minio](https://min.io) or [Jina AI Cloud](https://cloud.jina.ai/user/storage). - -# Store on-disk -When you want to use your `DocList` in another place, you can use the `push()` function to push the `DocList` to one place and later use the `pull()` function to pull its content back. +[DocList][docarray.array.doc_list.doc_list.DocList] can be persisted using the +[`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] and +[`.pull()`][docarray.array.doc_list.pushpull.PushPullMixin.pull] methods. +Under the hood, [DocStore][docarray.store.abstract_doc_store.AbstractDocStore] is used to persist a `DocList`. +You can store your `Doc` on-disk. Alternatively, you can upload to [AWS S3](https://aws.amazon.com/s3/), +[minio](https://min.io) or [Jina AI Cloud](https://cloud.jina.ai/user/storage). 
+ +## Store on-disk +When you want to use your [DocList][docarray.array.doc_list.doc_list.DocList] in another place, you can use the +[`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] function to push the [DocList][docarray.array.doc_list.doc_list.DocList] +to one place and later use the [`.pull()`][docarray.array.doc_list.pushpull.PushPullMixin.pull] function to pull its content back. ## Push & pull -To use the store locally, you need to pass a local file path to the function starting with `file://`. +To use the store locally, you need to pass a local file path to the function starting with `'file://'`. ```python from docarray import BaseDoc, DocList @@ -23,13 +29,16 @@ dl_pull = DocList[SimpleDoc].pull('file:///Users/docarray/tmp/simple_dl') ``` Under `/Users/docarray/tmp/`, there is a file with the name of `simple_dl.docs` being created to store the `DocList`. -```output +``` { .output .no-copy } tmp └── simple_dl.docs ``` -## Push & Pull with streaming -When you have a large amount of `Doc` to push and pull, you could use the streaming function. `push_stream()` and `pull_stream()` can help you to stream the `DocList` in order to save the memory usage. You set multiple `DocList` to pull from the same source as well. +## Push & pull with streaming +When you have a large amount of documents to push and pull, you could use the streaming function. +[`.push_stream()`][docarray.array.doc_list.pushpull.PushPullMixin.push_stream] and +[`.pull_stream()`][docarray.array.doc_list.pushpull.PushPullMixin.pull_stream] can help you to stream the `DocList` in +order to save the memory usage. You set multiple `DocList` to pull from the same source as well. 
```python from docarray import BaseDoc, DocList @@ -41,10 +50,31 @@ class SimpleDoc(BaseDoc): store_docs = [SimpleDoc(text=f'doc {i}') for i in range(8)] -DocList[SimpleDoc].push_stream(iter(store_docs), 'file:///Users/docarray/tmp/dl_stream') -dl_pull_stream_1 = DocList[SimpleDoc].pull_stream('file:///Users/docarray/tmp/dl_stream') -dl_pull_stream_2 = DocList[SimpleDoc].pull_stream('file:///Users/docarray/tmp/dl_stream') +DocList[SimpleDoc].push_stream( + iter(store_docs), + 'file:///Users/docarray/tmp/dl_stream', +) +dl_pull_stream_1 = DocList[SimpleDoc].pull_stream( + 'file:///Users/docarray/tmp/dl_stream' +) +dl_pull_stream_2 = DocList[SimpleDoc].pull_stream( + 'file:///Users/docarray/tmp/dl_stream' +) + for d1, d2 in zip(dl_pull_stream_1, dl_pull_stream_2): print(f'get {d1}, get {d2}') ``` +
+ Output + ```text + get SimpleDoc(id='5a4b92af27aadbb852d636892506998b', text='doc 0'), get SimpleDoc(id='5a4b92af27aadbb852d636892506998b', text='doc 0') + get SimpleDoc(id='705e4f6acbab0a6ff10d11a07c03b24c', text='doc 1'), get SimpleDoc(id='705e4f6acbab0a6ff10d11a07c03b24c', text='doc 1') + get SimpleDoc(id='4fb5c01bd5f935bbe91cf73e271ad590', text='doc 2'), get SimpleDoc(id='4fb5c01bd5f935bbe91cf73e271ad590', text='doc 2') + get SimpleDoc(id='381498cef78f1d4f1d80415d67918940', text='doc 3'), get SimpleDoc(id='381498cef78f1d4f1d80415d67918940', text='doc 3') + get SimpleDoc(id='d968bc6fa235b1cfc69eded92926157e', text='doc 4'), get SimpleDoc(id='d968bc6fa235b1cfc69eded92926157e', text='doc 4') + get SimpleDoc(id='30bf347427a4bd50ce8ada1841320fe3', text='doc 5'), get SimpleDoc(id='30bf347427a4bd50ce8ada1841320fe3', text='doc 5') + get SimpleDoc(id='1389877ac97b3e6d0e8eb17568934708', text='doc 6'), get SimpleDoc(id='1389877ac97b3e6d0e8eb17568934708', text='doc 6') + get SimpleDoc(id='264b0eff2cd138d296f15c685e15bf23', text='doc 7'), get SimpleDoc(id='264b0eff2cd138d296f15c685e15bf23', text='doc 7') + ``` +
\ No newline at end of file diff --git a/docs/user_guide/storing/store_jac.md b/docs/user_guide/storing/store_jac.md index d44e8f22bdd..8e2b47c9959 100644 --- a/docs/user_guide/storing/store_jac.md +++ b/docs/user_guide/storing/store_jac.md @@ -1,14 +1,16 @@ # Store on Jina AI Cloud -When you want to use your `DocList` in another place, you can use the `push()` function to push the `DocList` to S3 and later use the `pull()` function to pull its content back. +When you want to use your [`DocList`][docarray.DocList] in another place, you can use the +[`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] method to push the `DocList` to S3 and later use the +[`.pull()`][docarray.array.doc_list.pushpull.PushPullMixin.pull] function to pull its content back. !!! note To store on Jina AI Cloud, you need to install the extra dependency with the following line - ```bash + ```cmd pip install "docarray[jac]" ``` ## Push & pull -To use the store `DocList` on Jina AI Cloud, you need to pass a Jina AI Cloud path to the function starting with `jac://`. +To use the store [`DocList`][docarray.DocList] on Jina AI Cloud, you need to pass a Jina AI Cloud path to the function starting with `'jac://'`. Before getting started, you need to have an account at [Jina AI Cloud](http://cloud.jina.ai/) and created a [Personal Access Token (PAT)](https://cloud.jina.ai/settings/tokens). @@ -24,8 +26,10 @@ class SimpleDoc(BaseDoc): os.environ['JINA_AUTH_TOKEN'] = 'YOUR_PAT' DL_NAME = 'simple-dl' dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(8)]) + # push to Jina AI Cloud dl.push(f'jac://{DL_NAME}') + # pull from Jina AI Cloud dl_pull = DocList[SimpleDoc].pull(f'jac://{DL_NAME}') ``` @@ -35,12 +39,18 @@ dl_pull = DocList[SimpleDoc].pull(f'jac://{DL_NAME}') When using `.push()` and `.pull()`, `DocList` calls the default boto3 client. Be sure your default session is correctly set up. 
-## Push & Pull with streaming -When you have a large amount of `Doc` to push and pull, you could use the streaming function. `push_stream()` and `pull_stream()` can help you to stream the `DocList` in order to save the memory usage. You set multiple `DocList` to pull from the same source as well. The usage is the same as using streaming with local files. Please refer to [Push & Pull with streaming with local files][TODO_add_internal_link] +## Push & pull with streaming +When you have a large amount of documents to push and pull, you could use the streaming function. +[`.push_stream()`][docarray.array.doc_list.pushpull.PushPullMixin.push_stream] and +[`.pull_stream()`][docarray.array.doc_list.pushpull.PushPullMixin.pull_stream] can help you to stream the +[`DocList`][docarray.DocList] in order to save the memory usage. +You set multiple `DocList` to pull from the same source as well. +The usage is the same as using streaming with local files. +Please refer to [Push & Pull with streaming with local files](store_file.md#push-pull-with-streaming). ## Delete -To delete the store, you need to use the static method `delete()` of `JACDocStore` class. +To delete the store, you need to use the static method [`.delete()`][docarray.store.jac.JACDocStore.delete] of [`JACDocStore`][docarray.store.jac.JACDocStore] class. ```python from docarray.store import JACDocStore diff --git a/docs/user_guide/storing/store_s3.md b/docs/user_guide/storing/store_s3.md index 68e21444eaa..9e63eb81e0a 100644 --- a/docs/user_guide/storing/store_s3.md +++ b/docs/user_guide/storing/store_s3.md @@ -1,14 +1,16 @@ # Store on S3 -When you want to use your `DocList` in another place, you can use the `push()` function to push the `DocList` to S3 and later use the `pull()` function to pull its content back. 
+When you want to use your [`DocList`][docarray.DocList] in another place, you can use the +[`.push`][docarray.array.doc_list.pushpull.PushPullMixin.push] method to push the `DocList` to S3 and later use the +[`.pull`][docarray.array.doc_list.pushpull.PushPullMixin.pull] function to pull its content back. !!! note To store on S3, you need to install the extra dependency with the following line - ```bash + ```cmd pip install "docarray[aws]" ``` ## Push & pull -To use the store `DocList` on S3, you need to pass an S3 path to the function starting with `s3://`. +To use the store [`DocList`][docarray.DocList] on S3, you need to pass an S3 path to the function starting with `'s3://'`. In the following demo, we use `MinIO` as a local S3 service. You could use the following docker-compose file to start the service in a Docker container. @@ -22,7 +24,7 @@ services: - "9005:9000" command: server /data ``` -Save the above file as `dock-compose.yml` and run the following line in the same folder as the file, +Save the above file as `docker-compose.yml` and run the following line in the same folder as the file, ```bash docker-compose up ``` @@ -83,12 +85,15 @@ Under the bucket `tmp_bucket`, there is a file with the name of `simple_dl.docs` When using `.push()` and `.pull()`, `DocList` calls the default boto3 client. Be sure your default session is correctly set up. -## Push & Pull with streaming -When you have a large amount of `Doc` to push and pull, you could use the streaming function. `push_stream()` and `pull_stream()` can help you to stream the `DocList` in order to save the memory usage. You set multiple `DocList` to pull from the same source as well. The usage is the same as using streaming with local files. Please refer to [Push & Pull with streaming with local files][TODO_add_internal_link] +## Push & pull with streaming +When you have a large amount of documents to push and pull, you could use the streaming function. 
+[`.push_stream()`][docarray.array.doc_list.pushpull.PushPullMixin.push_stream] and +[`.pull_stream()`][docarray.array.doc_list.pushpull.PushPullMixin.pull_stream] can help you to stream the +[`DocList`][docarray.DocList] in order to save the memory usage. You set multiple [`DocList`][docarray.DocList] to pull from the same source as well. The usage is the same as using streaming with local files. Please refer to [Push & Pull with streaming with local files](store_file.md#push-pull-with-streaming). ## Delete -To delete the store, you need to use the static method `delete()` of `JACDocStore` class. +To delete the store, you need to use the static method [`.delete()`][docarray.store.s3.S3DocStore.delete] of [`S3DocStore`][docarray.store.s3.S3DocStore] class. ```python from docarray.store import S3DocStore diff --git a/mkdocs.yml b/mkdocs.yml index 605b986393e..c3ace80d956 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -82,7 +82,11 @@ nav: - user_guide/representing/first_step.md - user_guide/representing/array.md - user_guide/sending/first_step.md - - user_guide/storing/first_step.md + - Storing: + - user_guide/storing/first_step.md + - user_guide/storing/store_file.md + - user_guide/storing/store_jac.md + - user_guide/storing/store_s3.md - How-to: - how_to/add_doc_index.md From 3d59e7aa9cfda4acca4268d43a69d0a355aaaf8c Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 12:47:30 +0200 Subject: [PATCH 05/20] docs: add doc stores to api reference section Signed-off-by: anna-charlotte --- docs/api_references/array/da.md | 2 +- docs/api_references/doc_store/doc_store.md | 3 +++ docs/api_references/doc_store/file_doc_store.md | 3 +++ docs/api_references/doc_store/jac_doc_store.md | 3 +++ docs/api_references/doc_store/s3_doc_store.md | 3 +++ 5 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 docs/api_references/doc_store/doc_store.md create mode 100644 docs/api_references/doc_store/file_doc_store.md create mode 100644 
docs/api_references/doc_store/jac_doc_store.md create mode 100644 docs/api_references/doc_store/s3_doc_store.md diff --git a/docs/api_references/array/da.md b/docs/api_references/array/da.md index eedcec827cd..28e1aa94efa 100644 --- a/docs/api_references/array/da.md +++ b/docs/api_references/array/da.md @@ -1,4 +1,4 @@ # DocList ::: docarray.array.doc_list.doc_list.DocList -::: docarray.array.doc_list.io.IOMixinArray +::: docarray.array.doc_list.pushpull.PushPullMixin \ No newline at end of file diff --git a/docs/api_references/doc_store/doc_store.md b/docs/api_references/doc_store/doc_store.md new file mode 100644 index 00000000000..eb6e65b9f4a --- /dev/null +++ b/docs/api_references/doc_store/doc_store.md @@ -0,0 +1,3 @@ +# AbstractDocStore + +::: docarray.store.abstract_doc_store.AbstractDocStore diff --git a/docs/api_references/doc_store/file_doc_store.md b/docs/api_references/doc_store/file_doc_store.md new file mode 100644 index 00000000000..b81dc3ee298 --- /dev/null +++ b/docs/api_references/doc_store/file_doc_store.md @@ -0,0 +1,3 @@ +# FileDocStore + +::: docarray.store.file.FileDocStore diff --git a/docs/api_references/doc_store/jac_doc_store.md b/docs/api_references/doc_store/jac_doc_store.md new file mode 100644 index 00000000000..1d4c0a28303 --- /dev/null +++ b/docs/api_references/doc_store/jac_doc_store.md @@ -0,0 +1,3 @@ +# JACDocStore + +::: docarray.store.jac.JACDocStore diff --git a/docs/api_references/doc_store/s3_doc_store.md b/docs/api_references/doc_store/s3_doc_store.md new file mode 100644 index 00000000000..6856c42f2ff --- /dev/null +++ b/docs/api_references/doc_store/s3_doc_store.md @@ -0,0 +1,3 @@ +# S3DocStore + +::: docarray.store.s3.S3DocStore From 52dcab2972b5c370d39f44ad7dcfa1acfc3947a1 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 12:47:59 +0200 Subject: [PATCH 06/20] docs: fix docstrings Signed-off-by: anna-charlotte --- docarray/array/any_array.py | 14 +++++++------- docarray/array/doc_list/pushpull.py | 26 
++++++++++++------------- docarray/store/file.py | 27 +++++++++++++++----------- docarray/store/jac.py | 30 +++++++++++++++-------------- docarray/store/s3.py | 18 ++++++++--------- 5 files changed, 61 insertions(+), 54 deletions(-) diff --git a/docarray/array/any_array.py b/docarray/array/any_array.py index da718519682..31d1dedb067 100644 --- a/docarray/array/any_array.py +++ b/docarray/array/any_array.py @@ -121,7 +121,7 @@ def _set_data_column( field: str, values: Union[List, T, 'AbstractTensor'], ): - """Set all Documents in this [`DocList`][docarray.typing.DocList] using the passed values + """Set all Documents in this [`DocList`][docarray.DocList] using the passed values :param field: name of the fields to extract :values: the values to set at the DocList level @@ -140,7 +140,7 @@ def to_protobuf(self) -> 'DocListProto': ... def _to_node_protobuf(self) -> 'NodeProto': - """Convert a [`DocList`][docarray.typing.DocList] into a NodeProto protobuf message. + """Convert a [`DocList`][docarray.DocList] into a NodeProto protobuf message. This function should be called when a DocList is nested into another Document that need to be converted into a protobuf @@ -157,7 +157,7 @@ def traverse_flat( ) -> Union[List[Any], 'AbstractTensor']: """ Return a List of the accessed objects when applying the `access_path`. If this - results in a nested list or list of [`DocList`s][docarray.typing.DocList], the list will be flattened + results in a nested list or list of [`DocList`s][docarray.DocList], the list will be flattened on the first level. The access path is a string that consists of attribute names, concatenated and `"__"`-separated. It describes the path from the first level to an arbitrary one, e.g. `'content__image__url'`. 
@@ -210,7 +210,7 @@ class Book(BaseDoc): chapters = docs.traverse_flat(access_path='chapters') # list of 30 strings ``` - If your [`DocList`][docarray.typing.DocList] is in doc_vec mode, and you want to access a field of + If your [`DocList`][docarray.DocList] is in doc_vec mode, and you want to access a field of type [`AnyTensor`][docarray.typing.AnyTensor], the doc_vec tensor will be returned instead of a list: ```python @@ -263,7 +263,7 @@ def _flatten_one_level(sequence: List[Any]) -> List[Any]: def summary(self): """ - Print a summary of this [`DocList`][docarray.typing.DocList] object and a summary of the schema of its + Print a summary of this [`DocList`][docarray.DocList] object and a summary of the schema of its Document type. """ DocArraySummary(self).summary() @@ -275,13 +275,13 @@ def _batch( show_progress: bool = False, ) -> Generator[T, None, None]: """ - Creates a `Generator` that yields [`DocList`][docarray.typing.DocList] of size `batch_size`. + Creates a `Generator` that yields [`DocList`][docarray.DocList] of size `batch_size`. Note, that the last batch might be smaller than `batch_size`. :param batch_size: Size of each generated batch. :param shuffle: If set, shuffle the Documents before dividing into minibatches. :param show_progress: if set, show a progress bar when batching documents. - :yield: a Generator of [`DocList`][docarray.typing.DocList], each in the length of `batch_size` + :yield: a Generator of [`DocList`][docarray.DocList], each in the length of `batch_size` """ from rich.progress import track diff --git a/docarray/array/doc_list/pushpull.py b/docarray/array/doc_list/pushpull.py index baa9c0439da..0d0f9384758 100644 --- a/docarray/array/doc_list/pushpull.py +++ b/docarray/array/doc_list/pushpull.py @@ -86,10 +86,10 @@ def push( show_progress: bool = False, branding: Optional[Dict] = None, ) -> Dict: - """Push this DocList object to the specified url. + """Push this `DocList` object to the specified url. 
- :param url: url specifying the protocol and save name of the DocList. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` - :param public: Only used by ``jac`` protocol. If true, anyone can pull a DocList if they know its name. + :param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` + :param public: Only used by ``jac`` protocol. If true, anyone can pull a `DocList` if they know its name. Setting this to false will restrict access to only the creator. :param show_progress: If true, a progress bar will be displayed. :param branding: Only used by ``jac`` protocol. A dictionary of branding information to be sent to Jina AI Cloud. {"icon": "emoji", "background": "#fff"} @@ -112,8 +112,8 @@ def push_stream( """Push a stream of documents to the specified url. :param docs: a stream of documents - :param url: url specifying the protocol and save name of the DocList. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` - :param public: Only used by ``jac`` protocol. If true, anyone can pull a DocList if they know its name. + :param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` + :param public: Only used by ``jac`` protocol. If true, anyone can pull a `DocList` if they know its name. :param show_progress: If true, a progress bar will be displayed. :param branding: Only used by ``jac`` protocol. A dictionary of branding information to be sent to Jina AI Cloud. 
{"icon": "emoji", "background": "#fff"} """ @@ -130,19 +130,19 @@ def pull( show_progress: bool = False, local_cache: bool = True, ) -> 'DocList': - """Pull a :class:`DocList` from the specified url. + """Pull a `DocList` from the specified url. - :param url: url specifying the protocol and save name of the DocList. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` + :param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` :param show_progress: if true, display a progress bar. - :param local_cache: store the downloaded DocList to local folder - :return: a :class:`DocList` object + :param local_cache: store the downloaded `DocList` to local folder + :return: a `DocList` object """ from docarray.base_doc import AnyDoc if cls.doc_type == AnyDoc: raise TypeError( 'There is no document schema defined. ' - 'Please specify the DocList\'s Document type using `DocList[MyDoc]`.' + 'Please specify the `DocList`\'s Document type using `DocList[MyDoc]`.' ) logging.info(f'Pulling {url}') @@ -160,9 +160,9 @@ def pull_stream( ) -> Iterator['BaseDoc']: """Pull a stream of Documents from the specified url. - :param url: url specifying the protocol and save name of the DocList. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` + :param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name`` :param show_progress: if true, display a progress bar. 
- :param local_cache: store the downloaded DocList to local folder + :param local_cache: store the downloaded `DocList` to local folder :return: Iterator of Documents """ from docarray.base_doc import AnyDoc @@ -170,7 +170,7 @@ def pull_stream( if cls.doc_type == AnyDoc: raise TypeError( 'There is no document schema defined. ' - 'Please specify the DocList\'s Document type using `DocList[MyDoc]`.' + 'Please specify the `DocList`\'s Document type using `DocList[MyDoc]`.' ) logging.info(f'Pulling Document stream from {url}') diff --git a/docarray/store/file.py b/docarray/store/file.py index b649864478a..6c46c3ab615 100644 --- a/docarray/store/file.py +++ b/docarray/store/file.py @@ -16,11 +16,15 @@ class FileDocStore(AbstractDocStore): + """Class to push and pull [`DocList`][docarray.DocList] on-disk.""" + @staticmethod def _abs_filepath(name: str) -> Path: """Resolve a name to an absolute path. - If it is not a path, the cache directoty is prepended. - If it is a path, it is resolved to an absolute path. + + :param name: If it is not a path, the cache directory is prepended. + If it is a path, it is resolved to an absolute path. + :return: Path """ if not (name.startswith('/') or name.startswith('~') or name.startswith('.')): name = str(_get_cache_path() / name) @@ -32,11 +36,11 @@ def _abs_filepath(name: str) -> Path: def list( cls: Type[SelfFileDocStore], namespace: str, show_table: bool ) -> List[str]: - """List all DocArrays in a directory. + """List all [`DocList`s][docarray.DocList] in a directory. :param namespace: The directory to list. :param show_table: If True, print a table of the files in the directory. - :return: A list of the names of the DocArrays in the directory. + :return: A list of the names of the `DocLists` in the directory. 
""" namespace_dir = cls._abs_filepath(namespace) if not namespace_dir.exists(): @@ -51,7 +55,7 @@ def list( from rich.table import Table table = Table( - title=f'You have {len(da_files)} DocArrays in file://{namespace_dir}', + title=f'You have {len(da_files)} DocLists in file://{namespace_dir}', box=box.SIMPLE, highlight=True, ) @@ -74,9 +78,9 @@ def list( def delete( cls: Type[SelfFileDocStore], name: str, missing_ok: bool = False ) -> bool: - """Delete a DocList from the local filesystem. + """Delete a [`DocList`][docarray.DocList] from the local filesystem. - :param name: The name of the DocList to delete. + :param name: The name of the `DocList` to delete. :param missing_ok: If True, do not raise an exception if the file does not exist. Defaults to False. :return: True if the file was deleted, False if it did not exist. """ @@ -98,8 +102,9 @@ def push( show_progress: bool, branding: Optional[Dict], ) -> Dict: - """Push this DocList object to the specified file path. + """Push this [`DocList`][docarray.DocList] object to the specified file path. + :param docs: The `DocList` to push. :param name: The file path to push to. :param public: Not used by the ``file`` protocol. :param show_progress: If true, a progress bar will be displayed. @@ -150,12 +155,12 @@ def pull( show_progress: bool, local_cache: bool, ) -> 'DocList': - """Pull a :class:`DocList` from the specified url. + """Pull a [`DocList`][docarray.DocList] from the specified url. :param name: The file path to pull from. :param show_progress: if true, display a progress bar. 
- :param local_cache: store the downloaded DocList to local folder - :return: a :class:`DocList` object + :param local_cache: store the downloaded `DocList` to local folder + :return: a `DocList` object """ return docs_cls( diff --git a/docarray/store/jac.py b/docarray/store/jac.py index 7838e3c26c8..6dafb49839a 100644 --- a/docarray/store/jac.py +++ b/docarray/store/jac.py @@ -82,7 +82,7 @@ def _get_raw_summary(self: 'DocList') -> List[Dict[str, Any]]: class JACDocStore(AbstractDocStore): - """Class to push and pull DocList to and from Jina AI Cloud.""" + """Class to push and pull [`DocList`][docarray.DocList] to and from Jina AI Cloud.""" @staticmethod @hubble.login_required @@ -135,7 +135,7 @@ def list(namespace: str = '', show_table: bool = False) -> List[str]: @hubble.login_required def delete(name: str, missing_ok: bool = True) -> bool: """ - Delete a DocList from the cloud. + Delete a [`DocList`][docarray.DocList] from the cloud. :param name: the name of the DocList to delete. :param missing_ok: if true, do not raise an error if the DocList does not exist. :return: True if the DocList was deleted, False if it did not exist. @@ -158,17 +158,18 @@ def push( show_progress: bool = False, branding: Optional[Dict] = None, ) -> Dict: - """Push this DocList object to Jina AI Cloud + """Push this [`DocList`][docarray.DocList] object to Jina AI Cloud - .. note:: + !!! note - Push with the same ``name`` will override the existing content. - Kinda like a public clipboard where everyone can override anyone's content. So to make your content survive longer, you may want to use longer & more complicated name. - The lifetime of the content is not promised atm, could be a day, could be a week. Do not use it for persistence. Only use this full temporary transmission/storage/clipboard. - :param name: A name that can later be used to retrieve this :class:`DocList`. - :param public: By default, anyone can pull a DocList if they know its name. 
+ :param docs: The `DocList` to push. + :param name: A name that can later be used to retrieve this `DocList`. + :param public: By default, anyone can pull a `DocList` if they know its name. Setting this to false will restrict access to only the creator. :param show_progress: If true, a progress bar will be displayed. :param branding: A dictionary of branding information to be sent to Jina Cloud. e.g. {"icon": "emoji", "background": "#fff"} @@ -245,15 +246,16 @@ def push_stream( ) -> Dict: """Push a stream of documents to Jina AI Cloud - .. note:: + !!! note - Push with the same ``name`` will override the existing content. - Kinda like a public clipboard where everyone can override anyone's content. So to make your content survive longer, you may want to use longer & more complicated name. - The lifetime of the content is not promised atm, could be a day, could be a week. Do not use it for persistence. Only use this full temporary transmission/storage/clipboard. - :param name: A name that can later be used to retrieve this :class:`DocList`. - :param public: By default, anyone can pull a DocList if they know its name. + :param docs: a stream of documents + :param name: A name that can later be used to retrieve this `DocList`. + :param public: By default, anyone can pull a `DocList` if they know its name. Setting this to false will restrict access to only the creator. :param show_progress: If true, a progress bar will be displayed. :param branding: A dictionary of branding information to be sent to Jina Cloud. e.g. {"icon": "emoji", "background": "#fff"} @@ -278,12 +280,12 @@ def pull( show_progress: bool = False, local_cache: bool = True, ) -> 'DocList': - """Pull a :class:`DocList` from Jina AI Cloud to local. + """Pull a [`DocList`][docarray.DocList] from Jina AI Cloud to local. - :param name: the upload name set during :meth:`.push` + :param name: the upload name set during `.push` :param show_progress: if true, display a progress bar. 
:param local_cache: store the downloaded DocList to local folder - :return: a :class:`DocList` object + :return: a [`DocList`][docarray.DocList] object """ from docarray import DocList @@ -299,9 +301,9 @@ def pull_stream( show_progress: bool = False, local_cache: bool = False, ) -> Iterator['BaseDoc']: - """Pull a :class:`DocList` from Jina AI Cloud to local. + """Pull a [`DocList`][docarray.DocList] from Jina AI Cloud to local. - :param name: the upload name set during :meth:`.push` + :param name: the upload name set during `.push` :param show_progress: if true, display a progress bar. :param local_cache: store the downloaded DocList to local folder :return: An iterator of Documents diff --git a/docarray/store/s3.py b/docarray/store/s3.py index 936a261396f..2ebb864fc8d 100644 --- a/docarray/store/s3.py +++ b/docarray/store/s3.py @@ -48,15 +48,15 @@ def close(self): class S3DocStore(AbstractDocStore): - """Class to push and pull DocList to and from S3.""" + """Class to push and pull [`DocList`][docarray.DocList] to and from S3.""" @staticmethod def list(namespace: str, show_table: bool = False) -> List[str]: - """List all DocArrays in the specified bucket and namespace. + """List all [`DocList`s][docarray.DocList] in the specified bucket and namespace. :param namespace: The bucket and namespace to list. e.g. my_bucket/my_namespace :param show_table: If true, a rich table will be printed to the console. - :return: A list of DocList names. + :return: A list of `DocList` names. 
""" bucket, namespace = namespace.split('/', 1) s3 = boto3.resource('s3') @@ -74,7 +74,7 @@ def list(namespace: str, show_table: bool = False) -> List[str]: from rich.table import Table table = Table( - title=f'You have {len(da_files)} DocArrays in bucket s3://{bucket} under the namespace "{namespace}"', + title=f'You have {len(da_files)} DocLists in bucket s3://{bucket} under the namespace "{namespace}"', box=box.SIMPLE, highlight=True, ) @@ -94,7 +94,7 @@ def list(namespace: str, show_table: bool = False) -> List[str]: @staticmethod def delete(name: str, missing_ok: bool = True) -> bool: - """Delete the DocList object at the specified bucket and key. + """Delete the [`DocList`][docarray.DocList] object at the specified bucket and key. :param name: The bucket and key to delete. e.g. my_bucket/my_key :param missing_ok: If true, no error will be raised if the object does not exist. @@ -125,9 +125,9 @@ def push( show_progress: bool = False, branding: Optional[Dict] = None, ) -> Dict: - """Push this DocList object to the specified bucket and key. + """Push this [`DocList`][docarray.DocList] object to the specified bucket and key. - :param docs: The DocList to push. + :param docs: The `DocList` to push. :param name: The bucket and key to push to. e.g. my_bucket/my_key :param public: Not used by the ``s3`` protocol. :param show_progress: If true, a progress bar will be displayed. @@ -182,12 +182,12 @@ def pull( show_progress: bool = False, local_cache: bool = False, ) -> 'DocList': - """Pull a :class:`DocList` from the specified bucket and key. + """Pull a [`DocList`][docarray.DocList] from the specified bucket and key. :param name: The bucket and key to pull from. e.g. my_bucket/my_key :param show_progress: if true, display a progress bar. 
:param local_cache: store the downloaded DocList to local cache - :return: a :class:`DocList` object + :return: a `DocList` object """ docs = docs_cls( # type: ignore cls.pull_stream( From d592eb7bca18816837794ab4ba582a1808df4ffe Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 12:54:36 +0200 Subject: [PATCH 07/20] fix: clean up Signed-off-by: anna-charlotte --- docs/user_guide/storing/first_step.md | 9 ++++++++- docs/user_guide/storing/store_file.md | 11 ++--------- docs/user_guide/storing/store_s3.md | 16 ++++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/docs/user_guide/storing/first_step.md b/docs/user_guide/storing/first_step.md index f58c4ba4e36..d821f5872fb 100644 --- a/docs/user_guide/storing/first_step.md +++ b/docs/user_guide/storing/first_step.md @@ -3,8 +3,15 @@ In the previous sections we saw how to use [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] to represent multi-modal data and send it over the wire. In this section we will see how to store and persist this data. +[DocList][docarray.array.doc_list.doc_list.DocList] can be persisted using the +[`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] and +[`.pull()`][docarray.array.doc_list.pushpull.PushPullMixin.pull] methods. +Under the hood, [DocStore][docarray.store.abstract_doc_store.AbstractDocStore] is used to persist a `DocList`. +You can store your documents on-disk. Alternatively, you can upload them to [AWS S3](https://aws.amazon.com/s3/), +[minio](https://min.io) or [Jina AI Cloud](https://cloud.jina.ai/user/storage). 
+ This section is divided into three parts: - [Store](store_file.md) of [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] on-disk - [Store on Jina AI Cloud](store_jac.md) -- [Store on S3](store_s3.md) \ No newline at end of file +- [Store on S3](store_s3.md) diff --git a/docs/user_guide/storing/store_file.md b/docs/user_guide/storing/store_file.md index 973e6999775..8e76fe6d676 100644 --- a/docs/user_guide/storing/store_file.md +++ b/docs/user_guide/storing/store_file.md @@ -1,12 +1,5 @@ -# Store -[DocList][docarray.array.doc_list.doc_list.DocList] can be persisted using the -[`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] and -[`.pull()`][docarray.array.doc_list.pushpull.PushPullMixin.pull] methods. -Under the hood, [DocStore][docarray.store.abstract_doc_store.AbstractDocStore] is used to persist a `DocList`. -You can store your `Doc` on-disk. Alternatively, you can upload to [AWS S3](https://aws.amazon.com/s3/), -[minio](https://min.io) or [Jina AI Cloud](https://cloud.jina.ai/user/storage). - -## Store on-disk +# Store on-disk + When you want to use your [DocList][docarray.array.doc_list.doc_list.DocList] in another place, you can use the [`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] function to push the [DocList][docarray.array.doc_list.doc_list.DocList] to one place and later use the [`.pull()`][docarray.array.doc_list.pushpull.PushPullMixin.pull] function to pull its content back. 
diff --git a/docs/user_guide/storing/store_s3.md b/docs/user_guide/storing/store_s3.md index 9e63eb81e0a..fe712857349 100644 --- a/docs/user_guide/storing/store_s3.md +++ b/docs/user_guide/storing/store_s3.md @@ -24,8 +24,8 @@ services: - "9005:9000" command: server /data ``` -Save the above file as `docker-compose.yml` and run the following line in the same folder as the file, -```bash +Save the above file as `docker-compose.yml` and run the following line in the same folder as the file. +```cmd docker-compose up ``` @@ -56,8 +56,8 @@ if __name__ == '__main__': s3.create_bucket(Bucket=BUCKET) store_docs = [SimpleDoc(text=f'doc {i}') for i in range(8)] - dl = DocList[SimpleDoc]() - dl.extend([SimpleDoc(text=f'doc {i}') for i in range(8)]) + docs = DocList[SimpleDoc]() + docs.extend([SimpleDoc(text=f'doc {i}') for i in range(8)]) # .push() and .pull() use the default boto3 client boto3.Session.client.__defaults__ = ( @@ -71,15 +71,15 @@ if __name__ == '__main__': None, Config(signature_version="s3v4"), ) - dl.push(f's3://{BUCKET}/simple_dl') - dl_pull = DocList[SimpleDoc].pull(f's3://{BUCKET}/simple_dl') + docs.push(f's3://{BUCKET}/simple_docs') + docs_pull = DocList[SimpleDoc].pull(f's3://{BUCKET}/simple_docs') # delete the bucket s3.Bucket(BUCKET).objects.all().delete() s3.Bucket(BUCKET).delete() ``` -Under the bucket `tmp_bucket`, there is a file with the name of `simple_dl.docs` being created to store the `DocList`. +Under the bucket `tmp_bucket`, there is a file with the name of `simple_docs.docs` being created to store the `DocList`. !!! note When using `.push()` and `.pull()`, `DocList` calls the default boto3 client. Be sure your default session is correctly set up. 
@@ -98,5 +98,5 @@ To delete the store, you need to use the static method [`.delete()`][docarray.st ```python from docarray.store import S3DocStore -success = S3DocStore.delete(f's3://{BUCKET}/simple_dl') +success = S3DocStore.delete(f's3://{BUCKET}/simple_docs') ``` From af80be36f804af6b7233c33052dc6ea362fc2771 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 13:56:54 +0200 Subject: [PATCH 08/20] fix: path in file doc store Signed-off-by: anna-charlotte --- docs/user_guide/storing/store_file.md | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/docs/user_guide/storing/store_file.md b/docs/user_guide/storing/store_file.md index 8e76fe6d676..8602eb71adb 100644 --- a/docs/user_guide/storing/store_file.md +++ b/docs/user_guide/storing/store_file.md @@ -16,16 +16,13 @@ class SimpleDoc(BaseDoc): dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(8)]) -dl.push('file:///Users/docarray/tmp/simple_dl') +dl.push('file://simple_dl') -dl_pull = DocList[SimpleDoc].pull('file:///Users/docarray/tmp/simple_dl') +dl_pull = DocList[SimpleDoc].pull('file://simple_dl') ``` -Under `/Users/docarray/tmp/`, there is a file with the name of `simple_dl.docs` being created to store the `DocList`. -``` { .output .no-copy } -tmp -└── simple_dl.docs -``` +A file with the name of `simple_dl.docs` being created to store the `DocList`. + ## Push & pull with streaming When you have a large amount of documents to push and pull, you could use the streaming function. 
@@ -45,14 +42,10 @@ store_docs = [SimpleDoc(text=f'doc {i}') for i in range(8)] DocList[SimpleDoc].push_stream( iter(store_docs), - 'file:///Users/docarray/tmp/dl_stream', -) -dl_pull_stream_1 = DocList[SimpleDoc].pull_stream( - 'file:///Users/docarray/tmp/dl_stream' -) -dl_pull_stream_2 = DocList[SimpleDoc].pull_stream( - 'file:///Users/docarray/tmp/dl_stream' + 'file://dl_stream', ) +dl_pull_stream_1 = DocList[SimpleDoc].pull_stream('file://dl_stream') +dl_pull_stream_2 = DocList[SimpleDoc].pull_stream('file://dl_stream') for d1, d2 in zip(dl_pull_stream_1, dl_pull_stream_2): print(f'get {d1}, get {d2}') From 47debe43a31252729d2951cd7e9e2a6e9080fcb8 Mon Sep 17 00:00:00 2001 From: Alex Cureton-Griffiths Date: Thu, 13 Apr 2023 13:33:49 +0200 Subject: [PATCH 09/20] docs(menu): consistency, wording fixes (#1363) * docs(menu): consistency, wording fixes Signed-off-by: Alex C-G * docs(intro): remove redundancy in title Signed-off-by: Alex C-G --------- Signed-off-by: Alex C-G Signed-off-by: anna-charlotte --- docs/how_to/audio2text.md | 8 ++++---- docs/how_to/multimodal_training_and_serving.md | 2 +- docs/how_to/optimize_performance_with_id_generation.md | 2 +- docs/user_guide/intro.md | 2 +- docs/user_guide/sending/first_step.md | 2 +- mkdocs.yml | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/docs/how_to/audio2text.md b/docs/how_to/audio2text.md index fcec869ce0f..d2f2507e08f 100644 --- a/docs/how_to/audio2text.md +++ b/docs/how_to/audio2text.md @@ -1,10 +1,10 @@ -# Creating an Audio to Text App with Jina and DocArray V2 +# Create an audio to text app with Jina and DocArray V2 -This is how you can build an Audio to Text app using Jina, Docarray and Whisper +This is how you can build an Audio to Text app using Jina, DocArray and Whisper. 
We will use: -* DocarrayV2: Helps us to load and preprocess multimodal data such as image, text and audio in our case +* DocArray V2: Helps us to load and preprocess multimodal data such as image, text and audio in our case * Jina: Helps us serve the model quickly and create a client First let's install requirements @@ -76,4 +76,4 @@ with Deployment( print(docs[0].text) ``` -And we get the transcribed result! \ No newline at end of file +And we get the transcribed result! diff --git a/docs/how_to/multimodal_training_and_serving.md b/docs/how_to/multimodal_training_and_serving.md index 9c30cbeffba..604545c7cd2 100644 --- a/docs/how_to/multimodal_training_and_serving.md +++ b/docs/how_to/multimodal_training_and_serving.md @@ -12,7 +12,7 @@ jupyter: name: python3 --- -# Multi-Modal Deep learning with DocList +# Multimodal deep learning with DocList DocList is a library for representing, sending, and storing multi-modal data that can be used for a variety of different use cases. diff --git a/docs/how_to/optimize_performance_with_id_generation.md b/docs/how_to/optimize_performance_with_id_generation.md index db46020faa2..5d0df78e776 100644 --- a/docs/how_to/optimize_performance_with_id_generation.md +++ b/docs/how_to/optimize_performance_with_id_generation.md @@ -1,4 +1,4 @@ -# How to optimize performance +# Optimize performance ### `BaseDoc`'s id diff --git a/docs/user_guide/intro.md b/docs/user_guide/intro.md index 5c9fbb14d1f..94bb730fdb0 100644 --- a/docs/user_guide/intro.md +++ b/docs/user_guide/intro.md @@ -1,4 +1,4 @@ -# User Guide - Introduction +# Introduction This user guide shows you how to use `DocArray` with most of its features. 
diff --git a/docs/user_guide/sending/first_step.md b/docs/user_guide/sending/first_step.md index a18433535b9..1079b9dd75b 100644 --- a/docs/user_guide/sending/first_step.md +++ b/docs/user_guide/sending/first_step.md @@ -1 +1 @@ -# Sending +# Sending data diff --git a/mkdocs.yml b/mkdocs.yml index c3ace80d956..bd1548a0a22 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -76,9 +76,9 @@ plugins: nav: - Home: README.md - - Tutorial - User Guide: + - Tutorial/User Guide: - user_guide/intro.md - - Representing: + - Representing data: - user_guide/representing/first_step.md - user_guide/representing/array.md - user_guide/sending/first_step.md From 65c5e88dfc55e534352056d9e8307880e1a6c146 Mon Sep 17 00:00:00 2001 From: Anne Yang Date: Thu, 13 Apr 2023 19:38:01 +0800 Subject: [PATCH 10/20] fix: default dims=-1 for elastic index (#1368) Signed-off-by: AnneY Signed-off-by: anna-charlotte --- docarray/index/backends/elastic.py | 11 ++++++++++- tests/index/elastic/fixture.py | 5 +++++ tests/index/elastic/v7/test_index_get_del.py | 7 ++++++- tests/index/elastic/v8/test_index_get_del.py | 9 +++++++-- 4 files changed, 28 insertions(+), 4 deletions(-) diff --git a/docarray/index/backends/elastic.py b/docarray/index/backends/elastic.py index c2c1c6646a2..52f60e1d098 100644 --- a/docarray/index/backends/elastic.py +++ b/docarray/index/backends/elastic.py @@ -88,8 +88,17 @@ def __init__(self, db_config=None, **kwargs): mappings.update(self._db_config.index_mappings) for col_name, col in self._column_infos.items(): + if col.db_type == 'dense_vector' and ( + not col.n_dim and col.config['dims'] < 0 + ): + self._logger.info( + f'Not indexing column {col_name}, the dimensionality is not specified' + ) + continue + mappings['properties'][col_name] = self._create_index_mapping(col) + # print(mappings['properties']) if self._client.indices.exists(index=self._index_name): self._client_put_mapping(mappings) else: @@ -231,8 +240,8 @@ def __post_init__(self): def dense_vector_config(self): config = 
{ + 'dims': -1, 'index': True, - 'dims': 128, 'similarity': 'cosine', # 'l2_norm', 'dot_product', 'cosine' 'm': 16, 'ef_construction': 100, diff --git a/tests/index/elastic/fixture.py b/tests/index/elastic/fixture.py index 315078d6269..812f0f09d51 100644 --- a/tests/index/elastic/fixture.py +++ b/tests/index/elastic/fixture.py @@ -6,6 +6,7 @@ from pydantic import Field from docarray import BaseDoc +from docarray.documents import ImageDoc from docarray.typing import NdArray pytestmark = [pytest.mark.slow, pytest.mark.index] @@ -58,6 +59,10 @@ class DeepNestedDoc(BaseDoc): d: NestedDoc +class MyImageDoc(ImageDoc): + embedding: NdArray = Field(dims=128) + + @pytest.fixture(scope='function') def ten_simple_docs(): return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)] diff --git a/tests/index/elastic/v7/test_index_get_del.py b/tests/index/elastic/v7/test_index_get_del.py index 7124d5d61bd..d5ead493c03 100644 --- a/tests/index/elastic/v7/test_index_get_del.py +++ b/tests/index/elastic/v7/test_index_get_del.py @@ -10,6 +10,7 @@ from tests.index.elastic.fixture import ( # noqa: F401 DeepNestedDoc, FlatDoc, + MyImageDoc, NestedDoc, SimpleDoc, start_storage_v7, @@ -247,7 +248,7 @@ class MySchema(BaseDoc): def test_index_multi_modal_doc(): class MyMultiModalDoc(BaseDoc): - image: ImageDoc + image: MyImageDoc text: TextDoc store = ElasticV7DocIndex[MyMultiModalDoc]() @@ -263,3 +264,7 @@ class MyMultiModalDoc(BaseDoc): assert store[id_].id == id_ assert np.all(store[id_].image.embedding == doc[0].image.embedding) assert store[id_].text.text == doc[0].text.text + + query = doc[0] + docs, _ = store.find(query, limit=10, search_field='image__embedding') + assert len(docs) > 0 diff --git a/tests/index/elastic/v8/test_index_get_del.py b/tests/index/elastic/v8/test_index_get_del.py index db2df925ebb..03560caae7d 100644 --- a/tests/index/elastic/v8/test_index_get_del.py +++ b/tests/index/elastic/v8/test_index_get_del.py @@ -10,6 +10,7 @@ from tests.index.elastic.fixture 
import ( # noqa: F401 DeepNestedDoc, FlatDoc, + MyImageDoc, NestedDoc, SimpleDoc, start_storage_v8, @@ -234,7 +235,7 @@ class MyDoc(BaseDoc): tensor: Union[NdArray, str] class MySchema(BaseDoc): - tensor: NdArray + tensor: NdArray[128] store = ElasticDocIndex[MySchema]() doc = [MyDoc(tensor=np.random.randn(128))] @@ -247,7 +248,7 @@ class MySchema(BaseDoc): def test_index_multi_modal_doc(): class MyMultiModalDoc(BaseDoc): - image: ImageDoc + image: MyImageDoc text: TextDoc store = ElasticDocIndex[MyMultiModalDoc]() @@ -264,6 +265,10 @@ class MyMultiModalDoc(BaseDoc): assert np.all(store[id_].image.embedding == doc[0].image.embedding) assert store[id_].text.text == doc[0].text.text + query = doc[0] + docs, _ = store.find(query, limit=10, search_field='image__embedding') + assert len(docs) > 0 + def test_elasticv7_version_check(): with pytest.raises(ImportError): From 95c8c7029f0c39be0c7f82d8623213dddc764d80 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 14:27:45 +0200 Subject: [PATCH 11/20] fix: s3 bucket var Signed-off-by: anna-charlotte --- docs/user_guide/storing/store_s3.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/user_guide/storing/store_s3.md b/docs/user_guide/storing/store_s3.md index fe712857349..bd896b107e6 100644 --- a/docs/user_guide/storing/store_s3.md +++ b/docs/user_guide/storing/store_s3.md @@ -98,5 +98,6 @@ To delete the store, you need to use the static method [`.delete()`][docarray.st ```python from docarray.store import S3DocStore +BUCKET = 'tmp_bucket' success = S3DocStore.delete(f's3://{BUCKET}/simple_docs') ``` From 96aa50f5bb0b0d07efd1116882b1d2c0c226329e Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 14:58:19 +0200 Subject: [PATCH 12/20] fix: typo and s3 delete code snippet Signed-off-by: anna-charlotte --- docs/user_guide/storing/store_jac.md | 2 +- docs/user_guide/storing/store_s3.md | 55 ++++++++++++++++++++++++---- 2 files changed, 48 insertions(+), 9 deletions(-) diff --git 
a/docs/user_guide/storing/store_jac.md b/docs/user_guide/storing/store_jac.md index 8e2b47c9959..2975df7311f 100644 --- a/docs/user_guide/storing/store_jac.md +++ b/docs/user_guide/storing/store_jac.md @@ -1,6 +1,6 @@ # Store on Jina AI Cloud When you want to use your [`DocList`][docarray.DocList] in another place, you can use the -[`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] method to push the `DocList` to S3 and later use the +[`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] method to push the `DocList` to Jina AI Cloud and later use the [`.pull()`][docarray.array.doc_list.pushpull.PushPullMixin.pull] function to pull its content back. !!! note diff --git a/docs/user_guide/storing/store_s3.md b/docs/user_guide/storing/store_s3.md index bd896b107e6..c4e0878133b 100644 --- a/docs/user_guide/storing/store_s3.md +++ b/docs/user_guide/storing/store_s3.md @@ -73,10 +73,6 @@ if __name__ == '__main__': ) docs.push(f's3://{BUCKET}/simple_docs') docs_pull = DocList[SimpleDoc].pull(f's3://{BUCKET}/simple_docs') - - # delete the bucket - s3.Bucket(BUCKET).objects.all().delete() - s3.Bucket(BUCKET).delete() ``` Under the bucket `tmp_bucket`, there is a file with the name of `simple_docs.docs` being created to store the `DocList`. @@ -95,9 +91,52 @@ When you have a large amount of documents to push and pull, you could use the st ## Delete To delete the store, you need to use the static method [`.delete()`][docarray.store.s3.S3DocStore.delete] of [`S3DocStore`][docarray.store.s3.S3DocStore] class. 
-```python -from docarray.store import S3DocStore +```python hl_lines="44-47" +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +if __name__ == '__main__': + import boto3 + from botocore.client import Config + + BUCKET = 'tmp_bucket' + my_session = boto3.session.Session() + s3 = my_session.resource( + service_name='s3', + region_name="us-east-1", + use_ssl=False, + endpoint_url="http://localhost:9005", + aws_access_key_id="minioadmin", + aws_secret_access_key="minioadmin", + config=Config(signature_version="s3v4"), + ) + # make a bucket + s3.create_bucket(Bucket=BUCKET) + + store_docs = [SimpleDoc(text=f'doc {i}') for i in range(8)] + docs = DocList[SimpleDoc]() + docs.extend([SimpleDoc(text=f'doc {i}') for i in range(8)]) + + # .push() and .pull() use the default boto3 client + boto3.Session.client.__defaults__ = ( + "us-east-1", + None, + False, + None, + "http://localhost:9005", + "minioadmin", + "minioadmin", + None, + Config(signature_version="s3v4"), + ) + docs.push(f's3://{BUCKET}/simple_docs') + + # delete bucket + from docarray.store import S3DocStore -BUCKET = 'tmp_bucket' -success = S3DocStore.delete(f's3://{BUCKET}/simple_docs') + success = S3DocStore.delete(f'{BUCKET}/simple_docs') + ``` From 77c7ea23eeafea217e6f1282d2e46296e5c7d234 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 15:35:35 +0200 Subject: [PATCH 13/20] docs: exclude jacdocstore docs from test for now Signed-off-by: anna-charlotte --- tests/documentation/test_docs.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index 085022b5a00..ce8c19b115e 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -43,15 +43,20 @@ def check_md_file(fpath, memory=False, lang="python", keyword_ignore=[]): check_raw_file_full(text, lang=lang, keyword_ignore=keyword_ignore) -@pytest.mark.parametrize( - 'fpath', - [
- *list(pathlib.Path('docs/user_guide').glob('**/*.md')), - *list(pathlib.Path('docs/data_types').glob('**/*.md')), - ], - ids=str, -) +paths = [ + *list(pathlib.Path('docs/user_guide').glob('**/*.md')), + *list(pathlib.Path('docs/data_types').glob('**/*.md')), +] +exclude = [pathlib.Path('docs/user_guide/storing/store_jac.md')] + +for path in exclude: + if path in paths: + paths.remove(path) + + +@pytest.mark.parametrize('fpath', paths, ids=str) def test_files_good(fpath): + print(f"fpath = {fpath}") check_md_file(fpath=fpath, memory=True) From 8b118d3b723ab855e5df1535fcbc7b62431140c2 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Thu, 13 Apr 2023 15:39:57 +0200 Subject: [PATCH 14/20] Revert "docs: exclude jacdocstore docs from test for now" This reverts commit a52fed5430369e0306930ac27139cc64eb6456e9. Signed-off-by: anna-charlotte --- tests/documentation/test_docs.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index ce8c19b115e..085022b5a00 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -43,20 +43,15 @@ def check_md_file(fpath, memory=False, lang="python", keyword_ignore=[]): check_raw_file_full(text, lang=lang, keyword_ignore=keyword_ignore) -paths = [ - *list(pathlib.Path('docs/user_guide').glob('**/*.md')), - *list(pathlib.Path('docs/data_types').glob('**/*.md')), -] -exclude = [pathlib.Path('docs/user_guide/storing/store_jac.md')] - -for path in exclude: - if path in paths: - paths.remove(path) - - -@pytest.mark.parametrize('fpath', paths, ids=str) +@pytest.mark.parametrize( + 'fpath', + [ + *list(pathlib.Path('docs/user_guide').glob('**/*.md')), + *list(pathlib.Path('docs/data_types').glob('**/*.md')), + ], + ids=str, +) def test_files_good(fpath): - print(f"fpath = {fpath}") check_md_file(fpath=fpath, memory=True) From ac7a567533ef0054f2634f1efdc8dd59f130ddf6 Mon Sep 17 00:00:00 2001 From: 
anna-charlotte Date: Thu, 13 Apr 2023 15:47:58 +0200 Subject: [PATCH 15/20] docs: exclude jac tests Signed-off-by: anna-charlotte --- tests/documentation/test_docs.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index 085022b5a00..1d8fe1679b3 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -52,7 +52,10 @@ def check_md_file(fpath, memory=False, lang="python", keyword_ignore=[]): ids=str, ) def test_files_good(fpath): - check_md_file(fpath=fpath, memory=True) + keyword_ignore = [] + if 'store_jac.md' in str(fpath): + keyword_ignore = ['jac'] + check_md_file(fpath=fpath, memory=True, keyword_ignore=keyword_ignore) def test_readme(): From f15955def47efabb8cd959f3c4f44ca0535bc165 Mon Sep 17 00:00:00 2001 From: Shukri Date: Thu, 13 Apr 2023 16:35:15 +0200 Subject: [PATCH 16/20] feat: weaviate document index V2! (#1367) Signed-off-by: anna-charlotte --- .pre-commit-config.yaml | 2 +- docarray/index/abstract.py | 8 +- docarray/index/backends/weaviate.py | 833 ++++++++++++++++++ poetry.lock | 145 ++- pyproject.toml | 2 + .../doc_index/weaviate/docker-compose.yml | 27 + .../doc_index/weaviate/fixture_weaviate.py | 41 + .../weaviate/test_column_config_weaviate.py | 33 + .../doc_index/weaviate/test_find_weaviate.py | 66 ++ .../weaviate/test_index_get_del_weaviate.py | 452 ++++++++++ 10 files changed, 1589 insertions(+), 20 deletions(-) create mode 100644 docarray/index/backends/weaviate.py create mode 100644 tests/integrations/doc_index/weaviate/docker-compose.yml create mode 100644 tests/integrations/doc_index/weaviate/fixture_weaviate.py create mode 100644 tests/integrations/doc_index/weaviate/test_column_config_weaviate.py create mode 100644 tests/integrations/doc_index/weaviate/test_find_weaviate.py create mode 100644 tests/integrations/doc_index/weaviate/test_index_get_del_weaviate.py diff --git a/.pre-commit-config.yaml 
b/.pre-commit-config.yaml index bccbe2f206d..9df8e8a06d2 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -32,4 +32,4 @@ repos: args: - -S additional_dependencies: - - black==22.3.0 \ No newline at end of file + - black==22.3.0 diff --git a/docarray/index/abstract.py b/docarray/index/abstract.py index 4feb576ed76..3c423137259 100644 --- a/docarray/index/abstract.py +++ b/docarray/index/abstract.py @@ -49,12 +49,12 @@ class FindResultBatched(NamedTuple): documents: List[DocList] - scores: np.ndarray + scores: List[np.ndarray] class _FindResultBatched(NamedTuple): documents: Union[List[DocList], List[List[Dict[str, Any]]]] - scores: np.ndarray + scores: List[np.ndarray] def _raise_not_composable(name): @@ -571,7 +571,9 @@ def text_search_batched( if len(da_list) > 0 and isinstance(da_list[0], List): docs = [self._dict_list_to_docarray(docs) for docs in da_list] - return FindResultBatched(documents=docs, scores=scores) + return FindResultBatched(documents=docs, scores=scores) + + return FindResultBatched(documents=da_list, scores=scores) ########################################################## # Helper methods # diff --git a/docarray/index/backends/weaviate.py b/docarray/index/backends/weaviate.py new file mode 100644 index 00000000000..c54d3e76f47 --- /dev/null +++ b/docarray/index/backends/weaviate.py @@ -0,0 +1,833 @@ +import base64 +import copy +import logging +import os +from dataclasses import dataclass, field +from pathlib import Path +from typing import ( + Any, + Dict, + Generator, + Generic, + List, + Optional, + Sequence, + Tuple, + Type, + TypeVar, + Union, + cast, +) + +import numpy as np +import weaviate +from pydantic import parse_obj_as +from typing_extensions import Literal + +import docarray +from docarray import BaseDoc, DocList +from docarray.index.abstract import BaseDocIndex, FindResultBatched, _FindResultBatched +from docarray.typing import AnyTensor +from docarray.typing.tensor.abstract_tensor import AbstractTensor +from 
docarray.typing.tensor.ndarray import NdArray +from docarray.utils.find import FindResult, _FindResult + +TSchema = TypeVar('TSchema', bound=BaseDoc) +T = TypeVar('T', bound='WeaviateDocumentIndex') + + +DEFAULT_BATCH_CONFIG = { + "batch_size": 20, + "dynamic": False, + "timeout_retries": 3, + "num_workers": 1, +} + +DEFAULT_BINARY_PATH = str(Path.home() / ".cache/weaviate-embedded/") +DEFAULT_PERSISTENCE_DATA_PATH = str(Path.home() / ".local/share/weaviate") + + +@dataclass +class EmbeddedOptions: + persistence_data_path: str = os.environ.get( + "XDG_DATA_HOME", DEFAULT_PERSISTENCE_DATA_PATH + ) + binary_path: str = os.environ.get("XDG_CACHE_HOME", DEFAULT_BINARY_PATH) + version: str = "latest" + port: int = 6666 + hostname: str = "127.0.0.1" + additional_env_vars: Optional[Dict[str, str]] = None + + +# TODO: add more types and figure out how to handle text vs string type +# see https://weaviate.io/developers/weaviate/configuration/datatypes +WEAVIATE_PY_VEC_TYPES = [list, np.ndarray, AbstractTensor] +WEAVIATE_PY_TYPES = [bool, int, float, str, docarray.typing.ID] + +# "id" and "_id" are reserved names in weaviate so we need to use a different +# name for the id column in a BaseDocument +DOCUMENTID = "docarrayid" + + +class WeaviateDocumentIndex(BaseDocIndex, Generic[TSchema]): + def __init__(self, db_config=None, **kwargs) -> None: + self.embedding_column: Optional[str] = None + self.properties: Optional[List[str]] = None + # keep track of the column name that contains the bytes + # type because we will store them as a base64 encoded string + # in weaviate + self.bytes_columns: List[str] = [] + # keep track of the array columns that are not embeddings because we will + # convert them to python lists before uploading to weaviate + self.nonembedding_array_columns: List[str] = [] + super().__init__(db_config=db_config, **kwargs) + self._db_config: WeaviateDocumentIndex.DBConfig = cast( + WeaviateDocumentIndex.DBConfig, self._db_config + ) + self._runtime_config: 
WeaviateDocumentIndex.RuntimeConfig = cast( + WeaviateDocumentIndex.RuntimeConfig, self._runtime_config + ) + + if self._db_config.embedded_options: + self._client = weaviate.Client( + embedded_options=self._db_config.embedded_options + ) + else: + self._client = weaviate.Client( + self._db_config.host, auth_client_secret=self._build_auth_credentials() + ) + + self._configure_client() + self._validate_columns() + self._set_embedding_column() + self._set_properties() + self._create_schema() + + def _set_properties(self) -> None: + field_overwrites = {"id": DOCUMENTID} + + self.properties = [ + field_overwrites.get(k, k) + for k, v in self._column_infos.items() + if v.config.get('is_embedding', False) is False + ] + + def _validate_columns(self) -> None: + # must have at most one column with property is_embedding=True + # and that column must be of type WEAVIATE_PY_VEC_TYPES + # TODO: update when https://github.com/weaviate/weaviate/issues/2424 + # is implemented and discuss best interface to signal which column(s) + # should be used for embeddings + num_embedding_columns = 0 + + for column_name, column_info in self._column_infos.items(): + if column_info.config.get('is_embedding', False): + num_embedding_columns += 1 + # if db_type is not 'number[]', then that means the type of the column in + # the given schema is not one of WEAVIATE_PY_VEC_TYPES + # note: the mapping between a column's type in the schema to a weaviate type + # is handled by the python_type_to_db_type method + if column_info.db_type != 'number[]': + raise ValueError( + f'Column {column_name} is marked as embedding but is not of type {WEAVIATE_PY_VEC_TYPES}' + ) + + if num_embedding_columns > 1: + raise ValueError( + f'Only one column can be marked as embedding but found {num_embedding_columns} columns marked as embedding' + ) + + def _set_embedding_column(self) -> None: + for column_name, column_info in self._column_infos.items(): + if column_info.config.get('is_embedding', False): + 
self.embedding_column = column_name + break + + def _configure_client(self) -> None: + self._client.batch.configure(**self._runtime_config.batch_config) + + def _build_auth_credentials(self): + dbconfig = self._db_config + + if dbconfig.auth_api_key: + return weaviate.auth.AuthApiKey(api_key=dbconfig.auth_api_key) + elif dbconfig.username and dbconfig.password: + return weaviate.auth.AuthClientPassword( + dbconfig.username, dbconfig.password, dbconfig.scopes + ) + else: + return None + + def configure(self, runtime_config=None, **kwargs) -> None: + super().configure(runtime_config, **kwargs) + self._configure_client() + + def _create_schema(self) -> None: + schema: Dict[str, Any] = {} + + properties = [] + column_infos = self._column_infos + + for column_name, column_info in column_infos.items(): + # in weaviate, we do not create a property for the doc's embeddings + if column_name == self.embedding_column: + continue + if column_info.db_type == 'blob': + self.bytes_columns.append(column_name) + if column_info.db_type == 'number[]': + self.nonembedding_array_columns.append(column_name) + prop = { + "name": column_name + if column_name != 'id' + else DOCUMENTID, # in weaviate, id and _id is a reserved keyword + "dataType": [column_info.db_type], + } + properties.append(prop) + + # TODO: What is the best way to specify other config that is part of schema? + # e.g. invertedIndexConfig, shardingConfig, moduleConfig, vectorIndexConfig + # and configure replication + # we will update base on user feedback + schema["properties"] = properties + schema["class"] = self._db_config.index_name + + # TODO: Use exists() instead of contains() when available + # see https://github.com/weaviate/weaviate-python-client/issues/232 + if self._client.schema.contains(schema): + logging.warning( + f"Found index {self._db_config.index_name} with schema {schema}. Will reuse existing schema." 
+ ) + else: + self._client.schema.create_class(schema) + + @dataclass + class DBConfig(BaseDocIndex.DBConfig): + host: str = 'http://localhost:8080' + index_name: str = 'Document' + username: Optional[str] = None + password: Optional[str] = None + scopes: List[str] = field(default_factory=lambda: ["offline_access"]) + auth_api_key: Optional[str] = None + embedded_options: Optional[EmbeddedOptions] = None + + @dataclass + class RuntimeConfig(BaseDocIndex.RuntimeConfig): + default_column_config: Dict[Any, Dict[str, Any]] = field( + default_factory=lambda: { + np.ndarray: {}, + docarray.typing.ID: {}, + 'string': {}, + 'text': {}, + 'int': {}, + 'number': {}, + 'boolean': {}, + 'number[]': {}, + 'blob': {}, + } + ) + + batch_config: Dict[str, Any] = field( + default_factory=lambda: DEFAULT_BATCH_CONFIG + ) + + def _del_items(self, doc_ids: Sequence[str]): + has_matches = True + + operands = [ + {"path": [DOCUMENTID], "operator": "Equal", "valueString": doc_id} + for doc_id in doc_ids + ] + where_filter = { + "operator": "Or", + "operands": operands, + } + + # do a loop because there is a limit to how many objects can be deleted at + # in a single query + # see: https://weaviate.io/developers/weaviate/api/rest/batch#maximum-number-of-deletes-per-query + while has_matches: + results = self._client.batch.delete_objects( + class_name=self._db_config.index_name, + where=where_filter, + ) + + has_matches = results["results"]["matches"] + + def _filter(self, filter_query: Any, limit: int) -> Union[DocList, List[Dict]]: + self._overwrite_id(filter_query) + + results = ( + self._client.query.get(self._db_config.index_name, self.properties) + .with_additional("vector") + .with_where(filter_query) + .with_limit(limit) + .do() + ) + + docs = results["data"]["Get"][self._db_config.index_name] + + return [self._parse_weaviate_result(doc) for doc in docs] + + def _filter_batched( + self, filter_queries: Any, limit: int + ) -> Union[List[DocList], List[List[Dict]]]: + for 
filter_query in filter_queries: + self._overwrite_id(filter_query) + + qs = [ + self._client.query.get(self._db_config.index_name, self.properties) + .with_additional("vector") + .with_where(filter_query) + .with_limit(limit) + .with_alias(f'query_{i}') + for i, filter_query in enumerate(filter_queries) + ] + + batched_results = self._client.query.multi_get(qs).do() + + return [ + [self._parse_weaviate_result(doc) for doc in batched_result] + for batched_result in batched_results["data"]["Get"].values() + ] + + def find( + self, + query: Union[AnyTensor, BaseDoc], + search_field: str = '', + limit: int = 10, + **kwargs, + ): + self._logger.debug('Executing `find`') + if search_field != '': + raise ValueError( + 'Argument search_field is not supported for WeaviateDocumentIndex.\nSet search_field to an empty string to proceed.' + ) + embedding_field = self._get_embedding_field() + if isinstance(query, BaseDoc): + query_vec = self._get_values_by_column([query], embedding_field)[0] + else: + query_vec = query + query_vec_np = self._to_numpy(query_vec) + docs, scores = self._find( + query_vec_np, search_field=search_field, limit=limit, **kwargs + ) + + if isinstance(docs, List): + docs = self._dict_list_to_docarray(docs) + + return FindResult(documents=docs, scores=scores) + + def _overwrite_id(self, where_filter): + """ + Overwrite the id field in the where filter to DOCUMENTID + if the "id" field is present in the path + """ + for key, value in where_filter.items(): + if key == "path" and value == ["id"]: + where_filter[key] = [DOCUMENTID] + elif isinstance(value, dict): + self._overwrite_id(value) + elif isinstance(value, list): + for item in value: + if isinstance(item, dict): + self._overwrite_id(item) + + def _find( + self, + query: np.ndarray, + limit: int, + search_field: str = '', + score_name: Literal["certainty", "distance"] = "certainty", + score_threshold: Optional[float] = None, + ) -> _FindResult: + index_name = self._db_config.index_name + if 
search_field: + logging.warning( + 'Argument search_field is not supported for WeaviateDocumentIndex. Ignoring.' + ) + near_vector: Dict[str, Any] = { + "vector": query, + } + if score_threshold: + near_vector[score_name] = score_threshold + + results = ( + self._client.query.get(index_name, self.properties) + .with_near_vector( + near_vector, + ) + .with_limit(limit) + .with_additional([score_name, "vector"]) + .do() + ) + + docs, scores = self._format_response( + results["data"]["Get"][index_name], score_name + ) + return _FindResult(docs, parse_obj_as(NdArray, scores)) + + def _format_response( + self, results, score_name + ) -> Tuple[List[Dict[Any, Any]], List[Any]]: + """ + Format the response from Weaviate into a Tuple of DocList and scores + """ + + documents = [] + scores = [] + + for result in results: + score = result["_additional"][score_name] + scores.append(score) + + document = self._parse_weaviate_result(result) + documents.append(document) + + return documents, scores + + def find_batched( + self, + queries: Union[AnyTensor, DocList], + search_field: str = '', + limit: int = 10, + **kwargs, + ) -> FindResultBatched: + self._logger.debug('Executing `find_batched`') + if search_field != '': + raise ValueError( + 'Argument search_field is not supported for WeaviateDocumentIndex.\nSet search_field to an empty string to proceed.' 
+ ) + embedding_field = self._get_embedding_field() + + if isinstance(queries, Sequence): + query_vec_list = self._get_values_by_column(queries, embedding_field) + query_vec_np = np.stack( + tuple(self._to_numpy(query_vec) for query_vec in query_vec_list) + ) + else: + query_vec_np = self._to_numpy(queries) + + da_list, scores = self._find_batched( + query_vec_np, search_field=search_field, limit=limit, **kwargs + ) + + if len(da_list) > 0 and isinstance(da_list[0], List): + da_list = [self._dict_list_to_docarray(docs) for docs in da_list] + + return FindResultBatched(documents=da_list, scores=scores) # type: ignore + + def _find_batched( + self, + queries: np.ndarray, + limit: int, + search_field: str = '', + score_name: Literal["certainty", "distance"] = "certainty", + score_threshold: Optional[float] = None, + ) -> _FindResultBatched: + qs = [] + for i, query in enumerate(queries): + near_vector: Dict[str, Any] = {"vector": query} + + if score_threshold: + near_vector[score_name] = score_threshold + + q = ( + self._client.query.get(self._db_config.index_name, self.properties) + .with_near_vector(near_vector) + .with_limit(limit) + .with_additional([score_name, "vector"]) + .with_alias(f'query_{i}') + ) + + qs.append(q) + + results = self._client.query.multi_get(qs).do() + + docs_and_scores = [ + self._format_response(result, score_name) + for result in results["data"]["Get"].values() + ] + + docs, scores = zip(*docs_and_scores) + return _FindResultBatched(list(docs), list(scores)) + + def _get_items(self, doc_ids: Sequence[str]) -> List[Dict]: + # TODO: warn when doc_ids > QUERY_MAXIMUM_RESULTS after + # https://github.com/weaviate/weaviate/issues/2792 + # is implemented + operands = [ + {"path": [DOCUMENTID], "operator": "Equal", "valueString": doc_id} + for doc_id in doc_ids + ] + where_filter = { + "operator": "Or", + "operands": operands, + } + + results = ( + self._client.query.get(self._db_config.index_name, self.properties) + .with_where(where_filter) + 
.with_additional("vector") + .do() + ) + + docs = [ + self._parse_weaviate_result(doc) + for doc in results["data"]["Get"][self._db_config.index_name] + ] + + return docs + + def _rewrite_documentid(self, document: Dict): + doc = document.copy() + + # rewrite the id to DOCUMENTID + document_id = doc.pop('id') + doc[DOCUMENTID] = document_id + + return doc + + def _parse_weaviate_result(self, result: Dict) -> Dict: + """ + Parse the result from weaviate to a format that is compatible with the schema + that was used to initialize weaviate with. + """ + + result = result.copy() + + # rewrite the DOCUMENTID to id + if DOCUMENTID in result: + result['id'] = result.pop(DOCUMENTID) + + # take the vector from the _additional field + if '_additional' in result and self.embedding_column: + additional_fields = result.pop('_additional') + if 'vector' in additional_fields: + result[self.embedding_column] = additional_fields['vector'] + + # convert any base64 encoded bytes column to bytes + self._decode_base64_properties_to_bytes(result) + + return result + + def _index(self, column_to_data: Dict[str, Generator[Any, None, None]]): + docs = self._transpose_col_value_dict(column_to_data) + index_name = self._db_config.index_name + + with self._client.batch as batch: + for doc in docs: + parsed_doc = self._rewrite_documentid(doc) + self._encode_bytes_columns_to_base64(parsed_doc) + self._convert_nonembedding_array_to_list(parsed_doc) + vector = ( + parsed_doc.pop(self.embedding_column) + if self.embedding_column + else None + ) + + batch.add_data_object( + uuid=weaviate.util.generate_uuid5(parsed_doc, index_name), + data_object=parsed_doc, + class_name=index_name, + vector=vector, + ) + + def _text_search( + self, query: str, limit: int, search_field: str = '' + ) -> _FindResult: + index_name = self._db_config.index_name + bm25 = {"query": query, "properties": [search_field]} + + results = ( + self._client.query.get(index_name, self.properties) + .with_bm25(bm25) + 
.with_limit(limit) + .with_additional(["score", "vector"]) + .do() + ) + + docs, scores = self._format_response( + results["data"]["Get"][index_name], "score" + ) + + return _FindResult(documents=docs, scores=parse_obj_as(NdArray, scores)) + + def _text_search_batched( + self, queries: Sequence[str], limit: int, search_field: str = '' + ) -> _FindResultBatched: + qs = [] + for i, query in enumerate(queries): + bm25 = {"query": query, "properties": [search_field]} + + q = ( + self._client.query.get(self._db_config.index_name, self.properties) + .with_bm25(bm25) + .with_limit(limit) + .with_additional(["score", "vector"]) + .with_alias(f'query_{i}') + ) + + qs.append(q) + + results = self._client.query.multi_get(qs).do() + + docs_and_scores = [ + self._format_response(result, "score") + for result in results["data"]["Get"].values() + ] + + docs, scores = zip(*docs_and_scores) + return _FindResultBatched(list(docs), list(scores)) + + def execute_query(self, query: Any, *args, **kwargs) -> Any: + da_class = DocList.__class_getitem__(cast(Type[BaseDoc], self._schema)) + + if isinstance(query, self.QueryBuilder): + batched_results = self._client.query.multi_get(query._queries).do() + batched_docs = batched_results["data"]["Get"].values() + + def f(doc): + # TODO: use + # return self._schema(**self._parse_weaviate_result(doc)) + # when https://github.com/weaviate/weaviate/issues/2858 + # is fixed + return self._schema.from_view(self._parse_weaviate_result(doc)) # type: ignore + + results = [ + da_class([f(doc) for doc in batched_doc]) + for batched_doc in batched_docs + ] + return results if len(results) > 1 else results[0] + + # TODO: validate graphql query string before sending it to weaviate + if isinstance(query, str): + return self._client.query.raw(query) + + def num_docs(self) -> int: + index_name = self._db_config.index_name + result = self._client.query.aggregate(index_name).with_meta_count().do() + # TODO: decorator to check for errors + total_docs = 
result["data"]["Aggregate"][index_name][0]["meta"]["count"] + + return total_docs + + def python_type_to_db_type(self, python_type: Type) -> Any: + """Map python type to database type.""" + for allowed_type in WEAVIATE_PY_VEC_TYPES: + if issubclass(python_type, allowed_type): + return 'number[]' + + py_weaviate_type_map = { + docarray.typing.ID: 'string', + str: 'text', + int: 'int', + float: 'number', + bool: 'boolean', + np.ndarray: 'number[]', + bytes: 'blob', + } + + for py_type, weaviate_type in py_weaviate_type_map.items(): + if issubclass(python_type, py_type): + return weaviate_type + + raise ValueError(f'Unsupported column type for {type(self)}: {python_type}') + + def build_query(self) -> BaseDocIndex.QueryBuilder: + return self.QueryBuilder(self) + + def _get_embedding_field(self): + for colname, colinfo in self._column_infos.items(): + # no need to check for missing is_embedding attribute because this check + # is done when the index is created + if colinfo.config.get('is_embedding', None): + return colname + + # just to pass mypy + return "" + + def _encode_bytes_columns_to_base64(self, doc): + for column in self.bytes_columns: + if doc[column] is not None: + doc[column] = base64.b64encode(doc[column]).decode("utf-8") + + def _decode_base64_properties_to_bytes(self, doc): + for column in self.bytes_columns: + if doc[column] is not None: + doc[column] = base64.b64decode(doc[column]) + + def _convert_nonembedding_array_to_list(self, doc): + for column in self.nonembedding_array_columns: + if doc[column] is not None: + doc[column] = doc[column].tolist() + + class QueryBuilder(BaseDocIndex.QueryBuilder): + def __init__(self, document_index): + self._queries = [ + document_index._client.query.get( + document_index._db_config.index_name, document_index.properties + ) + ] + + def build(self) -> Any: + num_queries = len(self._queries) + + for i in range(num_queries): + q = self._queries[i] + if self._is_hybrid_query(q): + self._make_proper_hybrid_query(q) + 
q.with_additional(["vector"]).with_alias(f'query_{i}') + + return self + + def _is_hybrid_query(self, query: weaviate.gql.get.GetBuilder) -> bool: + """ + Checks if a query has been composed with both a with_bm25 and a with_near_vector verb + """ + if not query._near_ask: + return False + else: + return query._bm25 and query._near_ask._content.get("vector", None) + + def _make_proper_hybrid_query( + self, query: weaviate.gql.get.GetBuilder + ) -> weaviate.gql.get.GetBuilder: + """ + Modifies a query to be a proper hybrid query. + + In weaviate, a query with with_bm25 and with_near_vector verb is not a hybrid query. + We need to use the with_hybrid verb to make it a hybrid query. + """ + + text_query = query._bm25.query + vector_query = query._near_ask._content["vector"] + hybrid_query = weaviate.gql.get.Hybrid( + query=text_query, vector=vector_query, alpha=0.5 + ) + + query._bm25 = None + query._near_ask = None + query._hybrid = hybrid_query + + def _overwrite_id(self, where_filter): + """ + Overwrite the id field in the where filter to DOCUMENTID + if the "id" field is present in the path + """ + for key, value in where_filter.items(): + if key == "path" and value == ["id"]: + where_filter[key] = [DOCUMENTID] + elif isinstance(value, dict): + self._overwrite_id(value) + elif isinstance(value, list): + for item in value: + if isinstance(item, dict): + self._overwrite_id(item) + + def find( + self, + query, + score_name: Literal["certainty", "distance"] = "certainty", + score_threshold: Optional[float] = None, + ) -> Any: + near_vector = { + "vector": query, + } + if score_threshold: + near_vector[score_name] = score_threshold + + self._queries[0] = self._queries[0].with_near_vector(near_vector) + return self + + def find_batched( + self, + queries, + score_name: Literal["certainty", "distance"] = "certainty", + score_threshold: Optional[float] = None, + ) -> Any: + adj_queries, adj_clauses = self._resize_queries_and_clauses( + self._queries, queries + ) + 
new_queries = [] + + for query, clause in zip(adj_queries, adj_clauses): + near_vector = { + "vector": clause, + } + if score_threshold: + near_vector[score_name] = score_threshold + + new_queries.append(query.with_near_vector(near_vector)) + + self._queries = new_queries + + return self + + def filter(self, where_filter) -> Any: + where_filter = where_filter.copy() + self._overwrite_id(where_filter) + self._queries[0] = self._queries[0].with_where(where_filter) + return self + + def filter_batched(self, filters) -> Any: + adj_queries, adj_clauses = self._resize_queries_and_clauses( + self._queries, filters + ) + new_queries = [] + + for query, clause in zip(adj_queries, adj_clauses): + clause = clause.copy() + self._overwrite_id(clause) + new_queries.append(query.with_where(clause)) + + self._queries = new_queries + + return self + + def text_search(self, query, search_field) -> Any: + bm25 = {"query": query, "properties": [search_field]} + self._queries[0] = self._queries[0].with_bm25(**bm25) + return self + + def text_search_batched(self, queries, search_field) -> Any: + adj_queries, adj_clauses = self._resize_queries_and_clauses( + self._queries, queries + ) + new_queries = [] + + for query, clause in zip(adj_queries, adj_clauses): + bm25 = {"query": clause, "properties": [search_field]} + new_queries.append(query.with_bm25(**bm25)) + + self._queries = new_queries + + return self + + def limit(self, limit: int) -> Any: + self._queries = [query.with_limit(limit) for query in self._queries] + return self + + def _resize_queries_and_clauses(self, queries, clauses): + """ + Adjust the length and content of queries and clauses so that we can compose + them element-wise + """ + num_clauses = len(clauses) + num_queries = len(queries) + + # if there's only one clause, then we assume that it should be applied + # to every query + if num_clauses == 1: + return queries, clauses * num_queries + # if there's only one query, then we can lengthen it to match the number + # of 
clauses + elif num_queries == 1: + return [copy.deepcopy(queries[0]) for _ in range(num_clauses)], clauses + # if the number of queries and clauses is the same, then we can just + # return them as-is + elif num_clauses == num_queries: + return queries, clauses + else: + raise ValueError( + f"Can't compose {num_clauses} clauses with {num_queries} queries" + ) diff --git a/poetry.lock b/poetry.lock index cd46e05c897..398a9ec992d 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry and should not be changed by hand. +# This file is automatically @generated by Poetry 1.4.2 and should not be changed by hand. [[package]] name = "aiohttp" @@ -264,6 +264,21 @@ docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] tests = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "zope.interface"] tests-no-zope = ["cloudpickle", "coverage[toml] (>=5.0.2)", "hypothesis", "mypy (>=0.900,!=0.940)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins"] +[[package]] +name = "authlib" +version = "1.2.0" +description = "The ultimate Python library in building OAuth and OpenID Connect servers and clients." +category = "main" +optional = false +python-versions = "*" +files = [ + {file = "Authlib-1.2.0-py2.py3-none-any.whl", hash = "sha256:4ddf4fd6cfa75c9a460b361d4bd9dac71ffda0be879dbe4292a02e92349ad55a"}, + {file = "Authlib-1.2.0.tar.gz", hash = "sha256:4fa3e80883a5915ef9f5bc28630564bc4ed5b5af39812a3ff130ec76bd631e9d"}, +] + +[package.dependencies] +cryptography = ">=3.2" + [[package]] name = "av" version = "10.0.0" @@ -525,7 +540,7 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." 
-category = "dev" +category = "main" optional = false python-versions = "*" files = [ @@ -698,6 +713,48 @@ files = [ [package.extras] test = ["flake8 (==3.7.8)", "hypothesis (==3.55.3)"] +[[package]] +name = "cryptography" +version = "40.0.1" +description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." +category = "main" +optional = false +python-versions = ">=3.6" +files = [ + {file = "cryptography-40.0.1-cp36-abi3-macosx_10_12_universal2.whl", hash = "sha256:918cb89086c7d98b1b86b9fdb70c712e5a9325ba6f7d7cfb509e784e0cfc6917"}, + {file = "cryptography-40.0.1-cp36-abi3-macosx_10_12_x86_64.whl", hash = "sha256:9618a87212cb5200500e304e43691111570e1f10ec3f35569fdfcd17e28fd797"}, + {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3a4805a4ca729d65570a1b7cac84eac1e431085d40387b7d3bbaa47e39890b88"}, + {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:63dac2d25c47f12a7b8aa60e528bfb3c51c5a6c5a9f7c86987909c6c79765554"}, + {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0a4e3406cfed6b1f6d6e87ed243363652b2586b2d917b0609ca4f97072994405"}, + {file = "cryptography-40.0.1-cp36-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:1e0af458515d5e4028aad75f3bb3fe7a31e46ad920648cd59b64d3da842e4356"}, + {file = "cryptography-40.0.1-cp36-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:d8aa3609d337ad85e4eb9bb0f8bcf6e4409bfb86e706efa9a027912169e89122"}, + {file = "cryptography-40.0.1-cp36-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:cf91e428c51ef692b82ce786583e214f58392399cf65c341bc7301d096fa3ba2"}, + {file = "cryptography-40.0.1-cp36-abi3-win32.whl", hash = "sha256:650883cc064297ef3676b1db1b7b1df6081794c4ada96fa457253c4cc40f97db"}, + {file = "cryptography-40.0.1-cp36-abi3-win_amd64.whl", hash = "sha256:a805a7bce4a77d51696410005b3e85ae2839bad9aa38894afc0aa99d8e0c3160"}, + {file = 
"cryptography-40.0.1-pp38-pypy38_pp73-macosx_10_12_x86_64.whl", hash = "sha256:cd033d74067d8928ef00a6b1327c8ea0452523967ca4463666eeba65ca350d4c"}, + {file = "cryptography-40.0.1-pp38-pypy38_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d36bbeb99704aabefdca5aee4eba04455d7a27ceabd16f3b3ba9bdcc31da86c4"}, + {file = "cryptography-40.0.1-pp38-pypy38_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:32057d3d0ab7d4453778367ca43e99ddb711770477c4f072a51b3ca69602780a"}, + {file = "cryptography-40.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:f5d7b79fa56bc29580faafc2ff736ce05ba31feaa9d4735048b0de7d9ceb2b94"}, + {file = "cryptography-40.0.1-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:7c872413353c70e0263a9368c4993710070e70ab3e5318d85510cc91cce77e7c"}, + {file = "cryptography-40.0.1-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:28d63d75bf7ae4045b10de5413fb1d6338616e79015999ad9cf6fc538f772d41"}, + {file = "cryptography-40.0.1-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:6f2bbd72f717ce33100e6467572abaedc61f1acb87b8d546001328d7f466b778"}, + {file = "cryptography-40.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:cc3a621076d824d75ab1e1e530e66e7e8564e357dd723f2533225d40fe35c60c"}, + {file = "cryptography-40.0.1.tar.gz", hash = "sha256:2803f2f8b1e95f614419926c7e6f55d828afc614ca5ed61543877ae668cc3472"}, +] + +[package.dependencies] +cffi = ">=1.12" + +[package.extras] +docs = ["sphinx (>=5.3.0)", "sphinx-rtd-theme (>=1.1.1)"] +docstest = ["pyenchant (>=1.6.11)", "sphinxcontrib-spelling (>=4.0.1)", "twine (>=1.12.0)"] +pep8test = ["black", "check-manifest", "mypy", "ruff"] +sdist = ["setuptools-rust (>=0.11.4)"] +ssh = ["bcrypt (>=3.1.5)"] +test = ["iso8601", "pretend", "pytest (>=6.2.0)", "pytest-benchmark", "pytest-cov", "pytest-shard (>=0.1.2)", "pytest-subtests", "pytest-xdist"] +test-randomorder = ["pytest-randomly"] +tox = ["tox"] + [[package]] name = "debugpy" version = "1.6.3" @@ -730,7 +787,7 @@ files = [ name = "decorator" 
version = "5.1.1" description = "Decorators for Humans" -category = "dev" +category = "main" optional = false python-versions = ">=3.5" files = [ @@ -3004,7 +3061,7 @@ validation = ["lxml"] name = "pycparser" version = "2.21" description = "C parser in Python" -category = "dev" +category = "main" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -3541,25 +3598,25 @@ files = [ [[package]] name = "requests" -version = "2.27.1" +version = "2.28.2" description = "Python HTTP for Humans." category = "main" optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*, !=3.5.*" +python-versions = ">=3.7, <4" files = [ - {file = "requests-2.27.1-py2.py3-none-any.whl", hash = "sha256:f22fa1e554c9ddfd16e6e41ac79759e17be9e492b3587efa038054674760e72d"}, - {file = "requests-2.27.1.tar.gz", hash = "sha256:68d7c56fd5a8999887728ef304a6d12edc7be74f1cfa47714fc8b414525c9a61"}, + {file = "requests-2.28.2-py3-none-any.whl", hash = "sha256:64299f4909223da747622c030b781c0d7811e359c37124b4bd368fb8c6518baa"}, + {file = "requests-2.28.2.tar.gz", hash = "sha256:98b1b2782e3c6c4904938b84c0eb932721069dfdb9134313beff7c83c2df24bf"}, ] [package.dependencies] certifi = ">=2017.4.17" -charset-normalizer = {version = ">=2.0.0,<2.1.0", markers = "python_version >= \"3\""} -idna = {version = ">=2.5,<4", markers = "python_version >= \"3\""} +charset-normalizer = ">=2,<4" +idna = ">=2.5,<4" urllib3 = ">=1.21.1,<1.27" [package.extras] -socks = ["PySocks (>=1.5.6,!=1.5.7)", "win-inet-pton"] -use-chardet-on-py3 = ["chardet (>=3.0.2,<5)"] +socks = ["PySocks (>=1.5.6,!=1.5.7)"] +use-chardet-on-py3 = ["chardet (>=3.0.2,<6)"] [[package]] name = "rfc3986" @@ -4073,6 +4130,27 @@ files = [ {file = "tornado-6.2.tar.gz", hash = "sha256:9b630419bde84ec666bfd7ea0a4cb2a8a651c2d5cccdbdd1972a0c859dfc3c13"}, ] +[[package]] +name = "tqdm" +version = "4.65.0" +description = "Fast, Extensible Progress Meter" +category = "main" +optional = false 
+python-versions = ">=3.7" +files = [ + {file = "tqdm-4.65.0-py3-none-any.whl", hash = "sha256:c4f53a17fe37e132815abceec022631be8ffe1b9381c2e6e30aa70edc99e9671"}, + {file = "tqdm-4.65.0.tar.gz", hash = "sha256:1871fb68a86b8fb3b59ca4cdd3dcccbc7e6d613eeed31f4c332531977b89beb5"}, +] + +[package.dependencies] +colorama = {version = "*", markers = "platform_system == \"Windows\""} + +[package.extras] +dev = ["py-make (>=0.1.0)", "twine", "wheel"] +notebook = ["ipywidgets (>=6)"] +slack = ["slack-sdk"] +telegram = ["requests"] + [[package]] name = "traitlets" version = "5.5.0" @@ -4275,6 +4353,23 @@ typing-extensions = {version = "*", markers = "python_version < \"3.8\""} [package.extras] standard = ["colorama (>=0.4)", "httptools (>=0.5.0)", "python-dotenv (>=0.13)", "pyyaml (>=5.1)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "watchfiles (>=0.13)", "websockets (>=10.0)"] +[[package]] +name = "validators" +version = "0.20.0" +description = "Python Data Validation for Humans™." +category = "main" +optional = false +python-versions = ">=3.4" +files = [ + {file = "validators-0.20.0.tar.gz", hash = "sha256:24148ce4e64100a2d5e267233e23e7afeb55316b47d30faae7eb6e7292bc226a"}, +] + +[package.dependencies] +decorator = ">=3.4.0" + +[package.extras] +test = ["flake8 (>=2.4.0)", "isort (>=4.2.2)", "pytest (>=2.2.3)"] + [[package]] name = "virtualenv" version = "20.16.7" @@ -4365,6 +4460,24 @@ files = [ {file = "wcwidth-0.2.5.tar.gz", hash = "sha256:c4d647b99872929fdb7bdcaa4fbe7f01413ed3d98077df798530e5b04f116c83"}, ] +[[package]] +name = "weaviate-client" +version = "3.15.5" +description = "A python native weaviate client" +category = "main" +optional = false +python-versions = ">=3.7" +files = [ + {file = "weaviate-client-3.15.5.tar.gz", hash = "sha256:6da7e5d08dc9bb8b7879661d1a457c50af7d73e621a5305efe131160e83da69e"}, + {file = "weaviate_client-3.15.5-py3-none-any.whl", hash = "sha256:24d0be614e5494534e758cc67a45e7e15f3929a89bf512afd642de53d08723c7"}, +] + +[package.dependencies] 
+authlib = ">=1.1.0" +requests = ">=2.28.0,<2.29.0" +tqdm = ">=4.59.0,<5.0.0" +validators = ">=0.18.2,<=0.21.0" + [[package]] name = "webencodings" version = "0.5.1" @@ -4625,14 +4738,14 @@ testing = ["flake8 (<5)", "func-timeout", "jaraco.functools", "jaraco.itertools" [extras] audio = ["pydub"] aws = ["smart-open"] -elasticsearch = ["elasticsearch", "elastic-transport"] -full = ["protobuf", "lz4", "pandas", "pillow", "types-pillow", "av", "pydub", "trimesh"] +elasticsearch = ["elastic-transport", "elasticsearch"] +full = ["av", "lz4", "pandas", "pillow", "protobuf", "pydub", "trimesh", "types-pillow"] hnswlib = ["hnswlib"] image = ["pillow", "types-pillow"] jac = ["jina-hubble-sdk"] mesh = ["trimesh"] pandas = ["pandas"] -proto = ["protobuf", "lz4"] +proto = ["lz4", "protobuf"] torch = ["torch"] video = ["av"] web = ["fastapi"] @@ -4640,4 +4753,4 @@ web = ["fastapi"] [metadata] lock-version = "2.0" python-versions = ">=3.7,<4.0" -content-hash = "a5bae8ca8239347d066e7566dfea56f08d42950f7037e50870cee226809f4b01" +content-hash = "5a07acb92ae45bc42e49e68af897444874d6facd4ed81af4bd9e8d37d7737037" diff --git a/pyproject.toml b/pyproject.toml index ecc72c74719..2b5bc301296 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,6 +24,7 @@ hnswlib = {version = ">=0.6.2", optional = true } lz4 = {version= ">=1.0.0", optional = true} pydub = {version = "^0.25.1", optional = true } pandas = {version = ">=1.1.0", optional = true } +weaviate-client = {version = ">=3.15", extras = ["weaviate"]} elasticsearch = {version = ">=7.10.1", optional = true } smart-open = {version = ">=6.3.0", extras = ["s3"], optional = true} jina-hubble-sdk = {version = ">=0.34.0", optional = true} @@ -92,6 +93,7 @@ module = [ "trimesh", "pandas", "av", + "weaviate" ] ignore_missing_imports = true diff --git a/tests/integrations/doc_index/weaviate/docker-compose.yml b/tests/integrations/doc_index/weaviate/docker-compose.yml new file mode 100644 index 00000000000..5cca1e722eb --- /dev/null +++ 
b/tests/integrations/doc_index/weaviate/docker-compose.yml @@ -0,0 +1,27 @@ +version: '3.8' + +services: + + weaviate: + command: + - --host + - 0.0.0.0 + - --port + - '8080' + - --scheme + - http + image: semitechnologies/weaviate:1.18.3 + ports: + - "8080:8080" + restart: on-failure:0 + environment: + QUERY_DEFAULTS_LIMIT: 25 + AUTHENTICATION_ANONYMOUS_ACCESS_ENABLED: 'true' + PERSISTENCE_DATA_PATH: '/var/lib/weaviate' + DEFAULT_VECTORIZER_MODULE: 'none' + ENABLE_MODULES: '' + CLUSTER_HOSTNAME: 'node1' + LOG_LEVEL: debug # verbose + LOG_FORMAT: text + # LOG_LEVEL: trace # very verbose + GODEBUG: gctrace=1 # make go garbage collector verbose \ No newline at end of file diff --git a/tests/integrations/doc_index/weaviate/fixture_weaviate.py b/tests/integrations/doc_index/weaviate/fixture_weaviate.py new file mode 100644 index 00000000000..786a92b2a00 --- /dev/null +++ b/tests/integrations/doc_index/weaviate/fixture_weaviate.py @@ -0,0 +1,41 @@ +import os +import time + +import pytest +import requests +import weaviate + +HOST = "http://localhost:8080" + + +cur_dir = os.path.dirname(os.path.abspath(__file__)) +weaviate_yml = os.path.abspath(os.path.join(cur_dir, 'docker-compose.yml')) + + +@pytest.fixture(scope='session', autouse=True) +def start_storage(): + os.system(f"docker-compose -f {weaviate_yml} up -d --remove-orphans") + _wait_for_weaviate() + + yield + os.system(f"docker-compose -f {weaviate_yml} down --remove-orphans") + + +def _wait_for_weaviate(): + while True: + try: + response = requests.get(f"{HOST}/v1/.well-known/ready") + if response.status_code == 200: + return + else: + time.sleep(0.5) + except requests.exceptions.ConnectionError: + time.sleep(1) + + +@pytest.fixture +def weaviate_client(start_storage): + client = weaviate.Client(HOST) + client.schema.delete_all() + yield client + client.schema.delete_all() diff --git a/tests/integrations/doc_index/weaviate/test_column_config_weaviate.py 
b/tests/integrations/doc_index/weaviate/test_column_config_weaviate.py new file mode 100644 index 00000000000..a3050e9b9ba --- /dev/null +++ b/tests/integrations/doc_index/weaviate/test_column_config_weaviate.py @@ -0,0 +1,33 @@ +# TODO: enable ruff qa on this file when we figure out why it thinks weaviate_client is +# redefined at each test that fixture +# ruff: noqa +from pydantic import Field + +from docarray import BaseDoc +from docarray.index.backends.weaviate import WeaviateDocumentIndex +from tests.integrations.doc_index.weaviate.fixture_weaviate import ( # noqa: F401 + start_storage, + weaviate_client, +) + + +def test_column_config(weaviate_client): + def get_text_field_data_type(store, index_name): + props = store._client.schema.get(index_name)["properties"] + text_field = [p for p in props if p["name"] == "text"][0] + + return text_field["dataType"][0] + + class TextDoc(BaseDoc): + text: str = Field() + + class StringDoc(BaseDoc): + text: str = Field(col_type="string") + + dbconfig = WeaviateDocumentIndex.DBConfig(index_name="TextDoc") + store = WeaviateDocumentIndex[TextDoc](db_config=dbconfig) + assert get_text_field_data_type(store, "TextDoc") == "text" + + dbconfig = WeaviateDocumentIndex.DBConfig(index_name="StringDoc") + store = WeaviateDocumentIndex[StringDoc](db_config=dbconfig) + assert get_text_field_data_type(store, "StringDoc") == "string" diff --git a/tests/integrations/doc_index/weaviate/test_find_weaviate.py b/tests/integrations/doc_index/weaviate/test_find_weaviate.py new file mode 100644 index 00000000000..c54d167c634 --- /dev/null +++ b/tests/integrations/doc_index/weaviate/test_find_weaviate.py @@ -0,0 +1,66 @@ +# TODO: enable ruff qa on this file when we figure out why it thinks weaviate_client is +# redefined at each test that fixture +# ruff: noqa +import numpy as np +import pytest +import torch +from pydantic import Field + +from docarray import BaseDoc +from docarray.index.backends.weaviate import WeaviateDocumentIndex +from 
docarray.typing import TorchTensor +from tests.integrations.doc_index.weaviate.fixture_weaviate import ( # noqa: F401 + start_storage, + weaviate_client, +) + + +def test_find_torch(weaviate_client): + class TorchDoc(BaseDoc): + tens: TorchTensor[10] = Field(dims=10, is_embedding=True) + + store = WeaviateDocumentIndex[TorchDoc]() + + index_docs = [ + TorchDoc(tens=np.random.rand(10).astype(dtype=np.float32)) for _ in range(10) + ] + store.index(index_docs) + + query = index_docs[-1] + docs, scores = store.find(query, limit=5) + + assert len(docs) == 5 + assert len(scores) == 5 + for doc in docs: + assert isinstance(doc.tens, TorchTensor) + + assert docs[0].id == index_docs[-1].id + assert torch.allclose(docs[0].tens, index_docs[-1].tens) + + +@pytest.mark.tensorflow +def test_find_tensorflow(): + from docarray.typing import TensorFlowTensor + + class TfDoc(BaseDoc): + tens: TensorFlowTensor[10] = Field(dims=10, is_embedding=True) + + store = WeaviateDocumentIndex[TfDoc]() + + index_docs = [ + TfDoc(tens=np.random.rand(10).astype(dtype=np.float32)) for _ in range(10) + ] + store.index(index_docs) + + query = index_docs[-1] + docs, scores = store.find(query, limit=5) + + assert len(docs) == 5 + assert len(scores) == 5 + for doc in docs: + assert isinstance(doc.tens, TensorFlowTensor) + + assert docs[0].id == index_docs[-1].id + assert np.allclose( + docs[0].tens.unwrap().numpy(), index_docs[-1].tens.unwrap().numpy() + ) diff --git a/tests/integrations/doc_index/weaviate/test_index_get_del_weaviate.py b/tests/integrations/doc_index/weaviate/test_index_get_del_weaviate.py new file mode 100644 index 00000000000..e9c218d45a4 --- /dev/null +++ b/tests/integrations/doc_index/weaviate/test_index_get_del_weaviate.py @@ -0,0 +1,452 @@ +# TODO: enable ruff qa on this file when we figure out why it thinks weaviate_client is +# redefined at each test that fixture +# ruff: noqa +import logging + +import numpy as np +import pytest +from pydantic import Field + +from docarray 
import BaseDoc +from docarray.documents import ImageDoc, TextDoc +from docarray.index.backends.weaviate import ( + DOCUMENTID, + EmbeddedOptions, + WeaviateDocumentIndex, +) +from docarray.typing import NdArray +from tests.integrations.doc_index.weaviate.fixture_weaviate import ( # noqa: F401 + HOST, + start_storage, + weaviate_client, +) + + +class SimpleDoc(BaseDoc): + tens: NdArray[10] = Field(dim=1000, is_embedding=True) + + +class Document(BaseDoc): + embedding: NdArray[2] = Field(dim=2, is_embedding=True) + text: str = Field() + + +class NestedDocument(BaseDoc): + text: str = Field() + child: Document + + +@pytest.fixture +def ten_simple_docs(): + return [SimpleDoc(tens=np.random.randn(10)) for _ in range(10)] + + +@pytest.fixture +def documents(): + texts = ["lorem ipsum", "dolor sit amet", "consectetur adipiscing elit"] + embeddings = [[10, 10], [10.5, 10.5], [-100, -100]] + + # create the docs by enumerating from 1 and use that as the id + docs = [ + Document(id=str(i), embedding=embedding, text=text) + for i, (embedding, text) in enumerate(zip(embeddings, texts)) + ] + + yield docs + + +@pytest.fixture +def test_store(weaviate_client, documents): + store = WeaviateDocumentIndex[Document]() + store.index(documents) + yield store + + +def test_index_simple_schema(weaviate_client, ten_simple_docs): + store = WeaviateDocumentIndex[SimpleDoc]() + store.index(ten_simple_docs) + assert store.num_docs() == 10 + + for doc in ten_simple_docs: + doc_id = doc.id + doc_embedding = doc.tens + + result = ( + weaviate_client.query.get("Document", DOCUMENTID) + .with_additional("vector") + .with_where( + {"path": [DOCUMENTID], "operator": "Equal", "valueString": doc_id} + ) + .do() + ) + + result = result["data"]["Get"]["Document"][0] + assert result[DOCUMENTID] == doc_id + assert np.allclose(result["_additional"]["vector"], doc_embedding) + + +def test_validate_columns(weaviate_client): + dbconfig = WeaviateDocumentIndex.DBConfig(host=HOST) + + class 
InvalidDoc1(BaseDoc): + tens: NdArray[10] = Field(dim=1000, is_embedding=True) + tens2: NdArray[10] = Field(dim=1000, is_embedding=True) + + class InvalidDoc2(BaseDoc): + tens: int = Field(dim=1000, is_embedding=True) + + with pytest.raises(ValueError, match=r"Only one column can be marked as embedding"): + WeaviateDocumentIndex[InvalidDoc1](db_config=dbconfig) + + with pytest.raises(ValueError, match=r"marked as embedding but is not of type"): + WeaviateDocumentIndex[InvalidDoc2](db_config=dbconfig) + + +def test_find(weaviate_client, caplog): + class Document(BaseDoc): + embedding: NdArray[2] = Field(dim=2, is_embedding=True) + + vectors = [[10, 10], [10.5, 10.5], [-100, -100]] + docs = [Document(embedding=vector) for vector in vectors] + + store = WeaviateDocumentIndex[Document]() + store.index(docs) + + query = [10.1, 10.1] + + results = store.find( + query, search_field='', limit=3, score_name="distance", score_threshold=1e-2 + ) + assert len(results) == 2 + + results = store.find(query, search_field='', limit=3, score_threshold=0.99) + assert len(results) == 2 + + with pytest.raises( + ValueError, + match=r"Argument search_field is not supported for WeaviateDocumentIndex", + ): + store.find(query, search_field="foo", limit=10) + + +def test_find_batched(weaviate_client, caplog): + class Document(BaseDoc): + embedding: NdArray[2] = Field(dim=2, is_embedding=True) + + vectors = [[10, 10], [10.5, 10.5], [-100, -100]] + docs = [Document(embedding=vector) for vector in vectors] + + store = WeaviateDocumentIndex[Document]() + store.index(docs) + + queries = np.array([[10.1, 10.1], [-100, -100]]) + + results = store.find_batched( + queries, search_field='', limit=3, score_name="distance", score_threshold=1e-2 + ) + assert len(results) == 2 + assert len(results.documents[0]) == 2 + assert len(results.documents[1]) == 1 + + results = store.find_batched( + queries, search_field='', limit=3, score_name="certainty" + ) + assert len(results) == 2 + assert 
len(results.documents[0]) == 3 + assert len(results.documents[1]) == 3 + + with pytest.raises( + ValueError, + match=r"Argument search_field is not supported for WeaviateDocumentIndex", + ): + store.find_batched(queries, search_field="foo", limit=10) + + +@pytest.mark.parametrize( + "filter_query, expected_num_docs", + [ + ({"path": ["text"], "operator": "Equal", "valueText": "lorem ipsum"}, 1), + ({"path": ["text"], "operator": "Equal", "valueText": "foo"}, 0), + ({"path": ["id"], "operator": "Equal", "valueString": "1"}, 1), + ], +) +def test_filter(test_store, filter_query, expected_num_docs): + docs = test_store.filter(filter_query, limit=3) + actual_num_docs = len(docs) + + assert actual_num_docs == expected_num_docs + + +@pytest.mark.parametrize( + "filter_queries, expected_num_docs", + [ + ( + [ + {"path": ["text"], "operator": "Equal", "valueText": "lorem ipsum"}, + {"path": ["text"], "operator": "Equal", "valueText": "foo"}, + ], + [1, 0], + ), + ( + [ + {"path": ["id"], "operator": "Equal", "valueString": "1"}, + {"path": ["id"], "operator": "Equal", "valueString": "2"}, + ], + [1, 0], + ), + ], +) +def test_filter_batched(test_store, filter_queries, expected_num_docs): + filter_queries = [ + {"path": ["text"], "operator": "Equal", "valueText": "lorem ipsum"}, + {"path": ["text"], "operator": "Equal", "valueText": "foo"}, + ] + + results = test_store.filter_batched(filter_queries, limit=3) + actual_num_docs = [len(docs) for docs in results] + assert actual_num_docs == expected_num_docs + + +def test_text_search(test_store): + results = test_store.text_search(query="lorem", search_field="text", limit=3) + assert len(results.documents) == 1 + + +def test_text_search_batched(test_store): + text_queries = ["lorem", "foo"] + + results = test_store.text_search_batched( + queries=text_queries, search_field="text", limit=3 + ) + assert len(results.documents[0]) == 1 + assert len(results.documents[1]) == 0 + + +def test_del_items(test_store): + del 
test_store[["1", "2"]] + assert test_store.num_docs() == 1 + + +def test_get_items(test_store): + docs = test_store[["1", "2"]] + assert len(docs) == 2 + assert set(doc.id for doc in docs) == {'1', '2'} + + +def test_index_nested_documents(weaviate_client): + store = WeaviateDocumentIndex[NestedDocument]() + document = NestedDocument( + text="lorem ipsum", child=Document(embedding=[10, 10], text="dolor sit amet") + ) + store.index([document]) + assert store.num_docs() == 1 + + +@pytest.mark.parametrize( + "search_field, query, expected_num_docs", + [ + ("text", "lorem", 1), + ("child__text", "dolor", 1), + ("text", "foo", 0), + ("child__text", "bar", 0), + ], +) +def test_text_search_nested_documents( + weaviate_client, search_field, query, expected_num_docs +): + store = WeaviateDocumentIndex[NestedDocument]() + document = NestedDocument( + text="lorem ipsum", child=Document(embedding=[10, 10], text="dolor sit amet") + ) + store.index([document]) + + results = store.text_search(query=query, search_field=search_field, limit=3) + + assert len(results.documents) == expected_num_docs + + +def test_reuse_existing_schema(weaviate_client, caplog): + WeaviateDocumentIndex[SimpleDoc]() + + with caplog.at_level(logging.DEBUG): + WeaviateDocumentIndex[SimpleDoc]() + assert "Will reuse existing schema" in caplog.text + + +def test_query_builder(test_store): + query_embedding = [10.25, 10.25] + query_text = "ipsum" + where_filter = {"path": ["id"], "operator": "Equal", "valueString": "1"} + q = ( + test_store.build_query() + .find(query=query_embedding) + .filter(where_filter) + .build() + ) + + docs = test_store.execute_query(q) + assert len(docs) == 1 + + q = ( + test_store.build_query() + .text_search(query=query_text, search_field="text") + .build() + ) + + docs = test_store.execute_query(q) + assert len(docs) == 1 + + +def test_batched_query_builder(test_store): + query_embeddings = [[10.25, 10.25], [-100, -100]] + query_texts = ["ipsum", "foo"] + where_filters = 
[{"path": ["id"], "operator": "Equal", "valueString": "1"}] + + q = ( + test_store.build_query() + .find_batched( + queries=query_embeddings, score_name="certainty", score_threshold=0.99 + ) + .filter_batched(filters=where_filters) + .build() + ) + + docs = test_store.execute_query(q) + assert len(docs[0]) == 1 + assert len(docs[1]) == 0 + + q = ( + test_store.build_query() + .text_search_batched(queries=query_texts, search_field="text") + .build() + ) + + docs = test_store.execute_query(q) + assert len(docs[0]) == 1 + assert len(docs[1]) == 0 + + +def test_raw_graphql(test_store): + graphql_query = """ + { + Aggregate { + Document { + meta { + count + } + } + } + } + """ + + results = test_store.execute_query(graphql_query) + num_docs = results["data"]["Aggregate"]["Document"][0]["meta"]["count"] + + assert num_docs == 3 + + +def test_hybrid_query(test_store): + query_embedding = [10.25, 10.25] + query_text = "ipsum" + where_filter = {"path": ["id"], "operator": "Equal", "valueString": "1"} + + q = ( + test_store.build_query() + .find(query=query_embedding) + .text_search(query=query_text, search_field="text") + .filter(where_filter) + .build() + ) + + docs = test_store.execute_query(q) + assert len(docs) == 1 + + +def test_hybrid_query_batched(test_store): + query_embeddings = [[10.25, 10.25], [-100, -100]] + query_texts = ["dolor", "elit"] + + q = ( + test_store.build_query() + .find_batched( + queries=query_embeddings, score_name="certainty", score_threshold=0.99 + ) + .text_search_batched(queries=query_texts, search_field="text") + .build() + ) + + docs = test_store.execute_query(q) + assert docs[0][0].id == '1' + assert docs[1][0].id == '2' + + +def test_index_multi_modal_doc(): + class MyMultiModalDoc(BaseDoc): + image: ImageDoc + text: TextDoc + + store = WeaviateDocumentIndex[MyMultiModalDoc]() + + doc = [ + MyMultiModalDoc( + image=ImageDoc(embedding=np.random.randn(128)), text=TextDoc(text='hello') + ) + ] + store.index(doc) + + id_ = doc[0].id + assert 
store[id_].id == id_ + assert np.all(store[id_].image.embedding == doc[0].image.embedding) + assert store[id_].text.text == doc[0].text.text + + +def test_index_document_with_bytes(weaviate_client): + doc = ImageDoc(id="1", url="www.foo.com", bytes_=b"foo") + + store = WeaviateDocumentIndex[ImageDoc]() + store.index([doc]) + + results = store.filter( + filter_query={"path": ["id"], "operator": "Equal", "valueString": "1"} + ) + + assert doc == results[0] + + +def test_index_document_with_no_embeddings(weaviate_client): + # define a document that does not have any field where is_embedding=True + class Document(BaseDoc): + not_embedding: NdArray[2] = Field(dim=2) + text: str + + doc = Document(not_embedding=[2, 5], text="dolor sit amet", id="1") + + store = WeaviateDocumentIndex[Document]() + + store.index([doc]) + + results = store.filter( + filter_query={"path": ["id"], "operator": "Equal", "valueString": "1"} + ) + + assert doc == results[0] + + +def test_limit_query_builder(test_store): + query_vector = [10.25, 10.25] + q = test_store.build_query().find(query=query_vector).limit(2) + + docs = test_store.execute_query(q) + assert len(docs) == 2 + + +@pytest.mark.linux +def test_embedded_weaviate(): + class Document(BaseDoc): + text: str + + embedded_options = EmbeddedOptions() + db_config = WeaviateDocumentIndex.DBConfig(embedded_options=embedded_options) + store = WeaviateDocumentIndex[Document](db_config=db_config) + + assert store._client._connection.embedded_db From a077cc69ab806a091ddd345c116ddbd9fda9521c Mon Sep 17 00:00:00 2001 From: Nan Wang Date: Thu, 13 Apr 2023 16:59:34 +0200 Subject: [PATCH 17/20] docs: add sending section (#1350) * docs: add serialization for json Signed-off-by: nan-wang * docs: add serialization for binary and protobuf Signed-off-by: nan-wang * docs: add serialization for base64 and bytes Signed-off-by: nan-wang * docs: add serialization for csv Signed-off-by: nan-wang * docs: add serialization for dataframe Signed-off-by: nan-wang * 
fix: add doctring to documentaion basedoc Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: add docstring doc list Signed-off-by: samsja * fix: dic doc array docstring Signed-off-by: samsja * fix: fix page for doc list serilizaiton Signed-off-by: samsja * fix: fix docstring Signed-off-by: samsja * feat: add docvec Signed-off-by: samsja * docs: add send doc section Signed-off-by: samsja * docs: fix docstring Signed-off-by: samsja * refactor: better tree structure for sending Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix python code snippet ods Signed-off-by: samsja * fix: fix remove breakpoint Signed-off-by: samsja * feat: add intro Signed-off-by: samsja * feat: add ref Signed-off-by: samsja * feat: move fastapi part Signed-off-by: samsja * fix: fix fastAPI Signed-off-by: samsja * fix: remove uselss mixin Signed-off-by: samsja * faet: add jina section Signed-off-by: samsja * fix: compress -> compression Signed-off-by: samsja * feat: apply suggestion Co-authored-by: Alex Cureton-Griffiths Co-authored-by: Charlotte Gerhaher Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: apply alex suggestion Signed-off-by: samsja * wip Signed-off-by: samsja * fix: fix all docstring Signed-off-by: samsja * fix: fix update docstring Signed-off-by: samsja * fix: fix ruff Signed-off-by: samsja * fix: fix smth Signed-off-by: samsja * feat: apply charllote suggestion Co-authored-by: Charlotte Gerhaher Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> --------- Signed-off-by: nan-wang Signed-off-by: samsja Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> Co-authored-by: samsja Co-authored-by: samsja <55492238+samsja@users.noreply.github.com> Co-authored-by: Alex Cureton-Griffiths Co-authored-by: Charlotte Gerhaher --- docarray/array/any_array.py | 21 +-- docarray/array/doc_list/doc_list.py | 1 + docarray/array/doc_list/io.py | 29 +-- docarray/base_doc/doc.py | 81 ++++++++- 
docarray/base_doc/mixins/io.py | 2 +- docarray/base_doc/mixins/update.py | 38 ++-- docs/api_references/array/da.md | 3 +- docs/api_references/base_doc/base_doc.md | 3 + .../sending/api/fastAPI.md} | 12 +- .../sending/api/jina.md} | 2 + docs/user_guide/sending/first_step.md | 13 +- docs/user_guide/sending/ser/send_doc.md | 55 ++++++ docs/user_guide/sending/ser/send_doclist.md | 165 ++++++++++++++++++ docs/user_guide/sending/ser/send_docvec.md | 30 ++++ mkdocs.yml | 13 +- tests/documentation/test_docs.py | 32 ++-- 16 files changed, 437 insertions(+), 63 deletions(-) rename docs/{integrations/fastapi.md => user_guide/sending/api/fastAPI.md} (90%) rename docs/{how_to/audio2text.md => user_guide/sending/api/jina.md} (99%) create mode 100644 docs/user_guide/sending/ser/send_doc.md create mode 100644 docs/user_guide/sending/ser/send_doclist.md create mode 100644 docs/user_guide/sending/ser/send_docvec.md diff --git a/docarray/array/any_array.py b/docarray/array/any_array.py index 31d1dedb067..e3b46132ee6 100644 --- a/docarray/array/any_array.py +++ b/docarray/array/any_array.py @@ -121,7 +121,7 @@ def _set_data_column( field: str, values: Union[List, T, 'AbstractTensor'], ): - """Set all Documents in this [`DocList`][docarray.DocList] using the passed values + """Set all Documents in this [`DocList`][docarray.array.doc_list.doc_list.DocList] using the passed values :param field: name of the fields to extract :values: the values to set at the DocList level @@ -140,8 +140,8 @@ def to_protobuf(self) -> 'DocListProto': ... def _to_node_protobuf(self) -> 'NodeProto': - """Convert a [`DocList`][docarray.DocList] into a NodeProto protobuf message. - This function should be called when a DocList + """Convert a [`DocList`][docarray.array.doc_list.doc_list.DocList] into a NodeProto protobuf message. 
+ This function should be called when a DocList is nested into another Document that need to be converted into a protobuf :return: the nested item protobuf message @@ -157,13 +157,11 @@ def traverse_flat( ) -> Union[List[Any], 'AbstractTensor']: """ Return a List of the accessed objects when applying the `access_path`. If this - results in a nested list or list of [`DocList`s][docarray.DocList], the list will be flattened + results in a nested list or list of [`DocList`s][docarray.array.doc_list.doc_list.DocList], the list will be flattened on the first level. The access path is a string that consists of attribute names, concatenated and `"__"`-separated. It describes the path from the first level to an arbitrary one, e.g. `'content__image__url'`. - :param access_path: a string that represents the access path (`"__"`-separated). - :return: list of the accessed objects, flattened if nested. ```python from docarray import BaseDoc, DocList, Text @@ -210,7 +208,7 @@ class Book(BaseDoc): chapters = docs.traverse_flat(access_path='chapters') # list of 30 strings ``` - If your [`DocList`][docarray.DocList] is in doc_vec mode, and you want to access a field of + If your [`DocList`][docarray.array.doc_list.doc_list.DocList] is in doc_vec mode, and you want to access a field of type [`AnyTensor`][docarray.typing.AnyTensor], the doc_vec tensor will be returned instead of a list: ```python @@ -232,6 +230,9 @@ class Image(BaseDoc): access_path='tensor' ) # tensor of shape (2, 3, 224, 224) ``` + + :param access_path: a string that represents the access path ("__"-separated). + :return: list of the accessed objects, flattened if nested. """ ... @@ -263,7 +264,7 @@ def _flatten_one_level(sequence: List[Any]) -> List[Any]: def summary(self): """ - Print a summary of this [`DocList`][docarray.DocList] object and a summary of the schema of its + Print a summary of this [`DocList`][docarray.array.doc_list.doc_list.DocList] object and a summary of the schema of its Document type. 
""" DocArraySummary(self).summary() @@ -275,13 +276,13 @@ def _batch( show_progress: bool = False, ) -> Generator[T, None, None]: """ - Creates a `Generator` that yields [`DocList`][docarray.DocList] of size `batch_size`. + Creates a `Generator` that yields [`DocList`][docarray.array.doc_list.doc_list.DocList] of size `batch_size`. Note, that the last batch might be smaller than `batch_size`. :param batch_size: Size of each generated batch. :param shuffle: If set, shuffle the Documents before dividing into minibatches. :param show_progress: if set, show a progress bar when batching documents. - :yield: a Generator of [`DocList`][docarray.DocList], each in the length of `batch_size` + :yield: a Generator of [`DocList`][docarray.array.doc_list.doc_list.DocList], each in the length of `batch_size` """ from rich.progress import track diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index d01d7a31e0d..8eb1a822d59 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -96,6 +96,7 @@ class Image(BaseDoc): # You can also set fields, with `docs.tensor = np.random.random([10, 100])`: + import numpy as np docs.tensor = np.random.random([10, 100]) diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index e0814e89fa8..9f153e2f1bd 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -141,7 +141,7 @@ def from_bytes( :param data: Bytes from which to deserialize :param protocol: protocol that was used to serialize - :param compress: compress algorithm that was used to serialize + :param compress: compression algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the deserialized `DocList` """ @@ -247,7 +247,7 @@ def to_bytes( For more Pythonic code, please use ``bytes(...)``. :param protocol: protocol to use. 
It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between : `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param file_ctx: File or filename or serialized bytes where the data is stored. :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the binary serialization in bytes or None if file_ctx is passed where to store @@ -277,7 +277,7 @@ def from_base64( :param data: Base64 string to deserialize :param protocol: protocol that was used to serialize - :param compress: compress algorithm that was used to serialize + :param compress: compress algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the deserialized `DocList` """ @@ -297,7 +297,7 @@ def to_base64( """Serialize itself into base64 encoded string. :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the binary serialization in bytes or None if file_ctx is passed where to store """ @@ -566,7 +566,7 @@ def _load_binary_all( ): """Read a `DocList` object from a binary file :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: a `DocList` """ @@ -646,7 +646,7 @@ def _load_binary_stream( """Yield `Document` objects from a binary file :param protocol: protocol to use. 
It can be 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: a generator of `Document` objects """ @@ -702,13 +702,7 @@ def load_binary( ) -> Union[T, Generator['T_doc', None, None]]: """Load doc_list elements from a compressed binary file. - :param file: File or filename or serialized bytes where the data is stored. - :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use - :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - :param streaming: if `True` returns a generator over `Document` objects. In case protocol is pickle the `Documents` are streamed from disk to save memory usage - :return: a `DocList` object !!! note If `file` is `str` it can specify `protocol` and `compress` as file extensions. @@ -716,6 +710,15 @@ def load_binary( string interpolation of the respective `protocol` and `compress` methods. For example if `file=my_docarray.protobuf.lz4` then the binary data will be loaded assuming `protocol=protobuf` and `compress=lz4`. + + :param file: File or filename or serialized bytes where the data is stored. + :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` + :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + :param streaming: if `True` returns a generator over `Document` objects. + + :return: a `DocList` object + """ load_protocol: Optional[str] = protocol load_compress: Optional[str] = compress @@ -765,7 +768,7 @@ def save_binary( :param file: File or filename to which the data is saved. :param protocol: protocol to use. 
It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` !!! note diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index a5c42a82ee4..0ed39bd0d49 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -1,5 +1,15 @@ import os -from typing import TYPE_CHECKING, Any, Dict, Optional, Type, TypeVar +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Optional, + Type, + TypeVar, + Union, + no_type_check, +) import orjson from pydantic import BaseModel, Field @@ -12,11 +22,16 @@ from docarray.typing.tensor.abstract_tensor import AbstractTensor if TYPE_CHECKING: + from pydantic import Protocol + from pydantic.types import StrBytes + from pydantic.typing import AbstractSetIntStr, MappingIntStrAny + from docarray.array.doc_vec.column_storage import ColumnStorageView _console: Console = Console() T = TypeVar('T', bound='BaseDoc') +T_update = TypeVar('T_update', bound='UpdateMixin') class BaseDoc(BaseModel, IOMixin, UpdateMixin, BaseNode): @@ -141,3 +156,67 @@ def _docarray_to_json_compatible(self) -> Dict: :return: A dictionary of the BaseDoc object """ return self.dict() + + ######################################################################################################################################################## + ### this section is just for documentation purposes will be removed later once https://github.com/mkdocstrings/griffe/issues/138 is fixed ############## + ######################################################################################################################################################## + + def json( + self, + *, + include: Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']] = None, + exclude: Optional[Union['AbstractSetIntStr', 
'MappingIntStrAny']] = None, + by_alias: bool = False, + skip_defaults: Optional[bool] = None, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + encoder: Optional[Callable[[Any], Any]] = None, + models_as_dict: bool = True, + **dumps_kwargs: Any, + ) -> str: + """ + Generate a JSON representation of the model, `include` and `exclude` arguments as per `dict()`. + + `encoder` is an optional function to supply as `default` to json.dumps(), other arguments as per `json.dumps()`. + """ + return super().json( + include=include, + exclude=exclude, + by_alias=by_alias, + skip_defaults=skip_defaults, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + encoder=encoder, + models_as_dict=models_as_dict, + **dumps_kwargs, + ) + + @no_type_check + @classmethod + def parse_raw( + cls: Type[T], + b: 'StrBytes', + *, + content_type: str = None, + encoding: str = 'utf8', + proto: 'Protocol' = None, + allow_pickle: bool = False, + ) -> T: + """ + Parse a raw string or bytes into a base doc + :param b: + :param content_type: + :param encoding: the encoding to use when parsing a string, defaults to 'utf8' + :param proto: protocol to use. + :param allow_pickle: allow pickle protocol + :return: a document + """ + return super(BaseDoc, cls).parse_raw( + b, + content_type=content_type, + encoding=encoding, + proto=proto, + allow_pickle=allow_pickle, + ) diff --git a/docarray/base_doc/mixins/io.py b/docarray/base_doc/mixins/io.py index b2a64e8082b..e50d9ac791d 100644 --- a/docarray/base_doc/mixins/io.py +++ b/docarray/base_doc/mixins/io.py @@ -138,7 +138,7 @@ def to_bytes( For more Pythonic code, please use ``bytes(...)``. :param protocol: protocol to use. 
It can be 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compression algorithm to use :return: the binary serialization in bytes """ import pickle diff --git a/docarray/base_doc/mixins/update.py b/docarray/base_doc/mixins/update.py index 99fdbc2bf8e..471e97483ba 100644 --- a/docarray/base_doc/mixins/update.py +++ b/docarray/base_doc/mixins/update.py @@ -25,7 +25,8 @@ def update(self, other: T): Updates self with the content of other. Changes are applied to self. Updating one Document with another consists in the following: - setting data properties of the second Document to the first Document - if they are not None + if they are not None: + - Concatenating lists and updating sets - Updating recursively Documents and DocArrays - Updating Dictionaries of the left with the right @@ -38,30 +39,33 @@ def update(self, other: T): so they behave as regular types and the value of `self` is updated with the value of `other` - EXAMPLE USAGE - .. code-block:: python + --- + + ```python + from typing import List, Optional - from docarray import BaseDoc - from docarray.documents import Text + from docarray import BaseDoc - class MyDocument(BaseDoc): - content: str - title: Optional[str] = None - tags_: List + class MyDocument(BaseDoc): + content: str + title: Optional[str] = None + tags_: List - doc1 = MyDocument( - content='Core content of the document', title='Title', tags_=['python', 'AI'] - ) - doc2 = MyDocument(content='Core content updated', tags_=['docarray']) + doc1 = MyDocument( + content='Core content of the document', title='Title', tags_=['python', 'AI'] + ) + doc2 = MyDocument(content='Core content updated', tags_=['docarray']) - doc1.update(doc2) - assert doc1.content == 'Core content updated' - assert doc1.title == 'Title' - assert doc1.tags_ == ['python', 'AI', 'docarray'] + doc1.update(doc2) + assert doc1.content == 'Core content updated' + assert doc1.title == 'Title' + assert doc1.tags_ == ['python', 'AI', 'docarray'] + 
``` + --- :param other: The Document with which to update the contents of this """ if type(self) != type(other): diff --git a/docs/api_references/array/da.md b/docs/api_references/array/da.md index 28e1aa94efa..e1f5b33f008 100644 --- a/docs/api_references/array/da.md +++ b/docs/api_references/array/da.md @@ -1,4 +1,5 @@ # DocList ::: docarray.array.doc_list.doc_list.DocList -::: docarray.array.doc_list.pushpull.PushPullMixin \ No newline at end of file +::: docarray.array.doc_list.io.IOMixinArray +::: docarray.array.doc_list.pushpull.PushPullMixin diff --git a/docs/api_references/base_doc/base_doc.md b/docs/api_references/base_doc/base_doc.md index 0fe2dc80891..abce654ee96 100644 --- a/docs/api_references/base_doc/base_doc.md +++ b/docs/api_references/base_doc/base_doc.md @@ -1,3 +1,6 @@ # BaseDoc ::: docarray.base_doc.doc.BaseDoc +::: docarray.base_doc.mixins.io.IOMixin +::: docarray.base_doc.mixins.update.UpdateMixin + diff --git a/docs/integrations/fastapi.md b/docs/user_guide/sending/api/fastAPI.md similarity index 90% rename from docs/integrations/fastapi.md rename to docs/user_guide/sending/api/fastAPI.md index e55b09fba9e..d35308fefce 100644 --- a/docs/integrations/fastapi.md +++ b/docs/user_guide/sending/api/fastAPI.md @@ -1,9 +1,15 @@ -# Use DocArray with FastAPI +# FastAPI -FastAPI is a high-performance web framework for building APIs with Python. It's designed to be easy to use and supports asynchronous programming. -Since [`DocArray` documents are Pydantic Models (with a twist)](../user_guide/representing/first_step.md) they can be easily integrated with FastAPI, +[FastAPI](https://fastapi.tiangolo.com/) is a high-performance web framework for building APIs with Python based on Python type hints. It's designed to be easy to use and supports asynchronous programming. 
+Since [`DocArray` documents are Pydantic Models (with a twist)](../../representing/first_step.md) they can be easily integrated with FastAPI, and provide a seamless and efficient way to work with multimodal data in FastAPI-powered APIs. +!!! note + you need to install FastAPI to follow this section + ``` + pip install fastapi + ``` + First, you should define schemas for your input and/or output Documents: ```python diff --git a/docs/how_to/audio2text.md b/docs/user_guide/sending/api/jina.md similarity index 99% rename from docs/how_to/audio2text.md rename to docs/user_guide/sending/api/jina.md index d2f2507e08f..cbdf50acd2a 100644 --- a/docs/how_to/audio2text.md +++ b/docs/user_guide/sending/api/jina.md @@ -1,3 +1,5 @@ +# Jina + # Create an audio to text app with Jina and DocArray V2 This is how you can build an Audio to Text app using Jina, DocArray and Whisper. diff --git a/docs/user_guide/sending/first_step.md b/docs/user_guide/sending/first_step.md index 1079b9dd75b..6e2d2608943 100644 --- a/docs/user_guide/sending/first_step.md +++ b/docs/user_guide/sending/first_step.md @@ -1 +1,12 @@ -# Sending data +# Intro + +In the representation section we saw how to use [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] +to represent multi-modal data. In this section we will see **how to send these data over the wire**. 
+ + +This section is divided into two: + +- [Serialization](./ser/send_doc.md) of [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] +- [Using DocArray with a web framework to build a multimodal API](./api/jina.md) + + diff --git a/docs/user_guide/sending/ser/send_doc.md b/docs/user_guide/sending/ser/send_doc.md new file mode 100644 index 00000000000..dd77557dbba --- /dev/null +++ b/docs/user_guide/sending/ser/send_doc.md @@ -0,0 +1,55 @@ +# BaseDoc + +You need to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] before you can store or send it. + +!!! note + [BaseDoc][docarray.base_doc.doc.BaseDoc] supports serialization to `protobuf` and `json` formats. + +## Serialization to protobuf + +You can use [`to_protobuf`][docarray.base_doc.mixins.io.IOMixin.to_protobuf] to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] to a protobuf message object +and use [`from_protobuf`][docarray.base_doc.mixins.io.IOMixin.from_protobuf] to deserialize it. + +```python +from typing import List +from docarray import BaseDoc + + +class MyDoc(BaseDoc): + text: str + tags: List[str] + + +doc = MyDoc(text='hello world', tags=['hello', 'world']) +proto_message = doc.to_protobuf() +new_doc = MyDoc.from_protobuf(proto_message) +assert doc == new_doc # True +``` + +## Serialization to JSON + +You can use [`json`][docarray.base_doc.doc.BaseDoc.json] to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] to a json string +and use [`parse_raw`][docarray.base_doc.doc.BaseDoc.parse_raw] to deserialize it. 
+ +```python +from typing import List +from docarray import BaseDoc + + +class MyDoc(BaseDoc): + text: str + tags: List[str] + + +doc = MyDoc(text='hello world', tags=['hello', 'world']) +json_str = doc.json() +new_doc = MyDoc.parse_raw(json_str) +assert doc == new_doc # True +``` + +See also: + +* The serializing [DocList](./send_doclist.md) section +* The serializing [DocVec](./send_docvec.md) section + + diff --git a/docs/user_guide/sending/ser/send_doclist.md b/docs/user_guide/sending/ser/send_doclist.md new file mode 100644 index 00000000000..70b1789ca5f --- /dev/null +++ b/docs/user_guide/sending/ser/send_doclist.md @@ -0,0 +1,165 @@ +# DocList +When sending or storing [`DocList`][docarray.array.doc_list.doc_list.DocList], you need to use serialization. [DocList][docarray.array.doc_list.doc_list.DocList] supports multiple ways to serialize the data. + +## JSON +You can use [`to_json()`][docarray.array.doc_list.io.IOMixinArray.to_json] and [`from_json()`][docarray.array.doc_list.io.IOMixinArray.from_json] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +with open('simple-dl.json', 'wb') as f: + json_dl = dl.to_json() + print(json_dl) + f.write(json_dl) + +with open('simple-dl.json', 'r') as f: + dl_load_from_json = DocList[SimpleDoc].from_json(f.read()) + print(dl_load_from_json) +``` + +[to_json()][docarray.array.doc_list.io.IOMixinArray.to_json] returns the binary representation of the json object. [from_json()][docarray.array.doc_list.io.IOMixinArray.from_json] can load from either `str` or `binary` representation of the json object. 
+ +```output +b'[{"id":"5540e72d407ae81abb2390e9249ed066","text":"doc 0"},{"id":"fbe9f80d2fa03571e899a2887af1ac1b","text":"doc 1"}]' + +## Protobuf +To serialize a DocList with `protobuf`, you can use [`to_protobuf()`][docarray.array.doc_list.io.IOMixinArray.to_protobuf] and [`from_protobuf()`][docarray.array.doc_list.io.IOMixinArray.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): +    text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +proto_message_dl = dl.to_protobuf() +dl_from_proto = DocList[SimpleDoc].from_protobuf(proto_message_dl) +print(type(proto_message_dl)) +print(dl_from_proto) +``` + +[to_protobuf()][docarray.array.doc_list.io.IOMixinArray.to_protobuf] returns a protobuf object of `docarray_pb2.DocListProto` class. [from_protobuf()][docarray.array.doc_list.io.IOMixinArray.from_protobuf] accepts a protobuf message object to construct a [DocList][docarray.array.doc_list.doc_list.DocList]. + +## Base64 +When transferring over the network, you can choose `Base64` format to serialize the [`DocList`][docarray.array.doc_list.doc_list.DocList]. +Serializing a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64 supports both `pickle` and `protobuf` protocols. Besides, you can choose different compression methods. + +To serialize a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64, you can use [`to_base64()`][docarray.array.doc_list.io.IOMixinArray.to_base64] and [`from_base64()`][docarray.array.doc_list.io.IOMixinArray.from_base64] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: + +We support multiple compression methods. 
(namely `lz4`, `bz2`, `lzma`, `zlib`, `gzip`) + + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): +    text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +base64_repr_dl = dl.to_base64(compress=None, protocol='pickle') + +dl_from_base64 = DocList[SimpleDoc].from_base64( +    base64_repr_dl, compress=None, protocol='pickle' +) +``` + +## Binary +Similar to `Base64` serialization, `Binary` serialization also supports different protocols and compression methods. + +To save a [DocList][docarray.array.doc_list.doc_list.DocList] into a binary file, you can use [`save_binary()`][docarray.array.doc_list.io.IOMixinArray.save_binary] and [`load_binary()`][docarray.array.doc_list.io.IOMixinArray.load_binary] to save and load a [DocList][docarray.array.doc_list.doc_list.DocList]: + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): +    text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +dl.save_binary('simple-dl.pickle', compress=None, protocol='pickle') + +dl_from_binary = DocList[SimpleDoc].load_binary( +    'simple-dl.pickle', compress=None, protocol='pickle' +) +``` + +The [DocList][docarray.array.doc_list.doc_list.DocList] is stored in the `simple-dl.pickle` file. + +### Bytes +Under the hood, [save_binary()][docarray.array.doc_list.io.IOMixinArray.save_binary] prepares the file object and calls [to_bytes()][docarray.array.doc_list.io.IOMixinArray.to_bytes] function to convert the [DocList][docarray.array.doc_list.doc_list.DocList] into a byte object. You can use [to_bytes()][docarray.array.doc_list.io.IOMixinArray.to_bytes] function directly and use [from_bytes()][docarray.array.doc_list.io.IOMixinArray.from_bytes] to load the [DocList][docarray.array.doc_list.doc_list.DocList] from a byte object. You can use `protocol` to choose between `pickle` and `protobuf`. 
Besides, [to_bytes()][docarray.array.doc_list.io.IOMixinArray.to_bytes] and [save_binary()][docarray.array.doc_list.io.IOMixinArray.save_binary] support multiple options for `compress` as well. + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): +    text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +bytes_dl = dl.to_bytes(protocol='pickle', compress=None) + +dl_from_bytes = DocList[SimpleDoc].from_bytes( +    bytes_dl, compress=None, protocol='pickle' +) +``` + + +## CSV +You can use [`from_csv()`][docarray.array.doc_list.io.IOMixinArray.from_csv] and [`to_csv()`][docarray.array.doc_list.io.IOMixinArray.to_csv] to serialize and deserialize the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a CSV file. Use the `dialect` parameter to choose the dialect of the CSV format: + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): +    text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +dl.to_csv('simple-dl.csv') +dl_from_csv = DocList[SimpleDoc].from_csv('simple-dl.csv') +print(dl_from_csv) +``` + + +## Pandas.Dataframe +You can use [`from_dataframe()`][docarray.array.doc_list.io.IOMixinArray.from_dataframe] and [`to_dataframe()`][docarray.array.doc_list.io.IOMixinArray.to_dataframe] to load/save the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a pandas DataFrame: + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): +    text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +df = dl.to_dataframe() +dl_from_dataframe = DocList[SimpleDoc].from_dataframe(df) +print(dl_from_dataframe) +``` + +See also: + +* The serializing [BaseDoc](./send_doc.md) section +* The serializing [DocVec](./send_docvec.md) section diff --git a/docs/user_guide/sending/ser/send_docvec.md b/docs/user_guide/sending/ser/send_docvec.md new file mode 100644 index 
00000000000..3fbaf759075 --- /dev/null +++ b/docs/user_guide/sending/ser/send_docvec.md @@ -0,0 +1,30 @@ +# DocVec + +When sending or storing [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec], you need to use serialization. [DocVec][docarray.array.doc_vec.doc_vec.DocVec] only supports protobuf to serialize the data. +You can use [`to_protobuf`][docarray.array.doc_vec.doc_vec.DocVec.to_protobuf] and [`from_protobuf`][docarray.array.doc_vec.doc_vec.DocVec.from_protobuf] to serialize and deserialize a [DocVec][docarray.array.doc_vec.doc_vec.DocVec]. + +```python +import numpy as np + +from docarray import BaseDoc, DocVec +from docarray.typing import AnyTensor + + +class SimpleVecDoc(BaseDoc): +    tensor: AnyTensor + + +dv = DocVec[SimpleVecDoc]([SimpleVecDoc(tensor=np.ones(16)) for _ in range(8)]) + +proto_message_dv = dv.to_protobuf() + +dv_from_proto = DocVec[SimpleVecDoc].from_protobuf(proto_message_dv) +``` + +!!! note +    We are planning to add more serialization formats in the future, notably JSON. + +[`to_protobuf`][docarray.array.doc_vec.doc_vec.DocVec.to_protobuf] returns a protobuf object of `docarray_pb2.DocVecProto` class. [`from_protobuf`][docarray.array.doc_vec.doc_vec.DocVec.from_protobuf] accepts a protobuf message object to construct a [DocVec][docarray.array.doc_vec.doc_vec.DocVec]. 
+ +* The serializing [BaseDoc](./send_doc.md) section +* The serializing [DocList](./send_doclist.md) section diff --git a/mkdocs.yml b/mkdocs.yml index bd1548a0a22..f8a967fcec5 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -81,7 +81,15 @@ nav: - Representing data: - user_guide/representing/first_step.md - user_guide/representing/array.md - - user_guide/sending/first_step.md + - Sending: + - user_guide/sending/first_step.md + - Serialization: + - user_guide/sending/ser/send_doc.md + - user_guide/sending/ser/send_doclist.md + - user_guide/sending/ser/send_docvec.md + - Building API: + - user_guide/sending/api/jina.md + - user_guide/sending/api/fastAPI.md - Storing: - user_guide/storing/first_step.md - user_guide/storing/store_file.md @@ -92,9 +100,6 @@ nav: - how_to/add_doc_index.md - how_to/multimodal_training_and_serving.md - how_to/optimize_performance_with_id_generation.md - - how_to/audio2text.md - - Integrations: - - integrations/fastapi.md - Data Types: - data_types/text/text.md - data_types/image/image.md diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index 1d8fe1679b3..b071839c88c 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -4,6 +4,8 @@ from mktestdocs import grab_code_blocks from mktestdocs.__main__ import _executors, check_raw_string +file_to_skip = ['fastAPI', 'jina'] + def check_raw_file_full(raw, lang="python", keyword_ignore=[]): if lang not in _executors: @@ -43,19 +45,25 @@ def check_md_file(fpath, memory=False, lang="python", keyword_ignore=[]): check_raw_file_full(text, lang=lang, keyword_ignore=keyword_ignore) -@pytest.mark.parametrize( - 'fpath', - [ - *list(pathlib.Path('docs/user_guide').glob('**/*.md')), - *list(pathlib.Path('docs/data_types').glob('**/*.md')), - ], - ids=str, -) +files_to_check = [ + *list(pathlib.Path('docs/user_guide').glob('**/*.md')), + *list(pathlib.Path('docs/data_types').glob('**/*.md')), +] + +file_to_remove = [] + +for file in 
files_to_check: + for fn in file_to_skip: + if fn in str(file): + file_to_remove.append(file) + +for file in file_to_remove: + files_to_check.remove(file) + + +@pytest.mark.parametrize('fpath', files_to_check, ids=str) def test_files_good(fpath): - keyword_ignore = [] - if 'store_jac.md' in str(fpath): - keyword_ignore = ['jac'] - check_md_file(fpath=fpath, memory=True, keyword_ignore=keyword_ignore) + check_md_file(fpath=fpath, memory=True, keyword_ignore=['pickle', 'jac']) def test_readme(): From b6c3b66d1afd1bb0e782dc16fb35e0d13e1779a3 Mon Sep 17 00:00:00 2001 From: Charlotte Gerhaher Date: Fri, 14 Apr 2023 09:02:38 +0200 Subject: [PATCH 18/20] fix: apply sami suggestion Co-authored-by: samsja <55492238+samsja@users.noreply.github.com> Signed-off-by: Charlotte Gerhaher Signed-off-by: anna-charlotte --- docs/api_references/doc_store/doc_store.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api_references/doc_store/doc_store.md b/docs/api_references/doc_store/doc_store.md index eb6e65b9f4a..275f3e8e3b0 100644 --- a/docs/api_references/doc_store/doc_store.md +++ b/docs/api_references/doc_store/doc_store.md @@ -1,3 +1,3 @@ -# AbstractDocStore +# DocStore ::: docarray.store.abstract_doc_store.AbstractDocStore From d897d06f1481cdd48969c984c98f5b14de6ca644 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Fri, 14 Apr 2023 09:37:55 +0200 Subject: [PATCH 19/20] fix: apply suggestions from samis code review Signed-off-by: anna-charlotte --- .../storing/{ => doc_store}/store_file.md | 0 .../storing/{ => doc_store}/store_jac.md | 0 .../storing/{ => doc_store}/store_s3.md | 0 docs/user_guide/storing/first_step.md | 17 +++++++++++++---- mkdocs.yml | 8 +++++--- 5 files changed, 18 insertions(+), 7 deletions(-) rename docs/user_guide/storing/{ => doc_store}/store_file.md (100%) rename docs/user_guide/storing/{ => doc_store}/store_jac.md (100%) rename docs/user_guide/storing/{ => doc_store}/store_s3.md (100%) diff --git 
a/docs/user_guide/storing/store_file.md b/docs/user_guide/storing/doc_store/store_file.md similarity index 100% rename from docs/user_guide/storing/store_file.md rename to docs/user_guide/storing/doc_store/store_file.md diff --git a/docs/user_guide/storing/store_jac.md b/docs/user_guide/storing/doc_store/store_jac.md similarity index 100% rename from docs/user_guide/storing/store_jac.md rename to docs/user_guide/storing/doc_store/store_jac.md diff --git a/docs/user_guide/storing/store_s3.md b/docs/user_guide/storing/doc_store/store_s3.md similarity index 100% rename from docs/user_guide/storing/store_s3.md rename to docs/user_guide/storing/doc_store/store_s3.md diff --git a/docs/user_guide/storing/first_step.md b/docs/user_guide/storing/first_step.md index d821f5872fb..13ecfe138c0 100644 --- a/docs/user_guide/storing/first_step.md +++ b/docs/user_guide/storing/first_step.md @@ -3,6 +3,13 @@ In the previous sections we saw how to use [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] to represent multi-modal data and send it over the wire. In this section we will see how to store and persist this data. +DocArray offers two ways of storing your data: + +1. In a **[Document Store](#document-store)** for simple long-term storage +2. In a **[Document Index](#document-index)** for fast retrieval using vector similarity + +## Document Store + [DocList][docarray.array.doc_list.doc_list.DocList] can be persisted using the [`.push()`][docarray.array.doc_list.pushpull.PushPullMixin.push] and [`.pull()`][docarray.array.doc_list.pushpull.PushPullMixin.pull] methods. @@ -10,8 +17,10 @@ Under the hood, [DocStore][docarray.store.abstract_doc_store.AbstractDocStore] is used to persist a `DocList`. You can store your documents on-disk. Alternatively, you can upload them to [AWS S3](https://aws.amazon.com/s3/), [minio](https://min.io) or [Jina AI Cloud](https://cloud.jina.ai/user/storage). 
-This section is divided into three parts: +This section covers the following three topics: -- [Store](store_file.md) of [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] on-disk -- [Store on Jina AI Cloud](store_jac.md) -- [Store on S3](store_s3.md) + - [Store](doc_store/store_file.md) of [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] on-disk + - [Store on Jina AI Cloud](doc_store/store_jac.md) + - [Store on S3](doc_store/store_s3.md) + +## Document Index diff --git a/mkdocs.yml b/mkdocs.yml index f8a967fcec5..255eeff4818 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -92,9 +92,11 @@ nav: - user_guide/sending/api/fastAPI.md - Storing: - user_guide/storing/first_step.md - - user_guide/storing/store_file.md - - user_guide/storing/store_jac.md - - user_guide/storing/store_s3.md + - DocStore: + - user_guide/storing/doc_store/store_file.md + - user_guide/storing/doc_store/store_jac.md + - user_guide/storing/doc_store/store_s3.md + - How-to: - how_to/add_doc_index.md From 597cd3a4b079706a3a168b0c0cc302355ce98a99 Mon Sep 17 00:00:00 2001 From: Charlotte Gerhaher Date: Fri, 14 Apr 2023 09:09:35 +0200 Subject: [PATCH 20/20] docs: add missing links and clean up (#1370) * docs: add links and clean up Signed-off-by: anna-charlotte * fix: Text to TextDoc Signed-off-by: anna-charlotte * fix: 3d urls Signed-off-by: anna-charlotte * fix: pc url Signed-off-by: anna-charlotte * fix: comment out display pc Signed-off-by: anna-charlotte --------- Signed-off-by: anna-charlotte --- docarray/array/any_array.py | 2 +- docarray/array/doc_list/io.py | 13 +++--- docarray/array/doc_vec/doc_vec.py | 46 ++++++++++--------- docarray/base_doc/mixins/update.py | 8 ++-- docarray/data/torch_dataset.py | 36 +++++++++------ .../tensor/audio/abstract_audio_tensor.py | 2 +- 
.../tensor/image/abstract_image_tensor.py | 2 +- .../tensor/image/image_tensorflow_tensor.py | 3 +- .../typing/tensor/image/image_torch_tensor.py | 3 +- .../typing/tensor/video/video_tensor_mixin.py | 2 +- docarray/typing/url/url_3d/mesh_url.py | 34 +++++++------- docarray/typing/url/url_3d/point_cloud_url.py | 26 ++++++----- docarray/typing/url/video_url.py | 3 +- 13 files changed, 97 insertions(+), 83 deletions(-) diff --git a/docarray/array/any_array.py b/docarray/array/any_array.py index e3b46132ee6..6457072cf88 100644 --- a/docarray/array/any_array.py +++ b/docarray/array/any_array.py @@ -209,7 +209,7 @@ class Book(BaseDoc): ``` If your [`DocList`][docarray.array.doc_list.doc_list.DocList] is in doc_vec mode, and you want to access a field of - type [`AnyTensor`][docarray.typing.AnyTensor], the doc_vec tensor will be returned instead of a list: + type `AnyTensor`, the doc_vec tensor will be returned instead of a list: ```python class Image(BaseDoc): diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index 9f153e2f1bd..16dca6a5bb0 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -358,10 +358,9 @@ def from_csv( :param dialect: defines separator and how to handle whitespaces etc. Can be a [`csv.Dialect`](https://docs.python.org/3/library/csv.html#csv.Dialect) instance or one string of: - - - 'excel' (for comma separated values), - - 'excel-tab' (for tab separated values), - - 'unix' (for csv file generated on UNIX systems). + `'excel'` (for comma separated values), + `'excel-tab'` (for tab separated values), + `'unix'` (for csv file generated on UNIX systems). :return: `DocList` object """ @@ -428,10 +427,10 @@ def to_csv( :param dialect: defines separator and how to handle whitespaces etc. 
Can be a [`csv.Dialect`](https://docs.python.org/3/library/csv.html#csv.Dialect) instance or one string of: + `'excel'` (for comma separated values), + `'excel-tab'` (for tab separated values), + `'unix'` (for csv file generated on UNIX systems). - - 'excel' (for comma seperated values), - - 'excel-tab' (for tab separated values), - - 'unix' (for csv file generated on UNIX systems). """ fields = self.doc_type._get_access_paths() diff --git a/docarray/array/doc_vec/doc_vec.py b/docarray/array/doc_vec/doc_vec.py index 7d692b31084..adb701d2a11 100644 --- a/docarray/array/doc_vec/doc_vec.py +++ b/docarray/array/doc_vec/doc_vec.py @@ -59,32 +59,34 @@ class DocVec(AnyDocArray[T_doc]): computation that require batches of data (ex: matrix multiplication, distance calculation, deep learning forward pass) - A DocVec has a similar interface as - {class}`~docarray.array.DocList` but with an underlying implementation that is - column based instead of row based. Each field - of the schema of the DocVec - (the :attr:`~docarray.array.doc_vec.DocVec.doc_type` which is a - `BaseDoc`) will be stored in a column. If the field is a tensor, the data from all Documents will be stored as a single, doc_vec (torch/np/tf) tensor. - If the tensor field - is `AnyTensor` or a Union of tensor types, the - :attr:`~docarray.array.doc_vec.DocVec.tensor_type` will be used to determine - the type of the doc_vec column. - - If the field is another `BasedDoc` the column will be another DocVec that follows the - schema of the nested Document. - If the field is a `DocList` or - `DocVec` then the column will be a list of `DocVec`. + A DocVec has a similar interface as [`DocList`][docarray.array.DocList] + but with an underlying implementation that is column based instead of row based. + Each field of the schema of the `DocVec` (the `.doc_type` which is a + [`BaseDoc`][docarray.BaseDoc]) will be stored in a column. 
+ + If the field is a tensor, the data from all Documents will be stored as a single + doc_vec (torch/np/tf) tensor. + + If the tensor field is `AnyTensor` or a Union of tensor types, the + `.tensor_type` will be used to determine the type of the doc_vec column. + + If the field is another [`BaseDoc`][docarray.BaseDoc] the column will be another + `DocVec` that follows the schema of the nested Document. + + If the field is a [`DocList`][docarray.DocList] or `DocVec` then the column will + be a list of `DocVec`. + For any other type the column is a Python list. - Every `Document` inside a `DocVec` is a view into the data columns stored at the `DocVec` level. The `BaseDoc` does - not hold any data itself. The behavior of - this Document "view" is similar to the behavior of `view = tensor[i]` in - numpy/PyTorch. + Every `Document` inside a `DocVec` is a view into the data columns stored at the + `DocVec` level. The `BaseDoc` does not hold any data itself. The behavior of + this Document "view" is similar to the behavior of `view = tensor[i]` in + numpy/PyTorch. - :param docs: a homogeneous sequence of BaseDoc + :param docs: a homogeneous sequence of `BaseDoc` :param tensor_type: Tensor Class used to wrap the doc_vec tensors. This is useful - if the BaseDoc of this DocVec has some undefined tensor type like - AnyTensor or Union of NdArray and TorchTensor + if the BaseDoc of this DocVec has some undefined tensor type like + AnyTensor or Union of NdArray and TorchTensor """ doc_type: Type[T_doc] diff --git a/docarray/base_doc/mixins/update.py b/docarray/base_doc/mixins/update.py index 471e97483ba..754e6c9b789 100644 --- a/docarray/base_doc/mixins/update.py +++ b/docarray/base_doc/mixins/update.py @@ -24,9 +24,9 @@ def update(self, other: T): """ Updates self with the content of other. Changes are applied to self. 
Updating one Document with another consists in the following: - - setting data properties of the second Document to the first Document - if they are not None: + - Setting data properties of the second Document to the first Document + if they are not None - Concatenating lists and updating sets - Updating recursively Documents and DocArrays - Updating Dictionaries of the left with the right @@ -35,9 +35,9 @@ def update(self, other: T): it is applied to a static schema type, the presence of the field is given by the field not having a None value and that DocArrays, lists and sets are concatenated. It is worth mentioning that Tuples - are not merged together since they are meant to be inmutable, + are not merged together since they are meant to be immutable, so they behave as regular types and the value of `self` is updated - with the value of `other` + with the value of `other`. --- diff --git a/docarray/data/torch_dataset.py b/docarray/data/torch_dataset.py index 25fbb9a9a6a..f174326c2a1 100644 --- a/docarray/data/torch_dataset.py +++ b/docarray/data/torch_dataset.py @@ -14,30 +14,31 @@ class MultiModalDataset(Dataset, Generic[T_doc]): A dataset that can be used inside a PyTorch DataLoader. In other words, it implements the PyTorch Dataset interface. - :param docs: the DocList to be used as the dataset - :param preprocessing: a dictionary of field names and preprocessing functions - The preprocessing dictionary passed to the constructor consists of keys that are field names and values that are functions that take a single argument and return a single argument. - EXAMPLE USAGE - .. 
code-block:: python + --- + + ```python from torch.utils.data import DataLoader from docarray import DocList from docarray.data import MultiModalDataset - from docarray.documents import Text + from docarray.documents import TextDoc def prepend_number(text: str): return f"Number {text}" - docs = DocList[Text](Text(text=str(i)) for i in range(16)) - ds = MultiModalDataset[Text](docs, preprocessing={'text': prepend_number}) - loader = DataLoader(ds, batch_size=4, collate_fn=MultiModalDataset[Text].collate_fn) + docs = DocList[TextDoc](TextDoc(text=str(i)) for i in range(16)) + ds = MultiModalDataset[TextDoc](docs, preprocessing={'text': prepend_number}) + loader = DataLoader(ds, batch_size=4, collate_fn=MultiModalDataset[TextDoc].collate_fn) for batch in loader: print(batch.text) + ``` + + --- Nested fields can be accessed by using dot notation. The document itself can be accessed using the empty string as the key. @@ -47,24 +48,25 @@ def prepend_number(text: str): The transformations will be applied according to their order in the dictionary. - EXAMPLE USAGE - .. 
code-block:: python + --- + + ```python import torch from torch.utils.data import DataLoader from docarray import DocList, BaseDoc from docarray.data import MultiModalDataset - from docarray.documents import Text + from docarray.documents import TextDoc class Thesis(BaseDoc): - title: Text + title: TextDoc class Student(BaseDoc): thesis: Thesis - def embed_title(title: Text): + def embed_title(title: TextDoc): title.embedding = torch.ones(4) @@ -90,6 +92,12 @@ def add_nonsense(student: Student): loader = DataLoader(ds, batch_size=4, collate_fn=ds.collate_fn) for batch in loader: print(batch.thesis.title.embedding) + ``` + + --- + + :param docs: the `DocList` to be used as the dataset + :param preprocessing: a dictionary of field names and preprocessing functions """ doc_type: Optional[Type[BaseDoc]] = None diff --git a/docarray/typing/tensor/audio/abstract_audio_tensor.py b/docarray/typing/tensor/audio/abstract_audio_tensor.py index 56fdae6c05e..b987b2addfd 100644 --- a/docarray/typing/tensor/audio/abstract_audio_tensor.py +++ b/docarray/typing/tensor/audio/abstract_audio_tensor.py @@ -16,7 +16,7 @@ class AbstractAudioTensor(AbstractTensor, ABC): def to_bytes(self) -> 'AudioBytes': """ - Convert audio tensor to AudioBytes. + Convert audio tensor to [`AudioBytes`][docarray.typing.AudioBytes]. """ from docarray.typing.bytes.audio_bytes import AudioBytes diff --git a/docarray/typing/tensor/image/abstract_image_tensor.py b/docarray/typing/tensor/image/abstract_image_tensor.py index 0a880be9865..9566910781d 100644 --- a/docarray/typing/tensor/image/abstract_image_tensor.py +++ b/docarray/typing/tensor/image/abstract_image_tensor.py @@ -15,7 +15,7 @@ class AbstractImageTensor(AbstractTensor, ABC): def to_bytes(self, format: str = 'PNG') -> 'ImageBytes': """ - Convert image tensor to ImageBytes. + Convert image tensor to [`ImageBytes`][docarray.typing.ImageBytes]. :param format: the image format use to store the image, can be 'PNG' , 'JPG' ... 
:return: an ImageBytes object diff --git a/docarray/typing/tensor/image/image_tensorflow_tensor.py b/docarray/typing/tensor/image/image_tensorflow_tensor.py index c95b001e704..f373f45b30e 100644 --- a/docarray/typing/tensor/image/image_tensorflow_tensor.py +++ b/docarray/typing/tensor/image/image_tensorflow_tensor.py @@ -14,7 +14,8 @@ class ImageTensorFlowTensor( """ Subclass of [`TensorFlowTensor`][docarray.typing.TensorFlowTensor], to represent an image tensor. Adds image-specific features to the tensor. - For instance the ability convert the tensor back to image bytes which are + For instance the ability convert the tensor back to + [`ImageBytes`][docarray.typing.ImageBytes] which are optimized to send over the wire. diff --git a/docarray/typing/tensor/image/image_torch_tensor.py b/docarray/typing/tensor/image/image_torch_tensor.py index 249030c00f6..103a936d705 100644 --- a/docarray/typing/tensor/image/image_torch_tensor.py +++ b/docarray/typing/tensor/image/image_torch_tensor.py @@ -12,7 +12,8 @@ class ImageTorchTensor(AbstractImageTensor, TorchTensor, metaclass=metaTorchAndN """ Subclass of [`TorchTensor`][docarray.typing.TorchTensor], to represent an image tensor. Adds image-specific features to the tensor. - For instance the ability convert the tensor back to image bytes which are + For instance the ability convert the tensor back to + [`ImageBytes`][docarray.typing.ImageBytes] which are optimized to send over the wire. diff --git a/docarray/typing/tensor/video/video_tensor_mixin.py b/docarray/typing/tensor/video/video_tensor_mixin.py index d2ed61eacee..173daaacce8 100644 --- a/docarray/typing/tensor/video/video_tensor_mixin.py +++ b/docarray/typing/tensor/video/video_tensor_mixin.py @@ -135,7 +135,7 @@ def to_bytes( audio_format: str = 'fltp', ) -> 'VideoBytes': """ - Convert video tensor to VideoBytes. + Convert video tensor to [`VideoBytes`][docarray.typing.VideoBytes]. :param audio_tensor: AudioTensor containing the video's soundtrack. 
:param video_frame_rate: video frames per second. diff --git a/docarray/typing/url/url_3d/mesh_url.py b/docarray/typing/url/url_3d/mesh_url.py index 9ba5e330e6e..70f32eb5581 100644 --- a/docarray/typing/url/url_3d/mesh_url.py +++ b/docarray/typing/url/url_3d/mesh_url.py @@ -26,33 +26,33 @@ def load( trimesh_args: Optional[Dict[str, Any]] = None, ) -> 'VerticesAndFaces': """ - Load the data from the url into a VerticesAndFaces object containing - vertices and faces information. + Load the data from the url into a [`VerticesAndFaces`][docarray.documents.VerticesAndFaces] + object containing vertices and faces information. --- - ```python - from docarray import BaseDoc + ```python + from docarray import BaseDoc - from docarray.typing import Mesh3DUrl, NdArray + from docarray.typing import Mesh3DUrl, NdArray - class MyDoc(BaseDoc): - mesh_url: Mesh3DUrl + class MyDoc(BaseDoc): + mesh_url: Mesh3DUrl - doc = MyDoc(mesh_url="toydata/tetrahedron.obj") + doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj") - tensors = doc.mesh_url.load() - assert isinstance(tensors.vertices, NdArray) - assert isinstance(tensors.faces, NdArray) - ``` + tensors = doc.mesh_url.load() + assert isinstance(tensors.vertices, NdArray) + assert isinstance(tensors.faces, NdArray) + ``` - --- - :param skip_materials: Skip materials if True, else skip. - :param trimesh_args: dictionary of additional arguments for `trimesh.load()` - or `trimesh.load_remote()`. - :return: VerticesAndFaces object containing vertices and faces information. + + :param skip_materials: Skip materials if True, else skip. + :param trimesh_args: dictionary of additional arguments for `trimesh.load()` + or `trimesh.load_remote()`. + :return: VerticesAndFaces object containing vertices and faces information. 
""" from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces diff --git a/docarray/typing/url/url_3d/point_cloud_url.py b/docarray/typing/url/url_3d/point_cloud_url.py index dd3f17be0df..efe6ce6ae0e 100644 --- a/docarray/typing/url/url_3d/point_cloud_url.py +++ b/docarray/typing/url/url_3d/point_cloud_url.py @@ -29,7 +29,7 @@ def load( trimesh_args: Optional[Dict[str, Any]] = None, ) -> 'PointsAndColors': """ - Load the data from the url into an NdArray containing point cloud information. + Load the data from the url into an `NdArray` containing point cloud information. --- @@ -45,7 +45,7 @@ class MyDoc(BaseDoc): point_cloud_url: PointCloud3DUrl - doc = MyDoc(point_cloud_url="toydata/tetrahedron.obj") + doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj") # point_cloud = doc.point_cloud_url.load(samples=100) @@ -96,20 +96,24 @@ def display( First, it loads the point cloud into a `PointsAndColors` object, and then calls display on it. The following is therefore equivalent: - .. code-block:: python + --- - import numpy as np - from docarray import BaseDoc + ```python + import numpy as np + from docarray import BaseDoc - from docarray.documents import PointCloud3D + from docarray.documents import PointCloud3D - pc = PointCloud3D("toydata/tetrahedron.obj") + pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj") - # option 1 - pc.url.display() + # option 1 + # pc.url.display() - # option 2 (equivalent) - pc.url.load(samples=10000).display() + # option 2 (equivalent) + # pc.url.load(samples=10000).display() + ``` + + --- :param samples: number of points to sample from the mesh. 
""" diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py index db9dd4b5080..8c5f0e6d995 100644 --- a/docarray/typing/url/video_url.py +++ b/docarray/typing/url/video_url.py @@ -73,8 +73,7 @@ class MyDoc(BaseDoc): --- :param kwargs: supports all keyword arguments that are being supported by - av.open() as described in: - https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open + av.open() as described [here](https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open) :return: [`AudioNdArray`][docarray.typing.AudioNdArray] representing the audio content, [`VideoNdArray`][docarray.typing.VideoNdArray] representing the images of the video,