From a34d9cc0596261d766fcf83266c6320a889c5fc6 Mon Sep 17 00:00:00 2001 From: nan-wang Date: Mon, 10 Apr 2023 12:08:11 +0200 Subject: [PATCH 01/33] docs: add serialization for json Signed-off-by: nan-wang --- docs/user_guide/sending/send_doclist.md | 31 +++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 docs/user_guide/sending/send_doclist.md diff --git a/docs/user_guide/sending/send_doclist.md b/docs/user_guide/sending/send_doclist.md new file mode 100644 index 00000000000..c4b340649eb --- /dev/null +++ b/docs/user_guide/sending/send_doclist.md @@ -0,0 +1,31 @@ +# Serialization for `DocList` +When sending or storing `DocList`, you need to use serialization. `DocList` supports multiple ways to serialize the data. + +## json +You can use `to_json()` and `from_json()` to serialize and deserialize a `DocList`. + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +with open('simple-dl.json', 'wb') as f: + json_dl = dl.to_json() + print(json_dl) + f.write(json_dl) + +with open('simple-dl.json', 'r') as f: + dl_load_from_json = DocList[SimpleDoc].from_json(f.read()) + print(dl_load_from_json) +``` + +`to_json()` return the binary representation of the json object. `from_json()` can load from either `str` or `binary` representation of the json object. 
+ +```output +b'[{"id":"5540e72d407ae81abb2390e9249ed066","text":"doc 0"},{"id":"fbe9f80d2fa03571e899a2887af1ac1b","text":"doc 1"}]' +``` \ No newline at end of file From 2a9de6d2d8016b3ea4b140a005effebbba538ec9 Mon Sep 17 00:00:00 2001 From: nan-wang Date: Mon, 10 Apr 2023 12:40:25 +0200 Subject: [PATCH 02/33] docs: add serialization for binary and protobuf Signed-off-by: nan-wang --- docs/user_guide/sending/send_doclist.md | 69 +++++++++++++++++++++++-- 1 file changed, 66 insertions(+), 3 deletions(-) diff --git a/docs/user_guide/sending/send_doclist.md b/docs/user_guide/sending/send_doclist.md index c4b340649eb..374acf0781a 100644 --- a/docs/user_guide/sending/send_doclist.md +++ b/docs/user_guide/sending/send_doclist.md @@ -1,7 +1,7 @@ # Serialization for `DocList` When sending or storing `DocList`, you need to use serialization. `DocList` supports multiple ways to serialize the data. -## json +## JSON You can use `to_json()` and `from_json()` to serialize and deserialize a `DocList`. ```python @@ -24,8 +24,71 @@ with open('simple-dl.json', 'r') as f: print(dl_load_from_json) ``` -`to_json()` return the binary representation of the json object. `from_json()` can load from either `str` or `binary` representation of the json object. +`to_json()` returns the binary representation of the json object. `from_json()` can load from either `str` or `binary` representation of the json object. 
```output b'[{"id":"5540e72d407ae81abb2390e9249ed066","text":"doc 0"},{"id":"fbe9f80d2fa03571e899a2887af1ac1b","text":"doc 1"}]' -``` \ No newline at end of file +``` + +## Protobuf +When using protobuf, you can use `to_protobuf()` and `from_protobuf()` to serialize and deserialize a `DocList` + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +proto_message_dl = dl.to_protobuf() +dl_from_proto = DocList[SimpleDoc].from_protobuf(proto_message_dl) +print(type(proto_message_dl)) +print(dl_from_proto) +``` + +`to_protobuf()` returns a protobuf object of `docarray_pb2.DocListProto` class. `from_protobuf()` accepts a protobuf message object to construct a `DocList`. + + +## Bytes + + +## Binary +Storing a `DocList` supports both `pickle` and `protobuf` protocols. Besides, you can choose different compression methods. + + +| Compression Methods | Notes | +| --- |-------| +| `lz4` | | +| `bz2` | | +| `lzma` | | +| `zlib` | | +| `gzip` | | + +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +dl.save_binary('simple-dl.pickle', compress=None, protocol='pickle') + +dl_from_binary = DocList[SimpleDoc].load_binary('simple-dl.pickle', compress=None, protocol='pickle') +``` + +The `DocList` is stored at `simple-dl.pickle` file. 
+ + +## Base64 + + +## CSV + + +## Pandas.Dataframe \ No newline at end of file From 189787a5f5d847dba274edef72617b01af319123 Mon Sep 17 00:00:00 2001 From: nan-wang Date: Mon, 10 Apr 2023 12:56:28 +0200 Subject: [PATCH 03/33] docs: add serialization for base64 and bytes Signed-off-by: nan-wang --- docs/user_guide/sending/send_doclist.md | 44 +++++++++++++++++++++---- 1 file changed, 37 insertions(+), 7 deletions(-) diff --git a/docs/user_guide/sending/send_doclist.md b/docs/user_guide/sending/send_doclist.md index 374acf0781a..dfd43b10905 100644 --- a/docs/user_guide/sending/send_doclist.md +++ b/docs/user_guide/sending/send_doclist.md @@ -51,12 +51,9 @@ print(dl_from_proto) `to_protobuf()` returns a protobuf object of `docarray_pb2.DocListProto` class. `from_protobuf()` accepts a protobuf message object to construct a `DocList`. - -## Bytes - - -## Binary -Storing a `DocList` supports both `pickle` and `protobuf` protocols. Besides, you can choose different compression methods. +## Base64 +When transferring over the network, you can choose `Base64` format to serialize the `DocList`. +Storing a `DocList` in Base64 supports both `pickle` and `protobuf` protocols. Besides, you can choose different compression methods. | Compression Methods | Notes | | --- |-------| | `lz4` | | | `bz2` | | | `lzma` | | | `zlib` | | | `gzip` | | ```python from docarray import BaseDoc, DocList +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +base64_repr_dl = dl.to_base64(compress=None, protocol='pickle') + +dl_from_base64 = DocList[SimpleDoc].from_base64(base64_repr_dl, compress=None, protocol='pickle') +``` + +## Binary +Similar to `Base64` serialization, `Binary` serialization also supports different protocols and compression methods. 
+ +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +dl.save_binary('simple-dl.pickle', compress=None, protocol='pickle') + +dl_from_binary = DocList[SimpleDoc].load_binary('simple-dl.pickle', compress=None, protocol='pickle') +``` + +The `DocList` is stored at `simple-dl.pickle` file. +### Bytes +Under the hood, `save_binary()` prepares the file object and calls `to_bytes()` function to convert the `DocList` into a byte object. You can use `to_bytes()` function directly and use `from_bytes()` to load the `DocList` from a byte object. You can use `protocol` to choose between `pickle` and `protobuf`. Besides, `to_bytes()` and `save_binary()` support multiple options for `compress` as well. +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +bytes_dl = dl.to_bytes(protocol='pickle', compress=None) + +dl_from_bytes = DocList[SimpleDoc].from_bytes(bytes_dl, compress=None, protocol='pickle') +``` ## CSV From f363a1b2b4b011d82ab870bc73a4669ffb56de93 Mon Sep 17 00:00:00 2001 From: nan-wang Date: Mon, 10 Apr 2023 13:01:32 +0200 Subject: [PATCH 04/33] docs: add serialization for csv Signed-off-by: nan-wang --- docs/user_guide/sending/send_doclist.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/docs/user_guide/sending/send_doclist.md b/docs/user_guide/sending/send_doclist.md index dfd43b10905..936795bf86e 100644 --- a/docs/user_guide/sending/send_doclist.md +++ b/docs/user_guide/sending/send_doclist.md @@ -119,6 +119,22 @@ dl_from_bytes = DocList[SimpleDoc].from_bytes(bytes_dl, compress=None, protocol= ## CSV +You can use `from_csv()` and `to_csv()` to deserialize and serialize the `DocList` from/to a CSV file. Use the `dialect` parameter to choose the dialect of the CSV format. Check more details in the API doc. TODO: Add api doc here. 
+ +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +dl.to_csv('simple-dl.csv') +dl_from_csv = DocList[SimpleDoc].from_csv('simple-dl.csv') +print(dl_from_csv) +``` ## Pandas.Dataframe \ No newline at end of file From aedb7d8a54871b49d2e374970190a0a00462ab26 Mon Sep 17 00:00:00 2001 From: nan-wang Date: Mon, 10 Apr 2023 13:04:28 +0200 Subject: [PATCH 05/33] docs: add serialization for dataframe Signed-off-by: nan-wang --- docs/user_guide/sending/send_doclist.md | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/sending/send_doclist.md b/docs/user_guide/sending/send_doclist.md index 936795bf86e..92a48f1c803 100644 --- a/docs/user_guide/sending/send_doclist.md +++ b/docs/user_guide/sending/send_doclist.md @@ -137,4 +137,20 @@ print(dl_from_csv) ``` -## Pandas.Dataframe \ No newline at end of file +## Pandas.Dataframe +You can use `from_pandas()` and `to_pandas()` to load/save the `DocList` from/to a pandas DataFrame. 
+ +```python +from docarray import BaseDoc, DocList + + +class SimpleDoc(BaseDoc): + text: str + + +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) + +df = dl.to_pandas() +dl_from_dataframe = DocList[SimpleDoc].from_pandas(df) +print(dl_from_dataframe) +``` \ No newline at end of file From e223aba1ec3553ba775f67c8cea6c6c51fa4c057 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 11:20:08 +0200 Subject: [PATCH 06/33] fix: add doctring to documentaion basedoc Signed-off-by: samsja --- docarray/base_doc/doc.py | 118 +++++++++++++++++++++++++++++ docarray/base_doc/mixins/update.py | 34 +++++---- 2 files changed, 136 insertions(+), 16 deletions(-) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index a5c42a82ee4..7e100bceecb 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -13,6 +13,7 @@ if TYPE_CHECKING: from docarray.array.doc_vec.column_storage import ColumnStorageView + from docarray.proto import DocProto _console: Console = Console() @@ -141,3 +142,120 @@ def _docarray_to_json_compatible(self) -> Dict: :return: A dictionary of the BaseDoc object """ return self.dict() + + ######################################################################################################################################################## + ### this section is just for documentation purposes will be removed later once https://github.com/mkdocstrings/griffe/issues/138 is fixed ############## + ######################################################################################################################################################## + + def to_bytes( + self, protocol: str = 'protobuf', compress: Optional[str] = None + ) -> bytes: + """Serialize itself into bytes. + + For more Pythonic code, please use ``bytes(...)``. + + :param protocol: protocol to use. 
It can be 'pickle' or 'protobuf' + :param compress: compress algorithm to use + :return: the binary serialization in bytes + """ + return super().to_bytes(protocol, compress) + + @classmethod + def from_bytes( + cls: Type[T], + data: bytes, + protocol: str = 'protobuf', + compress: Optional[str] = None, + ) -> T: + """Build Document object from binary bytes + + :param data: binary bytes + :param protocol: protocol to use. It can be 'pickle' or 'protobuf' + :param compress: compress method to use + :return: a Document object + """ + return super(BaseDoc, cls).from_bytes(data, protocol, compress) + + def to_base64( + self, protocol: str = 'protobuf', compress: Optional[str] = None + ) -> str: + """Serialize a Document object into as base64 string + + :param protocol: protocol to use. It can be 'pickle' or 'protobuf' + :param compress: compress method to use + :return: a base64 encoded string + """ + return super().to_base64(protocol, compress) + + @classmethod + def from_base64( + cls: Type[T], + data: str, + protocol: str = 'pickle', + compress: Optional[str] = None, + ) -> T: + """Build Document object from binary bytes + + :param data: a base64 encoded string + :param protocol: protocol to use. It can be 'pickle' or 'protobuf' + :param compress: compress method to use + :return: a Document object + """ + return super(BaseDoc, cls).from_base64(data, protocol, compress) + + @classmethod + def from_protobuf(cls: Type[T], pb_msg: 'DocProto') -> T: + """create a Document from a protobuf message + + :param pb_msg: the proto message of the Document + :return: a Document initialize with the proto data + """ + return super(BaseDoc, cls).from_protobuf(pb_msg) + + def update(self, other: T): + """ + Updates self with the content of other. Changes are applied to self. 
+ Updating one Document with another consists in the following: + - setting data properties of the second Document to the first Document + if they are not None + - Concatenating lists and updating sets + - Updating recursively Documents and DocArrays + - Updating Dictionaries of the left with the right + + It behaves as an update operation for Dictionaries, except that since + it is applied to a static schema type, the presence of the field is + given by the field not having a None value and that DocArrays, + lists and sets are concatenated. It is worth mentioning that Tuples + are not merged together since they are meant to be inmutable, + so they behave as regular types and the value of `self` is updated + with the value of `other` + + + --- + + ```python + from docarray import BaseDoc + from docarray.documents import Text + + + class MyDocument(BaseDoc): + content: str + title: Optional[str] = None + tags_: List + + + doc1 = MyDocument( + content='Core content of the document', title='Title', tags_=['python', 'AI'] + ) + doc2 = MyDocument(content='Core content updated', tags_=['docarray']) + + doc1.update(doc2) + assert doc1.content == 'Core content updated' + assert doc1.title == 'Title' + assert doc1.tags_ == ['python', 'AI', 'docarray'] + ``` + + --- + :param other: The Document with which to update the contents of this + """ + super().update(other) diff --git a/docarray/base_doc/mixins/update.py b/docarray/base_doc/mixins/update.py index 99fdbc2bf8e..5a21738a7d4 100644 --- a/docarray/base_doc/mixins/update.py +++ b/docarray/base_doc/mixins/update.py @@ -38,30 +38,32 @@ def update(self, other: T): so they behave as regular types and the value of `self` is updated with the value of `other` - EXAMPLE USAGE - .. 
code-block:: python + --- - from docarray import BaseDoc - from docarray.documents import Text + ```python + from docarray import BaseDoc + from docarray.documents import Text - class MyDocument(BaseDoc): - content: str - title: Optional[str] = None - tags_: List + class MyDocument(BaseDoc): + content: str + title: Optional[str] = None + tags_: List - doc1 = MyDocument( - content='Core content of the document', title='Title', tags_=['python', 'AI'] - ) - doc2 = MyDocument(content='Core content updated', tags_=['docarray']) + doc1 = MyDocument( + content='Core content of the document', title='Title', tags_=['python', 'AI'] + ) + doc2 = MyDocument(content='Core content updated', tags_=['docarray']) - doc1.update(doc2) - assert doc1.content == 'Core content updated' - assert doc1.title == 'Title' - assert doc1.tags_ == ['python', 'AI', 'docarray'] + doc1.update(doc2) + assert doc1.content == 'Core content updated' + assert doc1.title == 'Title' + assert doc1.tags_ == ['python', 'AI', 'docarray'] + ``` + --- :param other: The Document with which to update the contents of this """ if type(self) != type(other): From 7491246becf3fa53b6de01a2a6e091a1c1804144 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 11:21:43 +0200 Subject: [PATCH 07/33] fix: fix mypy Signed-off-by: samsja --- docarray/base_doc/doc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index 7e100bceecb..ccb5b65b99e 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -18,6 +18,7 @@ _console: Console = Console() T = TypeVar('T', bound='BaseDoc') +T_update = TypeVar('T_update', bound='UpdateMixin') class BaseDoc(BaseModel, IOMixin, UpdateMixin, BaseNode): @@ -212,7 +213,7 @@ def from_protobuf(cls: Type[T], pb_msg: 'DocProto') -> T: """ return super(BaseDoc, cls).from_protobuf(pb_msg) - def update(self, other: T): + def update(self, other: T_update): """ Updates self with the content of other. 
Changes are applied to self. Updating one Document with another consists in the following: From 5a40e79c70d72bb28a5eb55da58c47696e3d3c8b Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 11:39:04 +0200 Subject: [PATCH 08/33] fix: add docstring doc list Signed-off-by: samsja --- docarray/array/doc_list/doc_list.py | 290 +++++++++++++++++++++++++++- 1 file changed, 289 insertions(+), 1 deletion(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 89364ff4842..257f294a5e7 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -1,10 +1,15 @@ +import csv import io +import pathlib from functools import wraps from typing import ( TYPE_CHECKING, Any, + BinaryIO, Callable, + Generator, Iterable, + Iterator, List, MutableSequence, Optional, @@ -18,7 +23,7 @@ from typing_inspect import is_union_type from docarray.array.any_array import AnyDocArray -from docarray.array.doc_list.io import IOMixinArray +from docarray.array.doc_list.io import IOMixinArray, _LazyRequestReader from docarray.array.doc_list.pushpull import PushPullMixin from docarray.array.doc_list.sequence_indexing_mixin import ( IndexingSequenceMixin, @@ -28,6 +33,7 @@ from docarray.typing import NdArray if TYPE_CHECKING: + import pandas as pd from pydantic import BaseConfig from pydantic.fields import ModelField @@ -305,3 +311,285 @@ def __getitem__(self: T, item: IndexIterType) -> T: def __getitem__(self, item): return super().__getitem__(item) + + ######################################################################################################################################################## + ### this section is just for documentation purposes will be removed later once https://github.com/mkdocstrings/griffe/issues/138 is fixed ############## + ######################################################################################################################################################## + + def 
to_protobuf(self) -> 'DocListProto': + """Convert DocList into a Protobuf message""" + return super(DocList, self).to_protobuf() + + @classmethod + def from_bytes( + cls: Type[T], + data: bytes, + protocol: str = 'protobuf-array', + compress: Optional[str] = None, + show_progress: bool = False, + ) -> T: + """Deserialize bytes into a DocList. + + :param data: Bytes from which to deserialize + :param protocol: protocol that was used to serialize + :param compress: compress algorithm that was used to serialize + :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + :return: the deserialized DocList + """ + return super(DocList, cls).from_bytes( + data, protocol=protocol, compress=compress, show_progress=show_progress + ) + + def to_binary_stream( + self, + protocol: str = 'protobuf', + compress: Optional[str] = None, + show_progress: bool = False, + ) -> Iterator[bytes]: + return super().to_binary_stream( + protocol=protocol, compress=compress, show_progress=show_progress + ) + + def to_bytes( + self, + protocol: str = 'protobuf-array', + compress: Optional[str] = None, + file_ctx: Optional[BinaryIO] = None, + show_progress: bool = False, + ) -> Optional[bytes]: + """Serialize itself into bytes. + + For more Pythonic code, please use ``bytes(...)``. + + :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' + :param compress: compress algorithm to use + :param file_ctx: File or filename or serialized bytes where the data is stored. 
+ :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + :return: the binary serialization in bytes or None if file_ctx is passed where to store + """ + return super().to_bytes( + protocol=protocol, + compress=compress, + file_ctx=file_ctx, + show_progress=show_progress, + ) + + @classmethod + def from_base64( + cls: Type[T], + data: str, + protocol: str = 'protobuf-array', + compress: Optional[str] = None, + show_progress: bool = False, + ) -> T: + """Deserialize base64 strings into a DocList. + + :param data: Base64 string to deserialize + :param protocol: protocol that was used to serialize + :param compress: compress algorithm that was used to serialize + :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + :return: the deserialized DocList + """ + return super(DocList, cls).from_base64( + data, protocol=protocol, compress=compress, show_progress=show_progress + ) + + def to_base64( + self, + protocol: str = 'protobuf-array', + compress: Optional[str] = None, + show_progress: bool = False, + ) -> str: + """Serialize itself into base64 encoded string. + + :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' + :param compress: compress algorithm to use + :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + :return: the binary serialization in bytes or None if file_ctx is passed where to store + """ + return super().to_base64( + protocol=protocol, compress=compress, show_progress=show_progress + ) + + @classmethod + def from_json( + cls: Type[T], + file: Union[str, bytes, bytearray], + ) -> T: + """Deserialize JSON strings or bytes into a DocList. + + :param file: JSON object from where to deserialize a DocList + :return: the deserialized DocList + """ + return super(DocList, cls).from_json(file) + + def to_json(self) -> bytes: + """Convert the object into JSON bytes. 
Can be loaded via :meth:`.from_json`. + :return: JSON serialization of DocList + """ + return super().to_json() + + @classmethod + def from_csv( + cls, + file_path: str, + encoding: str = 'utf-8', + dialect: Union[str, csv.Dialect] = 'excel', + ) -> 'DocList': + """ + Load a DocList from a csv file following the schema defined in the + :attr:`~docarray.DocList.doc_type` attribute. + Every row of the csv file will be mapped to one document in the doc_list. + The column names (defined in the first row) have to match the field names + of the Document type. + For nested fields use "__"-separated access paths, such as 'image__url'. + + List-like fields (including field of type DocList) are not supported. + + :param file_path: path to csv file to load DocList from. + :param encoding: encoding used to read the csv file. Defaults to 'utf-8'. + :param dialect: defines separator and how to handle whitespaces etc. + Can be a csv.Dialect instance or one string of: + 'excel' (for comma seperated values), + 'excel-tab' (for tab separated values), + 'unix' (for csv file generated on UNIX systems). + :return: DocList + """ + return super(DocList, cls).from_csv( + file_path, encoding=encoding, dialect=dialect + ) + + def to_csv( + self, file_path: str, dialect: Union[str, csv.Dialect] = 'excel' + ) -> None: + """ + Save a DocList to a csv file. + The field names will be stored in the first row. Each row corresponds to the + information of one Document. + Columns for nested fields will be named after the "__"-seperated access paths, + such as `"image__url"` for `image.url`. + + :param file_path: path to a csv file. + :param dialect: defines separator and how to handle whitespaces etc. + Can be a csv.Dialect instance or one string of: + 'excel' (for comma seperated values), + 'excel-tab' (for tab separated values), + 'unix' (for csv file generated on UNIX systems). 
+ """ + return super().to_csv(file_path, dialect=dialect) + + @classmethod + def from_dataframe(cls, df: 'pd.DataFrame') -> 'DocList': + """ + Load a DocList from a `pandas.DataFrame` following the schema + defined in the :attr:`~docarray.DocList.doc_type` attribute. + Every row of the dataframe will be mapped to one Document in the doc_list. + The column names of the dataframe have to match the field names of the + Document type. + For nested fields use "__"-separated access paths as column names, + such as 'image__url'. + + List-like fields (including field of type DocList) are not supported. + + EXAMPLE USAGE: + + .. code-block:: python + + import pandas as pd + + from docarray import BaseDoc, DocList + + + class Person(BaseDoc): + name: str + follower: int + + + df = pd.DataFrame( + data=[['Maria', 12345], ['Jake', 54321]], columns=['name', 'follower'] + ) + + docs = DocList[Person].from_dataframe(df) + + assert docs.name == ['Maria', 'Jake'] + assert docs.follower == [12345, 54321] + + + :param df: pandas.DataFrame to extract Document's information from + :return: DocList where each Document contains the information of one + corresponding row of the `pandas.DataFrame`. + """ + return super(DocList, cls).from_dataframe(df) + + def to_dataframe(self) -> 'pd.DataFrame': + """ + Save a DocList to a `pandas.DataFrame`. + The field names will be stored as column names. Each row of the dataframe corresponds + to the information of one Document. + Columns for nested fields will be named after the "__"-seperated access paths, + such as `"image__url"` for `image.url`. 
+ + :return: pandas.DataFrame + """ + return super().to_dataframe() + + @classmethod + def load_binary( + cls: Type[T], + file: Union[str, bytes, pathlib.Path, io.BufferedReader, _LazyRequestReader], + protocol: str = 'protobuf-array', + compress: Optional[str] = None, + show_progress: bool = False, + streaming: bool = False, + ) -> Union[T, Generator['T_doc', None, None]]: + """Load doc_list elements from a compressed binary file. + + :param file: File or filename or serialized bytes where the data is stored. + :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' + :param compress: compress algorithm to use + :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + :param streaming: if `True` returns a generator over `Document` objects. + In case protocol is pickle the `Documents` are streamed from disk to save memory usage + :return: a DocList object + + .. note:: + If `file` is `str` it can specify `protocol` and `compress` as file extensions. + This functionality assumes `file=file_name.$protocol.$compress` where `$protocol` and `$compress` refer to a + string interpolation of the respective `protocol` and `compress` methods. + For example if `file=my_docarray.protobuf.lz4` then the binary data will be loaded assuming `protocol=protobuf` + and `compress=lz4`. + """ + return super().load_binary( + file, protocol=protocol, compress=compress, show_progress=show_progress + ) + + def save_binary( + self, + file: Union[str, pathlib.Path], + protocol: str = 'protobuf-array', + compress: Optional[str] = None, + show_progress: bool = False, + ) -> None: + """Save DocList into a binary file. + + It will use the protocol to pick how to save the DocList. + If used 'picke-doc_list` and `protobuf-array` the DocList will be stored + and compressed at complete level using `pickle` or `protobuf`. 
+ When using `protobuf` or `pickle` as protocol each Document in DocList + will be stored individually and this would make it available for streaming. + + :param file: File or filename to which the data is saved. + :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' + :param compress: compress algorithm to use + :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + + .. note:: + If `file` is `str` it can specify `protocol` and `compress` as file extensions. + This functionality assumes `file=file_name.$protocol.$compress` where `$protocol` and `$compress` refer to a + string interpolation of the respective `protocol` and `compress` methods. + For example if `file=my_docarray.protobuf.lz4` then the binary data will be created using `protocol=protobuf` + and `compress=lz4`. + """ + return super().save_binary( + file, protocol=protocol, compress=compress, show_progress=show_progress + ) From 4e53699e6ebbfabd51fa797a1d6c6f0081b72cbe Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 11:52:51 +0200 Subject: [PATCH 09/33] fix: dic doc array docstring Signed-off-by: samsja --- docarray/array/any_array.py | 99 ++++++++++++++++------------- docarray/array/doc_list/doc_list.py | 28 ++++---- docarray/array/doc_list/io.py | 28 ++++---- 3 files changed, 85 insertions(+), 70 deletions(-) diff --git a/docarray/array/any_array.py b/docarray/array/any_array.py index d156da9ea8c..2e1e6db091c 100644 --- a/docarray/array/any_array.py +++ b/docarray/array/any_array.py @@ -162,77 +162,88 @@ def traverse_flat( names, concatenated and "__"-separated. It describes the path from the first level to an arbitrary one, e.g. 'content__image__url'. - :param access_path: a string that represents the access path ("__"-separated). - :return: list of the accessed objects, flattened if nested. - EXAMPLE USAGE - .. 
code-block:: python - from docarray import BaseDoc, DocList, Text + --- + ```python + from docarray import BaseDoc, DocList, Text - class Author(BaseDoc): - name: str + class Author(BaseDoc): + name: str - class Book(BaseDoc): - author: Author - content: Text + class Book(BaseDoc): + author: Author + content: Text - docs = DocList[Book]( - Book(author=Author(name='Jenny'), content=Text(text=f'book_{i}')) - for i in range(10) # noqa: E501 - ) - books = docs.traverse_flat(access_path='content') # list of 10 Text objs + docs = DocList[Book]( + Book(author=Author(name='Jenny'), content=Text(text=f'book_{i}')) + for i in range(10) # noqa: E501 + ) - authors = docs.traverse_flat(access_path='author__name') # list of 10 strings + books = docs.traverse_flat(access_path='content') # list of 10 Text objs + + authors = docs.traverse_flat(access_path='author__name') # list of 10 strings + ``` + + --- If the resulting list is a nested list, it will be flattened: - EXAMPLE USAGE - .. code-block:: python - from docarray import BaseDoc, DocList + --- + ```python + from docarray import BaseDoc, DocList - class Chapter(BaseDoc): - content: str + class Chapter(BaseDoc): + content: str - class Book(BaseDoc): - chapters: DocList[Chapter] + class Book(BaseDoc): + chapters: DocList[Chapter] - docs = DocList[Book]( - Book(chapters=DocList[Chapter]([Chapter(content='some_content') for _ in range(3)])) - for _ in range(10) - ) - chapters = docs.traverse_flat(access_path='chapters') # list of 30 strings + docs = DocList[Book]( + Book(chapters=DocList[Chapter]([Chapter(content='some_content') for _ in range(3)])) + for _ in range(10) + ) + chapters = docs.traverse_flat(access_path='chapters') # list of 30 strings + ``` + + --- If your DocList is in doc_vec mode, and you want to access a field of type AnyTensor, the doc_vec tensor will be returned instead of a list: - EXAMPLE USAGE - .. 
code-block:: python - class Image(BaseDoc): - tensor: TorchTensor[3, 224, 224] + --- + ```python + class Image(BaseDoc): + tensor: TorchTensor[3, 224, 224] - batch = DocList[Image]( - [ - Image( - tensor=torch.zeros(3, 224, 224), - ) - for _ in range(2) - ] - ) - batch_stacked = batch.stack() - tensors = batch_stacked.traverse_flat( - access_path='tensor' - ) # tensor of shape (2, 3, 224, 224) + batch = DocList[Image]( + [ + Image( + tensor=torch.zeros(3, 224, 224), + ) + for _ in range(2) + ] + ) + + batch_stacked = batch.stack() + tensors = batch_stacked.traverse_flat( + access_path='tensor' + ) # tensor of shape (2, 3, 224, 224) + ``` + --- + + :param access_path: a string that represents the access path ("__"-separated). + :return: list of the accessed objects, flattened if nested. """ ... diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 257f294a5e7..91ee5443872 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -491,29 +491,31 @@ def from_dataframe(cls, df: 'pd.DataFrame') -> 'DocList': List-like fields (including field of type DocList) are not supported. - EXAMPLE USAGE: - .. 
code-block:: python + --- - import pandas as pd + ```python + import pandas as pd - from docarray import BaseDoc, DocList + from docarray import BaseDoc, DocList - class Person(BaseDoc): - name: str - follower: int + class Person(BaseDoc): + name: str + follower: int - df = pd.DataFrame( - data=[['Maria', 12345], ['Jake', 54321]], columns=['name', 'follower'] - ) + df = pd.DataFrame( + data=[['Maria', 12345], ['Jake', 54321]], columns=['name', 'follower'] + ) - docs = DocList[Person].from_dataframe(df) + docs = DocList[Person].from_dataframe(df) - assert docs.name == ['Maria', 'Jake'] - assert docs.follower == [12345, 54321] + assert docs.name == ['Maria', 'Jake'] + assert docs.follower == [12345, 54321] + ``` + --- :param df: pandas.DataFrame to extract Document's information from :return: DocList where each Document contains the information of one diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index fed12363697..91623df4e5d 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -451,28 +451,30 @@ def from_dataframe(cls, df: 'pd.DataFrame') -> 'DocList': List-like fields (including field of type DocList) are not supported. - EXAMPLE USAGE: + --- - .. 
code-block:: python + ```python + import pandas as pd - import pandas as pd + from docarray import BaseDoc, DocList - from docarray import BaseDoc, DocList + class Person(BaseDoc): + name: str + follower: int - class Person(BaseDoc): - name: str - follower: int + df = pd.DataFrame( + data=[['Maria', 12345], ['Jake', 54321]], columns=['name', 'follower'] + ) - df = pd.DataFrame( - data=[['Maria', 12345], ['Jake', 54321]], columns=['name', 'follower'] - ) + docs = DocList[Person].from_dataframe(df) - docs = DocList[Person].from_dataframe(df) + assert docs.name == ['Maria', 'Jake'] + assert docs.follower == [12345, 54321] + ``` - assert docs.name == ['Maria', 'Jake'] - assert docs.follower == [12345, 54321] + --- :param df: pandas.DataFrame to extract Document's information from From 39c1df974e30f0a83889c5478b6e27f17952d96a Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 13:02:22 +0200 Subject: [PATCH 10/33] fix: fix page for doc list serilizaiton Signed-off-by: samsja --- docarray/array/doc_list/doc_list.py | 22 ++++++------ docarray/array/doc_list/io.py | 16 ++++----- docs/user_guide/sending/send_doclist.md | 48 +++++++++++++------------ mkdocs.yml | 5 ++- 4 files changed, 50 insertions(+), 41 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 91ee5443872..d7b16d3031c 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -332,7 +332,7 @@ def from_bytes( :param data: Bytes from which to deserialize :param protocol: protocol that was used to serialize - :param compress: compress algorithm that was used to serialize + :param compress: compress algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the deserialized DocList """ @@ -362,7 +362,7 @@ def to_bytes( For more Pythonic code, please use ``bytes(...)``. 
:param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param file_ctx: File or filename or serialized bytes where the data is stored. :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the binary serialization in bytes or None if file_ctx is passed where to store @@ -403,7 +403,7 @@ def to_base64( """Serialize itself into base64 encoded string. :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the binary serialization in bytes or None if file_ctx is passed where to store """ @@ -548,7 +548,7 @@ def load_binary( :param file: File or filename or serialized bytes where the data is stored. :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between 'lz4', 'gzip', 'bz2', 'zstd', 'lzma' :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :param streaming: if `True` returns a generator over `Document` objects. In case protocol is pickle the `Documents` are streamed from disk to save memory usage @@ -580,17 +580,19 @@ def save_binary( When using `protobuf` or `pickle` as protocol each Document in DocList will be stored individually and this would make it available for streaming. - :param file: File or filename to which the data is saved. - :param protocol: protocol to use. 
It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use - :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - - .. note:: + !! note If `file` is `str` it can specify `protocol` and `compress` as file extensions. This functionality assumes `file=file_name.$protocol.$compress` where `$protocol` and `$compress` refer to a string interpolation of the respective `protocol` and `compress` methods. For example if `file=my_docarray.protobuf.lz4` then the binary data will be created using `protocol=protobuf` and `compress=lz4`. + + :param file: File or filename to which the data is saved. + :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' + :param compress: compress algorithm to use between : `lz4`, `bz2`, `lzma`, `zlib`, `gzip` + :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + + """ return super().save_binary( file, protocol=protocol, compress=compress, show_progress=show_progress diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index 91623df4e5d..265e648449d 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -141,7 +141,7 @@ def from_bytes( :param data: Bytes from which to deserialize :param protocol: protocol that was used to serialize - :param compress: compress algorithm that was used to serialize + :param compress: compress algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the deserialized DocList """ @@ -247,7 +247,7 @@ def to_bytes( For more Pythonic code, please use ``bytes(...)``. :param protocol: protocol to use. 
It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between : `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param file_ctx: File or filename or serialized bytes where the data is stored. :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the binary serialization in bytes or None if file_ctx is passed where to store @@ -277,7 +277,7 @@ def from_base64( :param data: Base64 string to deserialize :param protocol: protocol that was used to serialize - :param compress: compress algorithm that was used to serialize + :param compress: compress algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the deserialized DocList """ @@ -297,7 +297,7 @@ def to_base64( """Serialize itself into base64 encoded string. :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the binary serialization in bytes or None if file_ctx is passed where to store """ @@ -562,7 +562,7 @@ def _load_binary_all( ): """Read a `DocList` object from a binary file :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: a `DocList` """ @@ -642,7 +642,7 @@ def _load_binary_stream( """Yield `Document` objects from a binary file :param protocol: protocol to use. 
It can be 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: a generator of `Document` objects """ @@ -700,7 +700,7 @@ def load_binary( :param file: File or filename or serialized bytes where the data is stored. :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :param streaming: if `True` returns a generator over `Document` objects. In case protocol is pickle the `Documents` are streamed from disk to save memory usage @@ -761,7 +761,7 @@ def save_binary( :param file: File or filename to which the data is saved. :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` .. note:: diff --git a/docs/user_guide/sending/send_doclist.md b/docs/user_guide/sending/send_doclist.md index 92a48f1c803..25ecce1cbff 100644 --- a/docs/user_guide/sending/send_doclist.md +++ b/docs/user_guide/sending/send_doclist.md @@ -1,8 +1,8 @@ -# Serialization for `DocList` -When sending or storing `DocList`, you need to use serialization. `DocList` supports multiple ways to serialize the data. +# Serialization for DocList +When sending or storing [`DocList`][docarray.array.doc_list.doc_list.DocList], you need to use serialization. [DocList][docarray.array.doc_list.doc_list.DocList] supports multiple ways to serialize the data. 
## JSON -You can use `to_json()` and `from_json()` to serialize and deserialize a `DocList`. +You can use [`to_json()`][docarray.array.doc_list.doc_list.DocList.to_json] and [`from_json()`][docarray.array.doc_list.doc_list.DocList.from_json] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]. ```python from docarray import BaseDoc, DocList @@ -24,14 +24,14 @@ with open('simple-dl.json', 'r') as f: print(dl_load_from_json) ``` -`to_json()` returns the binary representation of the json object. `from_json()` can load from either `str` or `binary` representation of the json object. +[to_json()][docarray.array.doc_list.doc_list.DocList.to_json] returns the binary representation of the json object. [from_json()][docarray.array.doc_list.doc_list.DocList.from_json] can load from either `str` or `binary` representation of the json object. ```output b'[{"id":"5540e72d407ae81abb2390e9249ed066","text":"doc 0"},{"id":"fbe9f80d2fa03571e899a2887af1ac1b","text":"doc 1"}]' ``` ## Protobuf -When using protobuf, you can use `to_protobuf()` and `from_protobuf()` to serialize and deserialize a `DocList` +To serialize a DocList with `protobuf`, you can use [`to_protobuf()`][docarray.array.doc_list.doc_list.DocList.to_protobuf] and [`from_protobuf()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]. ```python from docarray import BaseDoc, DocList @@ -49,20 +49,16 @@ print(type(proto_message_dl)) print(dl_from_proto) ``` -`to_protobuf()` returns a protobuf object of `docarray_pb2.DocListProto` class. `from_protobuf()` accepts a protobuf message object to construct a `DocList`. +[to_protobuf()][docarray.array.doc_list.doc_list.DocList.to_protobuf] returns a protobuf object of `docarray_pb2.DocListProto` class. 
[from_protobuf()][docarray.array.doc_list.doc_list.DocList.from_protobuf] accepts a protobuf message object to construct a [DocList][docarray.array.doc_list.doc_list.DocList]. ## Base64 -When transferring over the network, you can choose `Base64` format to serialize the `DocList`. -Storing a `DocList` in Base64 supports both `pickle` and `protobuf` protocols. Besides, you can choose different compression methods. +When transferring over the network, you can choose `Base64` format to serialize the [`DocList`][docarray.array.doc_list.doc_list.DocList]. +Serializing a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64 supports both `pickle` and `protobuf` protocols. Besides, you can choose different compression methods. +To serialize a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64, you can use [`to_base64()`][docarray.array.doc_list.doc_list.DocList.to_base64] and [`from_base64()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.from_base64]. + +We support multiple compression methods. (namely : `lz4`, `bz2`, `lzma`, `zlib`, `gzip`) -| Compression Methods | Notes | -| --- |-------| -| `lz4` | | -| `bz2` | | -| `lzma` | | -| `zlib` | | -| `gzip` | | ```python from docarray import BaseDoc, DocList @@ -76,12 +72,16 @@ dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) base64_repr_dl = dl.to_base64(compress=None, protocol='pickle') -dl_from_base64 = DocList[SimpleDoc].to_base64(base64_repr_dl , compress=None, protocol='pickle') +dl_from_base64 = DocList[SimpleDoc].from_base64( + base64_repr_dl, compress=None, protocol='pickle' +) ``` ## Binary Similar as in `Base64` serialization, `Binary` serialization also supports different protocols and compression methods. 
+To save a [DocList][docarray.array.doc_list.doc_list.DocList] into a binary file, you can use [`save_binary()`][docarray.array.doc_list.doc_list.DocList.save_binary] and [`load_binary()`][docarray.array.doc_list.doc_list.DocList.load_binary] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]. + ```python from docarray import BaseDoc, DocList @@ -94,13 +94,15 @@ dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) dl.save_binary('simple-dl.pickle', compress=None, protocol='pickle') -dl_from_binary = DocList[SimpleDoc].load_binary('simple-dl.pickle', compress=None, protocol='pickle') +dl_from_binary = DocList[SimpleDoc].load_binary( + 'simple-dl.pickle', compress=None, protocol='pickle' +) ``` -The `DocList` is stored at `simple-dl.pickle` file. +The [DocList][docarray.array.doc_list.doc_list.DocList] is stored in the `simple-dl.pickle` file. ### Bytes -Under the hood, `save_binary()` prepares the file object and calls `to_bytes()` function to convert the `DocList` into a byte object. You can use `to_bytes()` function directly and use `from_bytes()` to load the `DocList` from a byte object. You can use `protocol` to choose between `pickle` and `protobuf`. Besides, `to_bytes()` and `save_bytes()` support multiple options for `compress` as well. +Under the hood, [save_binary()][docarray.array.doc_list.doc_list.DocList.save_binary] prepares the file object and calls [to_bytes()][docarray.array.doc_list.doc_list.DocList.to_bytes] function to convert the [DocList][docarray.array.doc_list.doc_list.DocList] into a byte object. You can use [to_bytes()][docarray.array.doc_list.doc_list.DocList.to_bytes] function directly and use [from_bytes()][docarray.array.doc_list.doc_list.DocList.from_bytes] to load the [DocList][docarray.array.doc_list.doc_list.DocList] from a byte object. You can use `protocol` to choose between `pickle` and `protobuf`. 
Besides, [to_bytes()][docarray.array.doc_list.doc_list.DocList.to_bytes] and [save_binary()][docarray.array.doc_list.doc_list.DocList.save_binary] support multiple options for `compress` as well. ```python from docarray import BaseDoc, DocList @@ -114,12 +116,14 @@ dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) bytes_dl = dl.to_bytes(protocol='pickle', compress=None) -dl_from_bytes = DocList[SimpleDoc].from_bytes(bytes_dl, compress=None, protocol='pickle') +dl_from_bytes = DocList[SimpleDoc].from_bytes( + bytes_dl, compress=None, protocol='pickle' +) ``` ## CSV -You can use `from_csv()` and `to_csv()` to de-/serializae and deserialize the `DocList` from/to a CSV file. Use the `dialect` parameter to choose the dialect of the CSV format. Check more details in the API doc. TODO: Add api doc here. +You can use [`from_csv()`][docarray.array.doc_list.doc_list.DocList.from_csv] and [`to_csv()`][docarray.array.doc_list.doc_list.DocList.to_csv] to serialize and deserialize the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a CSV file. Use the `dialect` parameter to choose the dialect of the CSV format. ```python from docarray import BaseDoc, DocList @@ -138,7 +142,7 @@ print(dl_from_csv) ## Pandas.Dataframe -You can use `from_pandas()` and `to_pandas()` to load/save the `DocList` from/to a pandas DataFrame. +You can use [`from_dataframe()`][docarray.array.doc_list.doc_list.DocList.from_dataframe] and [`to_dataframe()`][docarray.array.doc_list.doc_list.DocList.to_dataframe] to load/save the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a pandas DataFrame. 
```python from docarray import BaseDoc, DocList diff --git a/mkdocs.yml b/mkdocs.yml index f4441995378..1fa2f413d4e 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -77,7 +77,10 @@ nav: - Representing: - user_guide/representing/first_step.md - user_guide/representing/array.md - - user_guide/sending/first_step.md + - Sending: + - user_guide/sending/first_step.md + - user_guide/sending/send_doclist.md + - user_guide/storing/first_step.md - How-to: From 5de2719e94f5208dcd8188459aa9b8dbd46e6958 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 13:10:40 +0200 Subject: [PATCH 11/33] fix: fix docstring Signed-off-by: samsja --- docarray/array/doc_list/doc_list.py | 40 +++++++++++++++++++---------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index d7b16d3031c..b0a656a2195 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -101,27 +101,41 @@ class Image(BaseDoc): fields at the DocList level (for example `docs.tensor` or `docs.url`). You can also set fields, with `docs.tensor = np.random.random([10, 100])`: - print(docs.url) - # [ImageUrl('http://url.com/foo.png', host_type='domain'), ...] - import numpy as np + --- - docs.tensor = np.random.random([10, 100]) - print(docs.tensor) - # [NdArray([0.11299577, 0.47206767, 0.481723 , 0.34754724, 0.15016037, - # 0.88861321, 0.88317666, 0.93845579, 0.60486676, ... ]), ...] + ```python + print(docs.url) + # [ImageUrl('http://url.com/foo.png', host_type='domain'), ...] + import numpy as np + + docs.tensor = np.random.random([10, 100]) + print(docs.tensor) + # [NdArray([0.11299577, 0.47206767, 0.481723 , 0.34754724, 0.15016037, + # 0.88861321, 0.88317666, 0.93845579, 0.60486676, ... ]), ...] 
+ ``` + --- You can index into a DocList like a numpy doc_list or torch tensor: + --- - docs[0] # index by position - docs[0:5:2] # index by slice - docs[[0, 2, 3]] # index by list of indices - docs[True, False, True, True, ...] # index by boolean mask + ```python + docs[0] # index by position + docs[0:5:2] # index by slice + docs[[0, 2, 3]] # index by list of indices + docs[True, False, True, True, ...] # index by boolean mask + ``` + --- You can delete items from a DocList like a Python List + --- + + ```python + del docs[0] # remove first element from DocList + del docs[0:5] # remove elements for 0 to 5 from DocList + ``` - del docs[0] # remove first element from DocList - del docs[0:5] # remove elements for 0 to 5 from DocList + --- :param docs: iterable of Document From e9df25b4ab2dbf49207b51712884d245eaccb8d2 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 13:24:06 +0200 Subject: [PATCH 12/33] feat: add docvec Signed-off-by: samsja --- docs/user_guide/sending/send_docvec.md | 24 ++++++++++++++++++++++++ mkdocs.yml | 1 + 2 files changed, 25 insertions(+) create mode 100644 docs/user_guide/sending/send_docvec.md diff --git a/docs/user_guide/sending/send_docvec.md b/docs/user_guide/sending/send_docvec.md new file mode 100644 index 00000000000..d400caa844c --- /dev/null +++ b/docs/user_guide/sending/send_docvec.md @@ -0,0 +1,24 @@ +# Serialization of DocVec + +When sending or storing [`DocVec`][docarray.array.doc_list.doc_list.DocVec], you need to use serialization. [DocVec][docarray.array.doc_list.doc_list.DocVec] only supports protobuf to serialize the data. 
+You can use [`to_protobuf`][docarray.array.doc_list.doc_list.DocVec.to_protobuf] and [`from_protobuf`][docarray.array.doc_list.doc_list.DocVec.from_protobuf] to serialize and deserialize a [DocVec][docarray.array.doc_list.doc_list.DocVec] + +```python +import numpy as np + +from docarray import BaseDoc, DocVec +from docarray.typing import AnyTensor + + +class SimpleVecDoc(BaseDoc): + tensor: AnyTensor + + +dv = DocVec[SimpleVecDoc]([SimpleVecDoc(tensor=np.ones(16)) for _ in range(8)]) + +proto_message_dv = dv.to_protobuf() + +dv_from_proto = DocVec[SimpleVecDoc].from_protobuf(proto_message_dv) +``` + +[`to_protobuf`][docarray.array.doc_list.doc_list.DocVec.to_protobuf] returns a protobuf object of `docarray_pb2.DocVecProto` class. [`from_protobuf`][docarray.array.doc_list.doc_list.DocVec.from_protobuf] accepts a protobuf message object to construct a [DocVec][docarray.array.doc_list.doc_list.DocVec]. diff --git a/mkdocs.yml b/mkdocs.yml index 1fa2f413d4e..e02e8f72056 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -80,6 +80,7 @@ nav: - Sending: - user_guide/sending/first_step.md - user_guide/sending/send_doclist.md + - user_guide/sending/send_docvec.md - user_guide/storing/first_step.md From 28534744e5ff7258a722db484fadc1980ff490d3 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 13:34:42 +0200 Subject: [PATCH 13/33] docs: add send doc section Signed-off-by: samsja --- docs/user_guide/sending/send_doc.md | 48 +++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 docs/user_guide/sending/send_doc.md diff --git a/docs/user_guide/sending/send_doc.md b/docs/user_guide/sending/send_doc.md new file mode 100644 index 00000000000..5b7e7fff91d --- /dev/null +++ b/docs/user_guide/sending/send_doc.md @@ -0,0 +1,48 @@ +# Serialization of BaseDoc + +In order to send or store [BaseDoc][docarray.base_doc.doc.BaseDoc] you need to serialize them first. + +!! 
note + [BaseDoc][docarray.base_doc.doc.BaseDoc] supports serialization to `protobuf` and `json` formats. + +## Serialization to protobuf + +You can use [`to_protobuf`][docarray.base_doc.doc.BaseDoc.to_protobuf] to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] to a protobuf message object +and use [`from_protobuf`][docarray.base_doc.doc.BaseDoc.from_protobuf] to deserialize it. + +```python +from typing import List +from docarray import BaseDoc + + +class MyDoc(BaseDoc): + text: str + tags: List[str] + + +doc = MyDoc(text='hello world', tags=['hello', 'world']) +proto_message = doc.to_protobuf() +new_doc = MyDoc.from_protobuf(proto_message) +assert doc == new_doc # True +``` + +## Serialization to json + +You can use [`json`][docarray.base_doc.doc.BaseDoc.json] to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] to a json string +and use [`parse_raw`][docarray.base_doc.doc.BaseDoc.parse_raw] to deserialize it. + +```python +from typing import List +from docarray import BaseDoc + + +class MyDoc(BaseDoc): + text: str + tags: List[str] + + +doc = MyDoc(text='hello world', tags=['hello', 'world']) +json_str = doc.json() +new_doc = MyDoc.parse_raw(json_str) +assert doc == new_doc # True +``` \ No newline at end of file From a6910f16c49cfe6d930c94a852b1e72645451f74 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 13:53:28 +0200 Subject: [PATCH 14/33] docs: fix docstring Signed-off-by: samsja --- docarray/base_doc/doc.py | 82 +++++++++++++++++++++++++++++++++++++++- mkdocs.yml | 1 + 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index ccb5b65b99e..dbc1f155f58 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -1,5 +1,15 @@ import os -from typing import TYPE_CHECKING, Any, Dict, Optional, Type, TypeVar +from typing import ( + TYPE_CHECKING, + Any, + Callable, + Dict, + Optional, + Type, + TypeVar, + Union, + no_type_check, +) import orjson from pydantic import 
BaseModel, Field @@ -12,6 +22,10 @@ from docarray.typing.tensor.abstract_tensor import AbstractTensor if TYPE_CHECKING: + from pydantic import Protocol + from pydantic.types import StrBytes + from pydantic.typing import AbstractSetIntStr, MappingIntStrAny + from docarray.array.doc_vec.column_storage import ColumnStorageView from docarray.proto import DocProto @@ -260,3 +274,69 @@ class MyDocument(BaseDoc): :param other: The Document with which to update the contents of this """ super().update(other) + + def to_protobuf(self) -> 'DocProto': + """Convert Document into a Protobuf message. + + :return: the protobuf message + """ + super().to_protobuf() + + def json( + self, + *, + include: Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']] = None, + exclude: Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']] = None, + by_alias: bool = False, + skip_defaults: Optional[bool] = None, + exclude_unset: bool = False, + exclude_defaults: bool = False, + exclude_none: bool = False, + encoder: Optional[Callable[[Any], Any]] = None, + models_as_dict: bool = True, + **dumps_kwargs: Any, + ) -> str: + """ + Generate a JSON representation of the model, `include` and `exclude` arguments as per `dict()`. + + `encoder` is an optional function to supply as `default` to json.dumps(), other arguments as per `json.dumps()`. 
+ """ + return super().json( + include=include, + exclude=exclude, + by_alias=by_alias, + skip_defaults=skip_defaults, + exclude_unset=exclude_unset, + exclude_defaults=exclude_defaults, + exclude_none=exclude_none, + encoder=encoder, + models_as_dict=models_as_dict, + **dumps_kwargs, + ) + + @no_type_check + def parse_raw( + cls: Type[T], + b: StrBytes, + *, + content_type: str = None, + encoding: str = 'utf8', + proto: Protocol = None, + allow_pickle: bool = False, + ) -> T: + """ + Parse a raw string or bytes into a base doc + :param b: + :param content_type: + :param encoding: the encoding to use when parsing a string, defaults to 'utf8' + :param proto: protocol to use. + :param allow_pickle: allow pickle protocol + :return: a document + """ + return super(BaseDoc, cls).parse_raw( + b, + content_type=content_type, + encoding=encoding, + proto=proto, + allow_pickle=allow_pickle, + ) diff --git a/mkdocs.yml b/mkdocs.yml index e02e8f72056..4817151a82b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -79,6 +79,7 @@ nav: - user_guide/representing/array.md - Sending: - user_guide/sending/first_step.md + - user_guide/sending/send_doc.md - user_guide/sending/send_doclist.md - user_guide/sending/send_docvec.md From e73a6c450364a3f782396066ab05b744123401d2 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 13:59:03 +0200 Subject: [PATCH 15/33] refactor: better tree structure for sending Signed-off-by: samsja --- docs/user_guide/sending/api/fastAPI.md | 0 docs/user_guide/sending/api/jina.md | 1 + docs/user_guide/sending/first_step.md | 2 +- docs/user_guide/sending/{ => ser}/send_doc.md | 0 docs/user_guide/sending/{ => ser}/send_doclist.md | 0 docs/user_guide/sending/{ => ser}/send_docvec.md | 0 mkdocs.yml | 10 +++++++--- 7 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 docs/user_guide/sending/api/fastAPI.md create mode 100644 docs/user_guide/sending/api/jina.md rename docs/user_guide/sending/{ => ser}/send_doc.md (100%) rename 
docs/user_guide/sending/{ => ser}/send_doclist.md (100%) rename docs/user_guide/sending/{ => ser}/send_docvec.md (100%) diff --git a/docs/user_guide/sending/api/fastAPI.md b/docs/user_guide/sending/api/fastAPI.md new file mode 100644 index 00000000000..e69de29bb2d diff --git a/docs/user_guide/sending/api/jina.md b/docs/user_guide/sending/api/jina.md new file mode 100644 index 00000000000..4e51fd6ee93 --- /dev/null +++ b/docs/user_guide/sending/api/jina.md @@ -0,0 +1 @@ +# Jina \ No newline at end of file diff --git a/docs/user_guide/sending/first_step.md b/docs/user_guide/sending/first_step.md index a18433535b9..f822386b8af 100644 --- a/docs/user_guide/sending/first_step.md +++ b/docs/user_guide/sending/first_step.md @@ -1 +1 @@ -# Sending +# first step diff --git a/docs/user_guide/sending/send_doc.md b/docs/user_guide/sending/ser/send_doc.md similarity index 100% rename from docs/user_guide/sending/send_doc.md rename to docs/user_guide/sending/ser/send_doc.md diff --git a/docs/user_guide/sending/send_doclist.md b/docs/user_guide/sending/ser/send_doclist.md similarity index 100% rename from docs/user_guide/sending/send_doclist.md rename to docs/user_guide/sending/ser/send_doclist.md diff --git a/docs/user_guide/sending/send_docvec.md b/docs/user_guide/sending/ser/send_docvec.md similarity index 100% rename from docs/user_guide/sending/send_docvec.md rename to docs/user_guide/sending/ser/send_docvec.md diff --git a/mkdocs.yml b/mkdocs.yml index 4817151a82b..579074bbfc6 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -79,9 +79,13 @@ nav: - user_guide/representing/array.md - Sending: - user_guide/sending/first_step.md - - user_guide/sending/send_doc.md - - user_guide/sending/send_doclist.md - - user_guide/sending/send_docvec.md + - Serialization: + - user_guide/sending/ser/send_doc.md + - user_guide/sending/ser/send_doclist.md + - user_guide/sending/ser/send_docvec.md + - Building API: + - user_guide/sending/api/jina.md + - user_guide/sending/api/fastAPI.md - 
user_guide/storing/first_step.md From 66fc6db5c104294d5869b2ccc17134f72874aa3c Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 14:04:17 +0200 Subject: [PATCH 16/33] fix: fix tests Signed-off-by: samsja --- docarray/base_doc/doc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index dbc1f155f58..bb319f3074d 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -280,7 +280,7 @@ def to_protobuf(self) -> 'DocProto': :return: the protobuf message """ - super().to_protobuf() + return super().to_protobuf() def json( self, @@ -315,13 +315,14 @@ def json( ) @no_type_check + @classmethod def parse_raw( cls: Type[T], - b: StrBytes, + b: 'StrBytes', *, content_type: str = None, encoding: str = 'utf8', - proto: Protocol = None, + proto: 'Protocol' = None, allow_pickle: bool = False, ) -> T: """ From f32bcca05f2f83e8d68b176f8c7258bb80d529bb Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 14:40:57 +0200 Subject: [PATCH 17/33] fix: fix python code snippet ods Signed-off-by: samsja --- docarray/base_doc/mixins/io.py | 1 + docs/user_guide/sending/ser/send_doclist.md | 4 ++-- simple-dl.csv | 3 +++ simple-dl.json | 1 + tests/documentation/test_docs.py | 2 +- 5 files changed, 8 insertions(+), 3 deletions(-) create mode 100644 simple-dl.csv create mode 100644 simple-dl.json diff --git a/docarray/base_doc/mixins/io.py b/docarray/base_doc/mixins/io.py index b2a64e8082b..ad5913b6498 100644 --- a/docarray/base_doc/mixins/io.py +++ b/docarray/base_doc/mixins/io.py @@ -144,6 +144,7 @@ def to_bytes( import pickle if protocol == 'pickle': + breakpoint() bstr = pickle.dumps(self) elif protocol == 'protobuf': bstr = self.to_protobuf().SerializePartialToString() diff --git a/docs/user_guide/sending/ser/send_doclist.md b/docs/user_guide/sending/ser/send_doclist.md index 25ecce1cbff..532926cba28 100644 --- a/docs/user_guide/sending/ser/send_doclist.md +++ 
b/docs/user_guide/sending/ser/send_doclist.md @@ -154,7 +154,7 @@ class SimpleDoc(BaseDoc): dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) -df = dl.to_pandas() -dl_from_dataframe = DocList[SimpleDoc].from_pandas(df) +df = dl.to_dataframe() +dl_from_dataframe = DocList[SimpleDoc].from_dataframe(df) print(dl_from_dataframe) ``` \ No newline at end of file diff --git a/simple-dl.csv b/simple-dl.csv new file mode 100644 index 00000000000..73fcef9088e --- /dev/null +++ b/simple-dl.csv @@ -0,0 +1,3 @@ +id,text +e5083675a1ff093b5db61485dea954e1,doc 0 +6cf91fb8ce69c2adcca4abeacab1bbb2,doc 1 diff --git a/simple-dl.json b/simple-dl.json new file mode 100644 index 00000000000..e8402651a63 --- /dev/null +++ b/simple-dl.json @@ -0,0 +1 @@ +[{"id":"c972944303fc583b0a66057c323af21a","text":"doc 0"},{"id":"febc35bbd6563d24fa8a832447fba5bb","text":"doc 1"}] \ No newline at end of file diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index 6ca32d7700f..646bb4c582f 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -47,7 +47,7 @@ def check_md_file(fpath, memory=False, lang="python", keyword_ignore=[]): 'fpath', pathlib.Path('docs/user_guide').glob('**/*.md'), ids=str ) def test_files_good(fpath): - check_md_file(fpath=fpath, memory=True) + check_md_file(fpath=fpath, memory=True, keyword_ignore=['pickle']) def test_readme(): From 4047c23c4a236c46a205c6bec9ebcb6809646a0d Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 14:50:08 +0200 Subject: [PATCH 18/33] fix: fix remove breakpoint Signed-off-by: samsja --- docarray/base_doc/mixins/io.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docarray/base_doc/mixins/io.py b/docarray/base_doc/mixins/io.py index ad5913b6498..b2a64e8082b 100644 --- a/docarray/base_doc/mixins/io.py +++ b/docarray/base_doc/mixins/io.py @@ -144,7 +144,6 @@ def to_bytes( import pickle if protocol == 'pickle': - breakpoint() bstr = pickle.dumps(self) elif 
protocol == 'protobuf': bstr = self.to_protobuf().SerializePartialToString() From 682130706415ee3637ad20e69642e9c6c7cdc14f Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 15:36:38 +0200 Subject: [PATCH 19/33] feat: add intro Signed-off-by: samsja --- docs/user_guide/sending/first_step.md | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/docs/user_guide/sending/first_step.md b/docs/user_guide/sending/first_step.md index f822386b8af..05441f8337d 100644 --- a/docs/user_guide/sending/first_step.md +++ b/docs/user_guide/sending/first_step.md @@ -1 +1,11 @@ -# first step +# Intro + +In the representation section we saw how to use [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] +to represent multi-modal data. In this section we will see **how to send these data over the wire**. + + +This section is dived in two: + +- [Serialization](./ser/send_doc.md) of [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] +- [Using DocArray with web framework to build multimodal API](./api/jina.md) + From ce60c65fc1d8839596409beb26d96424251fe282 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 15:48:24 +0200 Subject: [PATCH 20/33] feat: add ref Signed-off-by: samsja --- docs/user_guide/sending/ser/send_doc.md | 11 +++++++++-- docs/user_guide/sending/ser/send_doclist.md | 9 +++++++-- docs/user_guide/sending/ser/send_docvec.md | 8 +++++++- 3 files changed, 23 insertions(+), 5 deletions(-) diff --git a/docs/user_guide/sending/ser/send_doc.md b/docs/user_guide/sending/ser/send_doc.md index 5b7e7fff91d..e65f5d7d950 100644 --- a/docs/user_guide/sending/ser/send_doc.md +++ b/docs/user_guide/sending/ser/send_doc.md @@ -1,4 +1,4 @@ -# Serialization of BaseDoc +# BaseDoc In order to send or store [BaseDoc][docarray.base_doc.doc.BaseDoc] you need 
to serialize them first. @@ -45,4 +45,11 @@ doc = MyDoc(text='hello world', tags=['hello', 'world']) json_str = doc.json() new_doc = MyDoc.parse_raw(json_str) assert doc == new_doc # True -``` \ No newline at end of file +``` + +See also: + +* The serializing [DocList](./send_doclist.md) section +* The serializing [DocVec](./send_docvec.md) section + + diff --git a/docs/user_guide/sending/ser/send_doclist.md b/docs/user_guide/sending/ser/send_doclist.md index 532926cba28..e3bb6583e6e 100644 --- a/docs/user_guide/sending/ser/send_doclist.md +++ b/docs/user_guide/sending/ser/send_doclist.md @@ -1,4 +1,4 @@ -# Serialization for DocList +# DocList When sending or storing [`DocList`][docarray.array.doc_list.doc_list.DocList], you need to use serialization. [DocList][docarray.array.doc_list.doc_list.DocList] supports multiple ways to serialize the data. ## JSON @@ -157,4 +157,9 @@ dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(2)]) df = dl.to_dataframe() dl_from_dataframe = DocList[SimpleDoc].from_dataframe(df) print(dl_from_dataframe) -``` \ No newline at end of file +``` + +See also: + +* The serializing [BaseDoc](./send_doc.md) section +* The serializing [DocVec](./send_docvec.md) section diff --git a/docs/user_guide/sending/ser/send_docvec.md b/docs/user_guide/sending/ser/send_docvec.md index d400caa844c..3868ff7c60b 100644 --- a/docs/user_guide/sending/ser/send_docvec.md +++ b/docs/user_guide/sending/ser/send_docvec.md @@ -1,4 +1,4 @@ -# Serialization of DocVec +# DocVec When sending or storing [`DocVec`][docarray.array.doc_list.doc_list.DocVec], you need to use serialization. [DocVec][docarray.array.doc_list.doc_list.DocVec] only supports protobuf to serialize the data. 
You can use [`to_protobuf`][docarray.array.doc_list.doc_list.DocVec.to_protobuf] and [`from_protobuf`][docarray.array.doc_list.doc_list.DocVec.from_protobuf] to serialize and deserialize a [DocVec][docarray.array.doc_list.doc_list.DocVec] @@ -21,4 +21,10 @@ proto_message_dv = dv.to_protobuf() dv_from_proto = DocVec[SimpleVecDoc].from_protobuf(proto_message_dv) ``` +!!! note + We are planning to add more serilization format in the future, notably JSON. + [`to_protobuf`][docarray.array.doc_list.doc_list.DocVec.to_protobuf] returns a protobuf object of `docarray_pb2.DocVecProto` class. [`from_protobuf`][docarray.array.doc_list.doc_list.DocVec.from_protobuf] accepts a protobuf message object to construct a [DocVec][docarray.array.doc_list.doc_list.DocVec]. + +* The serializing [BaseDoc](./send_doc.md) section +* The serializing [DocList](./send_doclist.md) section From e66a90096a8a85d9879ee4b986246e6e827de38c Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 16:09:52 +0200 Subject: [PATCH 21/33] feat: move fastapi part Signed-off-by: samsja --- docs/integrations/fastapi.md | 134 ----------------------- docs/user_guide/sending/api/fastAPI.md | 140 +++++++++++++++++++++++++ mkdocs.yml | 2 - 3 files changed, 140 insertions(+), 136 deletions(-) delete mode 100644 docs/integrations/fastapi.md diff --git a/docs/integrations/fastapi.md b/docs/integrations/fastapi.md deleted file mode 100644 index e55b09fba9e..00000000000 --- a/docs/integrations/fastapi.md +++ /dev/null @@ -1,134 +0,0 @@ -# Use DocArray with FastAPI - -FastAPI is a high-performance web framework for building APIs with Python. It's designed to be easy to use and supports asynchronous programming. -Since [`DocArray` documents are Pydantic Models (with a twist)](../user_guide/representing/first_step.md) they can be easily integrated with FastAPI, -and provide a seamless and efficient way to work with multimodal data in FastAPI-powered APIs. 
- - -First, you should define schemas for your input and/or output Documents: -```python -from docarray import BaseDoc -from docarray.documents import ImageDoc -from docarray.typing import NdArray - - -class InputDoc(BaseDoc): - img: ImageDoc - - -class OutputDoc(BaseDoc): - embedding_clip: NdArray - embedding_bert: NdArray -``` - -Afterwards, you can use your Documents with FastAPI: -```python -import numpy as np -from fastapi import FastAPI -from httpx import AsyncClient - -from docarray.documents import ImageDoc -from docarray.base_doc import DocumentResponse - -input_doc = InputDoc(img=ImageDoc(tensor=np.zeros((3, 224, 224)))) - -app = FastAPI() - - -@app.post("/doc/", response_model=OutputDoc, response_class=DocumentResponse) -async def create_item(doc: InputDoc) -> OutputDoc: - ## call my fancy model to generate the embeddings - doc = OutputDoc( - embedding_clip=np.zeros((100, 1)), embedding_bert=np.zeros((100, 1)) - ) - return doc - - -async with AsyncClient(app=app, base_url="http://test") as ac: - response = await ac.post("/doc/", data=input_doc.json()) - -doc = OutputDoc.parse_raw(response.content.decode()) -``` - -The big advantage here is **first-class support for ML centric data**, such as {Torch, TF, ...}Tensor, Embedding, etc. 
- -This includes handy features such as validating the shape of a tensor: - -```python -from docarray import BaseDoc -from docarray.typing import TorchTensor -import torch - - -class MyDoc(BaseDoc): - tensor: TorchTensor[3, 224, 224] - - -doc = MyDoc(tensor=torch.zeros(3, 224, 224)) # works -doc = MyDoc(tensor=torch.zeros(224, 224, 3)) # works by reshaping -doc = MyDoc(tensor=torch.zeros(224)) # fails validation - - -class Image(BaseDoc): - tensor: TorchTensor[3, 'x', 'x'] - - -Image(tensor=torch.zeros(3, 224, 224)) # works -Image( - tensor=torch.zeros(3, 64, 128) -) # fails validation because second dimension does not match third -Image( - tensor=torch.zeros(4, 224, 224) -) # fails validation because of the first dimension -Image( - tensor=torch.zeros(3, 64) -) # fails validation because it does not have enough dimensions -``` - - -Further, you can send and receive lists of Documents represented as a `DocArray` object: - -!!! note - Currently, `FastAPI` receives `DocArray` objects as lists, so you have to construct a DocArray inside the function. - Also, if you want to return a `DocArray` object, first you have to convert it to a list. 
- (Shown in the example below) - -```python -from typing import List - -import numpy as np -from fastapi import FastAPI -from httpx import AsyncClient - -from docarray import DocArray -from docarray.base_doc import DocArrayResponse -from docarray.documents import TextDoc - -# Create a docarray -docs = DocArray[TextDoc]([TextDoc(text='first'), TextDoc(text='second')]) - -app = FastAPI() - - -# Always use our custom response class (needed to dump tensors) -@app.post("/doc/", response_class=DocArrayResponse) -async def create_embeddings(docs: List[TextDoc]) -> List[TextDoc]: - # The docs FastAPI will receive will be treated as List[TextDoc] - # so you need to cast it to DocArray - docs = DocArray[TextDoc].construct(docs) - - # Embed docs - for doc in docs: - doc.embedding = np.zeros((3, 224, 224)) - - # Return your DocArray as a list - return list(docs) - - -async with AsyncClient(app=app, base_url="http://test") as ac: - response = await ac.post("/doc/", data=docs.to_json()) # sending docs as json - -assert response.status_code == 200 -# You can read FastAPI's response in the following way -docs = DocArray[TextDoc].from_json(response.content.decode()) -``` diff --git a/docs/user_guide/sending/api/fastAPI.md b/docs/user_guide/sending/api/fastAPI.md index e69de29bb2d..5409b989787 100644 --- a/docs/user_guide/sending/api/fastAPI.md +++ b/docs/user_guide/sending/api/fastAPI.md @@ -0,0 +1,140 @@ +# FastAPI + +[FastAPI](https://fastapi.tiangolo.com/) is a high-performance web framework for building APIs with Python based on python type hint. It's designed to be easy to use and supports asynchronous programming. +Since [`DocArray` documents are Pydantic Models (with a twist)](../../representing/first_step.md) they can be easily integrated with FastAPI, +and provide a seamless and efficient way to work with multimodal data in FastAPI-powered APIs. + +!!! 
note + you need to install FastAPI to follow this section + ``` + pip install fastapi + ``` + + +First, you should define schemas for your input and/or output Documents: +```python +from docarray import BaseDoc +from docarray.documents import ImageDoc +from docarray.typing import NdArray + + +class InputDoc(BaseDoc): + img: ImageDoc + + +class OutputDoc(BaseDoc): + embedding_clip: NdArray + embedding_bert: NdArray +``` + +Afterwards, you can use your Documents with FastAPI: +```python +import numpy as np +from fastapi import FastAPI +from httpx import AsyncClient + +from docarray.documents import ImageDoc +from docarray.base_doc import DocumentResponse + +input_doc = InputDoc(img=ImageDoc(tensor=np.zeros((3, 224, 224)))) + +app = FastAPI() + + +@app.post("/doc/", response_model=OutputDoc, response_class=DocumentResponse) +async def create_item(doc: InputDoc) -> OutputDoc: + ## call my fancy model to generate the embeddings + doc = OutputDoc( + embedding_clip=np.zeros((100, 1)), embedding_bert=np.zeros((100, 1)) + ) + return doc + + +async with AsyncClient(app=app, base_url="http://test") as ac: + response = await ac.post("/doc/", data=input_doc.json()) + +doc = OutputDoc.parse_raw(response.content.decode()) +``` + +The big advantage here is **first-class support for ML centric data**, such as {Torch, TF, ...}Tensor, Embedding, etc. 
+ +This includes handy features such as validating the shape of a tensor: + +```python +from docarray import BaseDoc +from docarray.typing import TorchTensor +import torch + + +class MyDoc(BaseDoc): + tensor: TorchTensor[3, 224, 224] + + +doc = MyDoc(tensor=torch.zeros(3, 224, 224)) # works +doc = MyDoc(tensor=torch.zeros(224, 224, 3)) # works by reshaping +doc = MyDoc(tensor=torch.zeros(224)) # fails validation + + +class Image(BaseDoc): + tensor: TorchTensor[3, 'x', 'x'] + + +Image(tensor=torch.zeros(3, 224, 224)) # works +Image( + tensor=torch.zeros(3, 64, 128) +) # fails validation because second dimension does not match third +Image( + tensor=torch.zeros(4, 224, 224) +) # fails validation because of the first dimension +Image( + tensor=torch.zeros(3, 64) +) # fails validation because it does not have enough dimensions +``` + + +Further, you can send and receive lists of Documents represented as a `DocArray` object: + +!!! note + Currently, `FastAPI` receives `DocArray` objects as lists, so you have to construct a DocArray inside the function. + Also, if you want to return a `DocArray` object, first you have to convert it to a list. 
+ (Shown in the example below) + +```python +from typing import List + +import numpy as np +from fastapi import FastAPI +from httpx import AsyncClient + +from docarray import DocArray +from docarray.base_doc import DocArrayResponse +from docarray.documents import TextDoc + +# Create a docarray +docs = DocArray[TextDoc]([TextDoc(text='first'), TextDoc(text='second')]) + +app = FastAPI() + + +# Always use our custom response class (needed to dump tensors) +@app.post("/doc/", response_class=DocArrayResponse) +async def create_embeddings(docs: List[TextDoc]) -> List[TextDoc]: + # The docs FastAPI will receive will be treated as List[TextDoc] + # so you need to cast it to DocArray + docs = DocArray[TextDoc].construct(docs) + + # Embed docs + for doc in docs: + doc.embedding = np.zeros((3, 224, 224)) + + # Return your DocArray as a list + return list(docs) + + +async with AsyncClient(app=app, base_url="http://test") as ac: + response = await ac.post("/doc/", data=docs.to_json()) # sending docs as json + +assert response.status_code == 200 +# You can read FastAPI's response in the following way +docs = DocArray[TextDoc].from_json(response.content.decode()) +``` diff --git a/mkdocs.yml b/mkdocs.yml index 09b8d12e79a..991966990f2 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -98,8 +98,6 @@ nav: - how_to/multimodal_training_and_serving.md - how_to/optimize_performance_with_id_generation.md - how_to/audio2text.md - - Integrations: - - integrations/fastapi.md - Data Types: - data_types/text/text.md - data_types/image/image.md From a642abe967514e0ecf939049b7222b8a2721228c Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 17:25:40 +0200 Subject: [PATCH 22/33] fix: fix fastAPI Signed-off-by: samsja --- simple-dl.csv | 4 ++-- simple-dl.json | 2 +- tests/documentation/test_docs.py | 19 +++++++++++-------- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/simple-dl.csv b/simple-dl.csv index 73fcef9088e..b30400587f3 100644 --- a/simple-dl.csv +++ 
b/simple-dl.csv @@ -1,3 +1,3 @@ id,text -e5083675a1ff093b5db61485dea954e1,doc 0 -6cf91fb8ce69c2adcca4abeacab1bbb2,doc 1 +31b05a66db6fffb90f7b3e5edb71fc52,doc 0 +2bfa118dceb366281d0714b02a78b9c7,doc 1 diff --git a/simple-dl.json b/simple-dl.json index e8402651a63..07bc6ea9e99 100644 --- a/simple-dl.json +++ b/simple-dl.json @@ -1 +1 @@ -[{"id":"c972944303fc583b0a66057c323af21a","text":"doc 0"},{"id":"febc35bbd6563d24fa8a832447fba5bb","text":"doc 1"}] \ No newline at end of file +[{"id":"7d913dc1ed6d875c0b576abf092100d3","text":"doc 0"},{"id":"af7978f9eb8d44de95371e3781a3f37e","text":"doc 1"}] \ No newline at end of file diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index 6b5390215e6..447d549788f 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -43,14 +43,17 @@ def check_md_file(fpath, memory=False, lang="python", keyword_ignore=[]): check_raw_file_full(text, lang=lang, keyword_ignore=keyword_ignore) -@pytest.mark.parametrize( - 'fpath', - [ - *list(pathlib.Path('docs/user_guide').glob('**/*.md')), - *list(pathlib.Path('docs/data_types').glob('**/*.md')), - ], - ids=str, -) +files_to_check = [ + *list(pathlib.Path('docs/user_guide').glob('**/*.md')), + *list(pathlib.Path('docs/data_types').glob('**/*.md')), +] + +for file in files_to_check: + if 'fastAPI' in str(file): # for now we don't test fastAPI stuff because of async + files_to_check.remove(file) + + +@pytest.mark.parametrize('fpath', files_to_check, ids=str) def test_files_good(fpath): check_md_file(fpath=fpath, memory=True, keyword_ignore=['pickle']) From 840a650929056cdaf324d7e971f484da6b524d92 Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 12 Apr 2023 17:30:03 +0200 Subject: [PATCH 23/33] fix: remove uselss mixin Signed-off-by: samsja --- docs/api_references/array/da.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/api_references/array/da.md b/docs/api_references/array/da.md index eedcec827cd..21a206a9537 100644 --- 
a/docs/api_references/array/da.md +++ b/docs/api_references/array/da.md @@ -1,4 +1,3 @@ # DocList ::: docarray.array.doc_list.doc_list.DocList -::: docarray.array.doc_list.io.IOMixinArray From 8c2cf02d3577c05287acc4d85ae615e341f59a3a Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 13 Apr 2023 13:25:05 +0200 Subject: [PATCH 24/33] faet: add jina section Signed-off-by: samsja --- docs/how_to/audio2text.md | 79 --------------------------- docs/user_guide/sending/api/jina.md | 82 ++++++++++++++++++++++++++++- mkdocs.yml | 1 - simple-dl.csv | 3 -- simple-dl.json | 1 - tests/documentation/test_docs.py | 12 ++++- 6 files changed, 91 insertions(+), 87 deletions(-) delete mode 100644 docs/how_to/audio2text.md delete mode 100644 simple-dl.csv delete mode 100644 simple-dl.json diff --git a/docs/how_to/audio2text.md b/docs/how_to/audio2text.md deleted file mode 100644 index fcec869ce0f..00000000000 --- a/docs/how_to/audio2text.md +++ /dev/null @@ -1,79 +0,0 @@ -# Creating an Audio to Text App with Jina and DocArray V2 - -This is how you can build an Audio to Text app using Jina, Docarray and Whisper - -We will use: - -* DocarrayV2: Helps us to load and preprocess multimodal data such as image, text and audio in our case -* Jina: Helps us serve the model quickly and create a client - -First let's install requirements - -## 💾 Installation - -```bash -pip install transformers -pip install openai-whisper -pip install jina -``` - -Now let's import necessary libraries - - -```python -import whisper -from jina import Executor, requests, Deployment -from docarray import BaseDoc, DocList -from docarray.typing import AudioUrl -``` - -Now we need to create the schema of our input and output documents. 
Since our input is an audio -our input schema should contain an AudioUrl like the following - -```python -class AudioURL(BaseDoc): - audio: AudioUrl -``` - -As for the output schema we would like to receive the transcribed text so we use the following: - -```python -class Response(BaseDoc): - text: str -``` - -Now it's time we create our model, we wrap our model into Jina Executor, this allows us to serve to model -later on and expose its endpoint /transcribe - -```python -class WhisperExecutor(Executor): - def __init__(self, device: str, *args, **kwargs): - super().__init__(*args, **kwargs) - self.model = whisper.load_model("medium.en", device=device) - - @requests - def transcribe(self, docs: DocList[AudioURL], **kwargs) -> DocList[Response]: - response_docs = DocList[Response]() - for doc in docs: - transcribed_text = self.model.transcribe(str(doc.audio))['text'] - response_docs.append(Response(text=transcribed_text)) - return response_docs -``` - -Now we can leverage Deployment object provided by Jina to use this executor -then we send a request to transcribe endpoint. Here we are using an audio file previously recorded -that says, "A Man reading a book" saved under resources/audio.mp3 but feel free to use your own audio. - -```python -with Deployment( - uses=WhisperExecutor, uses_with={'device': "cpu"}, port=12349, timeout_ready=-1 -) as d: - docs = d.post( - on='/transcribe', - inputs=[AudioURL(audio='resources/audio.mp3')], - return_type=DocList[Response], - ) - print(docs[0].text) -``` - -And we get the transcribed result! 
\ No newline at end of file diff --git a/docs/user_guide/sending/api/jina.md b/docs/user_guide/sending/api/jina.md index 4e51fd6ee93..1afd247f1d4 100644 --- a/docs/user_guide/sending/api/jina.md +++ b/docs/user_guide/sending/api/jina.md @@ -1 +1,81 @@ -# Jina \ No newline at end of file +# Jina + +# Creating an Audio to Text App with Jina and DocArray V2 + +This is how you can build an Audio to Text app using Jina, Docarray and Whisper + +We will use: + +* DocarrayV2: Helps us to load and preprocess multimodal data such as image, text and audio in our case +* Jina: Helps us serve the model quickly and create a client + +First let's install requirements + +## 💾 Installation + +```bash +pip install transformers +pip install openai-whisper +pip install jina +``` + +Now let's import necessary libraries + + +```python +import whisper +from jina import Executor, requests, Deployment +from docarray import BaseDoc, DocList +from docarray.typing import AudioUrl +``` + +Now we need to create the schema of our input and output documents. 
Since our input is an audio +our input schema should contain an AudioUrl like the following + +```python +class AudioURL(BaseDoc): + audio: AudioUrl +``` + +As for the output schema we would like to receive the transcribed text so we use the following: + +```python +class Response(BaseDoc): + text: str +``` + +Now it's time we create our model, we wrap our model into Jina Executor, this allows us to serve to model +later on and expose its endpoint /transcribe + +```python +class WhisperExecutor(Executor): + def __init__(self, device: str, *args, **kwargs): + super().__init__(*args, **kwargs) + self.model = whisper.load_model("medium.en", device=device) + + @requests + def transcribe(self, docs: DocList[AudioURL], **kwargs) -> DocList[Response]: + response_docs = DocList[Response]() + for doc in docs: + transcribed_text = self.model.transcribe(str(doc.audio))['text'] + response_docs.append(Response(text=transcribed_text)) + return response_docs +``` + +Now we can leverage Deployment object provided by Jina to use this executor +then we send a request to transcribe endpoint. Here we are using an audio file previously recorded +that says, "A Man reading a book" saved under resources/audio.mp3 but feel free to use your own audio. + +```python +with Deployment( + uses=WhisperExecutor, uses_with={'device': "cpu"}, port=12349, timeout_ready=-1 +) as d: + docs = d.post( + on='/transcribe', + inputs=[AudioURL(audio='resources/audio.mp3')], + return_type=DocList[Response], + ) + print(docs[0].text) +``` + +And we get the transcribed result! 
\ No newline at end of file diff --git a/mkdocs.yml b/mkdocs.yml index 991966990f2..f7f8c00ae9f 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -97,7 +97,6 @@ nav: - how_to/add_doc_index.md - how_to/multimodal_training_and_serving.md - how_to/optimize_performance_with_id_generation.md - - how_to/audio2text.md - Data Types: - data_types/text/text.md - data_types/image/image.md diff --git a/simple-dl.csv b/simple-dl.csv deleted file mode 100644 index b30400587f3..00000000000 --- a/simple-dl.csv +++ /dev/null @@ -1,3 +0,0 @@ -id,text -31b05a66db6fffb90f7b3e5edb71fc52,doc 0 -2bfa118dceb366281d0714b02a78b9c7,doc 1 diff --git a/simple-dl.json b/simple-dl.json deleted file mode 100644 index 07bc6ea9e99..00000000000 --- a/simple-dl.json +++ /dev/null @@ -1 +0,0 @@ -[{"id":"7d913dc1ed6d875c0b576abf092100d3","text":"doc 0"},{"id":"af7978f9eb8d44de95371e3781a3f37e","text":"doc 1"}] \ No newline at end of file diff --git a/tests/documentation/test_docs.py b/tests/documentation/test_docs.py index 447d549788f..ccda4714700 100644 --- a/tests/documentation/test_docs.py +++ b/tests/documentation/test_docs.py @@ -4,6 +4,8 @@ from mktestdocs import grab_code_blocks from mktestdocs.__main__ import _executors, check_raw_string +file_to_skip = ['fastAPI', 'jina'] + def check_raw_file_full(raw, lang="python", keyword_ignore=[]): if lang not in _executors: @@ -48,9 +50,15 @@ def check_md_file(fpath, memory=False, lang="python", keyword_ignore=[]): *list(pathlib.Path('docs/data_types').glob('**/*.md')), ] +file_to_remove = [] + for file in files_to_check: - if 'fastAPI' in str(file): # for now we don't test fastAPI stuff because of async - files_to_check.remove(file) + for fn in file_to_skip: + if fn in str(file): + file_to_remove.append(file) + +for file in file_to_remove: + files_to_check.remove(file) @pytest.mark.parametrize('fpath', files_to_check, ids=str) From c7507bf3199087c2f2922962beeb163bbe26e72b Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 13 Apr 2023 13:41:21 +0200 Subject: 
[PATCH 25/33] fix: compress -> compression Signed-off-by: samsja --- docarray/array/doc_list/doc_list.py | 2 +- docarray/array/doc_list/io.py | 2 +- docarray/base_doc/doc.py | 8 ++++---- docarray/base_doc/mixins/io.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 44e3ba2abce..3725fcc0737 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -335,7 +335,7 @@ def from_bytes( :param data: Bytes from which to deserialize :param protocol: protocol that was used to serialize - :param compress: compress algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` + :param compress: compression algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the deserialized DocList """ diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index 3e3b36adde4..688d0310bee 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -141,7 +141,7 @@ def from_bytes( :param data: Bytes from which to deserialize :param protocol: protocol that was used to serialize - :param compress: compress algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` + :param compress: compression algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` :return: the deserialized `DocList` """ diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index bb319f3074d..cfb73f1f422 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -170,7 +170,7 @@ def to_bytes( For more Pythonic code, please use ``bytes(...)``. :param protocol: protocol to use. 
It can be 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compression algorithm to use :return: the binary serialization in bytes """ return super().to_bytes(protocol, compress) @@ -186,7 +186,7 @@ def from_bytes( :param data: binary bytes :param protocol: protocol to use. It can be 'pickle' or 'protobuf' - :param compress: compress method to use + :param compress: compression method to use :return: a Document object """ return super(BaseDoc, cls).from_bytes(data, protocol, compress) @@ -197,7 +197,7 @@ def to_base64( """Serialize a Document object into as base64 string :param protocol: protocol to use. It can be 'pickle' or 'protobuf' - :param compress: compress method to use + :param compress: compression method to use :return: a base64 encoded string """ return super().to_base64(protocol, compress) @@ -213,7 +213,7 @@ def from_base64( :param data: a base64 encoded string :param protocol: protocol to use. It can be 'pickle' or 'protobuf' - :param compress: compress method to use + :param compress: compression method to use :return: a Document object """ return super(BaseDoc, cls).from_base64(data, protocol, compress) diff --git a/docarray/base_doc/mixins/io.py b/docarray/base_doc/mixins/io.py index b2a64e8082b..e50d9ac791d 100644 --- a/docarray/base_doc/mixins/io.py +++ b/docarray/base_doc/mixins/io.py @@ -138,7 +138,7 @@ def to_bytes( For more Pythonic code, please use ``bytes(...)``. :param protocol: protocol to use. 
It can be 'pickle' or 'protobuf' - :param compress: compress algorithm to use + :param compress: compression algorithm to use :return: the binary serialization in bytes """ import pickle From 27b48bff322fc829156c18136960571ecfaaa2ab Mon Sep 17 00:00:00 2001 From: samsja <55492238+samsja@users.noreply.github.com> Date: Thu, 13 Apr 2023 13:43:44 +0200 Subject: [PATCH 26/33] feat: apply suggestion Co-authored-by: Alex Cureton-Griffiths Co-authored-by: Charlotte Gerhaher Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> --- docarray/base_doc/doc.py | 9 +++++---- docs/user_guide/sending/api/fastAPI.md | 2 +- docs/user_guide/sending/first_step.md | 4 ++-- docs/user_guide/sending/ser/send_doc.md | 4 ++-- docs/user_guide/sending/ser/send_doclist.md | 2 +- docs/user_guide/sending/ser/send_docvec.md | 2 +- 6 files changed, 12 insertions(+), 11 deletions(-) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index cfb73f1f422..3319e6b6cc8 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -230,8 +230,8 @@ def from_protobuf(cls: Type[T], pb_msg: 'DocProto') -> T: def update(self, other: T_update): """ Updates self with the content of other. Changes are applied to self. 
- Updating one Document with another consists in the following: - - setting data properties of the second Document to the first Document + Updating one Document with another consists of the following: + - Setting data properties of the second Document to the first Document if they are not None - Concatenating lists and updating sets - Updating recursively Documents and DocArrays @@ -249,8 +249,9 @@ def update(self, other: T_update): --- ```python + from typing import Optional, List + from docarray import BaseDoc - from docarray.documents import Text class MyDocument(BaseDoc): @@ -271,7 +272,7 @@ class MyDocument(BaseDoc): ``` --- - :param other: The Document with which to update the contents of this + :param other: The Document used to update the contents of this Document """ super().update(other) diff --git a/docs/user_guide/sending/api/fastAPI.md b/docs/user_guide/sending/api/fastAPI.md index 5409b989787..d35308fefce 100644 --- a/docs/user_guide/sending/api/fastAPI.md +++ b/docs/user_guide/sending/api/fastAPI.md @@ -1,6 +1,6 @@ # FastAPI -[FastAPI](https://fastapi.tiangolo.com/) is a high-performance web framework for building APIs with Python based on python type hint. It's designed to be easy to use and supports asynchronous programming. +[FastAPI](https://fastapi.tiangolo.com/) is a high-performance web framework for building APIs with Python based on Python type hints. It's designed to be easy to use and supports asynchronous programming. Since [`DocArray` documents are Pydantic Models (with a twist)](../../representing/first_step.md) they can be easily integrated with FastAPI, and provide a seamless and efficient way to work with multimodal data in FastAPI-powered APIs. 
diff --git a/docs/user_guide/sending/first_step.md b/docs/user_guide/sending/first_step.md index 05441f8337d..5d7fbfa5816 100644 --- a/docs/user_guide/sending/first_step.md +++ b/docs/user_guide/sending/first_step.md @@ -4,8 +4,8 @@ In the representation section we saw how to use [`BaseDoc`][docarray.base_doc.do to represent multi-modal data. In this section we will see **how to send these data over the wire**. -This section is dived in two: +This section is divided into two: - [Serialization](./ser/send_doc.md) of [`BaseDoc`][docarray.base_doc.doc.BaseDoc], [`DocList`][docarray.array.doc_list.doc_list.DocList] and [`DocVec`][docarray.array.doc_vec.doc_vec.DocVec] -- [Using DocArray with web framework to build multimodal API](./api/jina.md) +- [Using DocArray with a web framework to build a multimodal API](./api/jina.md) diff --git a/docs/user_guide/sending/ser/send_doc.md b/docs/user_guide/sending/ser/send_doc.md index e65f5d7d950..0f3826e2288 100644 --- a/docs/user_guide/sending/ser/send_doc.md +++ b/docs/user_guide/sending/ser/send_doc.md @@ -1,8 +1,8 @@ # BaseDoc -In order to send or store [BaseDoc][docarray.base_doc.doc.BaseDoc] you need to serialize them first. +You need to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] before you can store or send it. -!! note +!!! note [BaseDoc][docarray.base_doc.doc.BaseDoc] supports serialization to `protobuf` and `json` formats. ## Serialization to protobuf diff --git a/docs/user_guide/sending/ser/send_doclist.md b/docs/user_guide/sending/ser/send_doclist.md index e3bb6583e6e..dd4362a3cbe 100644 --- a/docs/user_guide/sending/ser/send_doclist.md +++ b/docs/user_guide/sending/ser/send_doclist.md @@ -78,7 +78,7 @@ dl_from_base64 = DocList[SimpleDoc].from_base64( ``` ## Binary -Similar as in `Base64` serialization, `Binary` serialization also supports different protocols and compression methods. +Similar to `Base64` serialization, `Binary` serialization also supports different protocols and compression methods. 
To save a [DocList][docarray.array.doc_list.doc_list.DocList] into a binary file, you can use [`save_binary()`][docarray.array.doc_list.doc_list.DocList.to_base64] and [`load_binary()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.from_base64]. diff --git a/docs/user_guide/sending/ser/send_docvec.md b/docs/user_guide/sending/ser/send_docvec.md index 3868ff7c60b..3fbaf759075 100644 --- a/docs/user_guide/sending/ser/send_docvec.md +++ b/docs/user_guide/sending/ser/send_docvec.md @@ -22,7 +22,7 @@ dv_from_proto = DocVec[SimpleVecDoc].from_protobuf(proto_message_dv) ``` !!! note - We are planning to add more serilization format in the future, notably JSON. + We are planning to add more serialization formats in the future, notably JSON. [`to_protobuf`][docarray.array.doc_list.doc_list.DocVec.to_protobuf] returns a protobuf object of `docarray_pb2.DocVecProto` class. [`from_protobuf`][docarray.array.doc_list.doc_list.DocVec.from_protobuf] accepts a protobuf message object to construct a [DocVec][docarray.array.doc_list.doc_list.DocVec]. 
From 1b1c5037138a58784f0f8555e8f450916c17e827 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 13 Apr 2023 13:51:37 +0200 Subject: [PATCH 27/33] fix: apply alex suggestion Signed-off-by: samsja --- docs/user_guide/sending/ser/send_doc.md | 2 +- docs/user_guide/sending/ser/send_doclist.md | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/user_guide/sending/ser/send_doc.md b/docs/user_guide/sending/ser/send_doc.md index 0f3826e2288..caa93c4fdb1 100644 --- a/docs/user_guide/sending/ser/send_doc.md +++ b/docs/user_guide/sending/ser/send_doc.md @@ -26,7 +26,7 @@ new_doc = MyDoc.from_protobuf(proto_message) assert doc == new_doc # True ``` -## Serialization to json +## Serialization to JSON You can use [`json`][docarray.base_doc.doc.BaseDoc.json] to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] to a json string and use [`parse_raw`][docarray.base_doc.doc.BaseDoc.parse_raw] to deserialize it. diff --git a/docs/user_guide/sending/ser/send_doclist.md b/docs/user_guide/sending/ser/send_doclist.md index dd4362a3cbe..33c0a7b1718 100644 --- a/docs/user_guide/sending/ser/send_doclist.md +++ b/docs/user_guide/sending/ser/send_doclist.md @@ -2,7 +2,7 @@ When sending or storing [`DocList`][docarray.array.doc_list.doc_list.DocList], you need to use serialization. [DocList][docarray.array.doc_list.doc_list.DocList] supports multiple ways to serialize the data. ## JSON -You can use [`to_json()`][docarray.array.doc_list.doc_list.DocList.to_json] and [`from_json()`][docarray.array.doc_list.doc_list.DocList.from_json] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]. 
+You can use [`to_json()`][docarray.array.doc_list.doc_list.DocList.to_json] and [`from_json()`][docarray.array.doc_list.doc_list.DocList.from_json] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: ```python from docarray import BaseDoc, DocList @@ -31,7 +31,7 @@ b'[{"id":"5540e72d407ae81abb2390e9249ed066","text":"doc 0"},{"id":"fbe9f80d2fa03 ``` ## Protobuf -To serialize a DocList with `protobuf`, you can use [`to_protobuf()`][docarray.array.doc_list.doc_list.DocList.to_protobuf] and [`from_protobuf()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]. +To serialize a DocList with `protobuf`, you can use [`to_protobuf()`][docarray.array.doc_list.doc_list.DocList.to_protobuf] and [`from_protobuf()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: ```python from docarray import BaseDoc, DocList @@ -55,7 +55,7 @@ print(dl_from_proto) When transferring over the network, you can choose `Base64` format to serialize the [`DocList`][docarray.array.doc_list.doc_list.DocList]. Serializing a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64 supports both `pickle` and `protobuf` protocols. Besides, you can choose different compression methods. -To serialize a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64, you can use [`to_base64()`][docarray.array.doc_list.doc_list.DocList.to_base64] and [`from_base64()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.from_base64]. 
+To serialize a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64, you can use [`to_base64()`][docarray.array.doc_list.doc_list.DocList.to_base64] and [`from_base64()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.from_base64]: We support multiple compression methods. (namely : `lz4`, `bz2`, `lzma`, `zlib`, `gzip`) @@ -80,7 +80,7 @@ dl_from_base64 = DocList[SimpleDoc].from_base64( ## Binary Similar to `Base64` serialization, `Binary` serialization also supports different protocols and compression methods. -To save a [DocList][docarray.array.doc_list.doc_list.DocList] into a binary file, you can use [`save_binary()`][docarray.array.doc_list.doc_list.DocList.to_base64] and [`load_binary()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.from_base64]. +To save a [DocList][docarray.array.doc_list.doc_list.DocList] into a binary file, you can use [`save_binary()`][docarray.array.doc_list.doc_list.DocList.to_base64] and [`load_binary()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.from_base64]: ```python from docarray import BaseDoc, DocList @@ -123,7 +123,7 @@ dl_from_bytes = DocList[SimpleDoc].from_bytes( ## CSV -You can use [`from_csv()`][docarray.array.doc_list.doc_list.DocList.from_csv] and [`to_csv()`][docarray.array.doc_list.doc_list.DocList.to_csv] to de-/serializae and deserialize the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a CSV file. Use the `dialect` parameter to choose the dialect of the CSV format. +You can use [`from_csv()`][docarray.array.doc_list.doc_list.DocList.from_csv] and [`to_csv()`][docarray.array.doc_list.doc_list.DocList.to_csv] to de-/serializae and deserialize the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a CSV file. 
Use the `dialect` parameter to choose the dialect of the CSV format: ```python from docarray import BaseDoc, DocList @@ -142,7 +142,7 @@ print(dl_from_csv) ## Pandas.Dataframe -You can use [`from_dataframe()`][docarray.array.doc_list.doc_list.DocList.from_dataframe] and [`to_dataframe()`][docarray.array.doc_list.doc_list.DocList.to_dataframe] to load/save the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a pandas DataFrame. +You can use [`from_dataframe()`][docarray.array.doc_list.doc_list.DocList.from_dataframe] and [`to_dataframe()`][docarray.array.doc_list.doc_list.DocList.to_dataframe] to load/save the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a pandas DataFrame: ```python from docarray import BaseDoc, DocList From dde1612d6a25854b8f40453b5a6615bf557adccc Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 13 Apr 2023 14:04:36 +0200 Subject: [PATCH 28/33] wip Signed-off-by: samsja --- docarray/base_doc/doc.py | 121 ----------------------- docs/api_references/base_doc/base_doc.md | 3 + 2 files changed, 3 insertions(+), 121 deletions(-) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index 3319e6b6cc8..4ba3ae117c1 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -162,127 +162,6 @@ def _docarray_to_json_compatible(self) -> Dict: ### this section is just for documentation purposes will be removed later once https://github.com/mkdocstrings/griffe/issues/138 is fixed ############## ######################################################################################################################################################## - def to_bytes( - self, protocol: str = 'protobuf', compress: Optional[str] = None - ) -> bytes: - """Serialize itself into bytes. - - For more Pythonic code, please use ``bytes(...)``. - - :param protocol: protocol to use. 
It can be 'pickle' or 'protobuf' - :param compress: compression algorithm to use - :return: the binary serialization in bytes - """ - return super().to_bytes(protocol, compress) - - @classmethod - def from_bytes( - cls: Type[T], - data: bytes, - protocol: str = 'protobuf', - compress: Optional[str] = None, - ) -> T: - """Build Document object from binary bytes - - :param data: binary bytes - :param protocol: protocol to use. It can be 'pickle' or 'protobuf' - :param compress: compression method to use - :return: a Document object - """ - return super(BaseDoc, cls).from_bytes(data, protocol, compress) - - def to_base64( - self, protocol: str = 'protobuf', compress: Optional[str] = None - ) -> str: - """Serialize a Document object into as base64 string - - :param protocol: protocol to use. It can be 'pickle' or 'protobuf' - :param compress: compression method to use - :return: a base64 encoded string - """ - return super().to_base64(protocol, compress) - - @classmethod - def from_base64( - cls: Type[T], - data: str, - protocol: str = 'pickle', - compress: Optional[str] = None, - ) -> T: - """Build Document object from binary bytes - - :param data: a base64 encoded string - :param protocol: protocol to use. It can be 'pickle' or 'protobuf' - :param compress: compression method to use - :return: a Document object - """ - return super(BaseDoc, cls).from_base64(data, protocol, compress) - - @classmethod - def from_protobuf(cls: Type[T], pb_msg: 'DocProto') -> T: - """create a Document from a protobuf message - - :param pb_msg: the proto message of the Document - :return: a Document initialize with the proto data - """ - return super(BaseDoc, cls).from_protobuf(pb_msg) - - def update(self, other: T_update): - """ - Updates self with the content of other. Changes are applied to self. 
- Updating one Document with another consists of the following: - - Setting data properties of the second Document to the first Document - if they are not None - - Concatenating lists and updating sets - - Updating recursively Documents and DocArrays - - Updating Dictionaries of the left with the right - - It behaves as an update operation for Dictionaries, except that since - it is applied to a static schema type, the presence of the field is - given by the field not having a None value and that DocArrays, - lists and sets are concatenated. It is worth mentioning that Tuples - are not merged together since they are meant to be inmutable, - so they behave as regular types and the value of `self` is updated - with the value of `other` - - - --- - - ```python - from typing import Optional, List - - from docarray import BaseDoc - - - class MyDocument(BaseDoc): - content: str - title: Optional[str] = None - tags_: List - - - doc1 = MyDocument( - content='Core content of the document', title='Title', tags_=['python', 'AI'] - ) - doc2 = MyDocument(content='Core content updated', tags_=['docarray']) - - doc1.update(doc2) - assert doc1.content == 'Core content updated' - assert doc1.title == 'Title' - assert doc1.tags_ == ['python', 'AI', 'docarray'] - ``` - - --- - :param other: The Document used to update the contents of this Document - """ - super().update(other) - - def to_protobuf(self) -> 'DocProto': - """Convert Document into a Protobuf message. 
- - :return: the protobuf message - """ - return super().to_protobuf() - def json( self, *, diff --git a/docs/api_references/base_doc/base_doc.md b/docs/api_references/base_doc/base_doc.md index 0fe2dc80891..abce654ee96 100644 --- a/docs/api_references/base_doc/base_doc.md +++ b/docs/api_references/base_doc/base_doc.md @@ -1,3 +1,6 @@ # BaseDoc ::: docarray.base_doc.doc.BaseDoc +::: docarray.base_doc.mixins.io.IOMixin +::: docarray.base_doc.mixins.update.UpdateMixin + From 3d0d7457e358940cd6dca372f35a7a7dadf117b4 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 13 Apr 2023 15:58:42 +0200 Subject: [PATCH 29/33] fix: fix all docstring Signed-off-by: samsja --- docarray/array/any_array.py | 14 +- docarray/array/doc_list/doc_list.py | 294 +------------------- docs/api_references/array/da.md | 2 + docs/user_guide/sending/ser/send_doc.md | 4 +- docs/user_guide/sending/ser/send_doclist.md | 18 +- 5 files changed, 21 insertions(+), 311 deletions(-) diff --git a/docarray/array/any_array.py b/docarray/array/any_array.py index 901f87f82a9..3d966d34904 100644 --- a/docarray/array/any_array.py +++ b/docarray/array/any_array.py @@ -121,7 +121,7 @@ def _set_data_column( field: str, values: Union[List, T, 'AbstractTensor'], ): - """Set all Documents in this [`DocList`][docarray.typing.DocList] using the passed values + """Set all Documents in this [`DocList`][docarray.array.doc_list.doc_list.DocList] using the passed values :param field: name of the fields to extract :values: the values to set at the DocList level @@ -140,7 +140,7 @@ def to_protobuf(self) -> 'DocListProto': ... def _to_node_protobuf(self) -> 'NodeProto': - """Convert a [`DocList`][docarray.typing.DocList] into a NodeProto protobuf message. + """Convert a [`DocList`][docarray.array.doc_list.doc_list.DocList] into a NodeProto protobuf message. 
This function should be called when a DocList is nested into another Document that need to be converted into a protobuf @@ -157,7 +157,7 @@ def traverse_flat( ) -> Union[List[Any], 'AbstractTensor']: """ Return a List of the accessed objects when applying the `access_path`. If this - results in a nested list or list of [`DocList`s][docarray.typing.DocList], the list will be flattened + results in a nested list or list of [`DocList`s][docarray.array.doc_list.doc_list.DocList], the list will be flattened on the first level. The access path is a string that consists of attribute names, concatenated and `"__"`-separated. It describes the path from the first level to an arbitrary one, e.g. `'content__image__url'`. @@ -209,7 +209,7 @@ class Book(BaseDoc): ``` - If your [`DocList`][docarray.typing.DocList] is in doc_vec mode, and you want to access a field of + If your [`DocList`][docarray.array.doc_list.doc_list.DocList] is in doc_vec mode, and you want to access a field of type [`AnyTensor`][docarray.typing.AnyTensor], the doc_vec tensor will be returned instead of a list: ```python @@ -265,7 +265,7 @@ def _flatten_one_level(sequence: List[Any]) -> List[Any]: def summary(self): """ - Print a summary of this [`DocList`][docarray.typing.DocList] object and a summary of the schema of its + Print a summary of this [`DocList`][docarray.array.doc_list.doc_list.DocList] object and a summary of the schema of its Document type. """ DocArraySummary(self).summary() @@ -277,13 +277,13 @@ def _batch( show_progress: bool = False, ) -> Generator[T, None, None]: """ - Creates a `Generator` that yields [`DocList`][docarray.typing.DocList] of size `batch_size`. + Creates a `Generator` that yields [`DocList`][docarray.array.doc_list.doc_list.DocList] of size `batch_size`. Note, that the last batch might be smaller than `batch_size`. :param batch_size: Size of each generated batch. :param shuffle: If set, shuffle the Documents before dividing into minibatches. 
:param show_progress: if set, show a progress bar when batching documents. - :yield: a Generator of [`DocList`][docarray.typing.DocList], each in the length of `batch_size` + :yield: a Generator of [`DocList`][docarray.array.doc_list.doc_list.DocList], each in the length of `batch_size` """ from rich.progress import track diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py index 3725fcc0737..8eb1a822d59 100644 --- a/docarray/array/doc_list/doc_list.py +++ b/docarray/array/doc_list/doc_list.py @@ -1,15 +1,10 @@ -import csv import io -import pathlib from functools import wraps from typing import ( TYPE_CHECKING, Any, - BinaryIO, Callable, - Generator, Iterable, - Iterator, List, MutableSequence, Optional, @@ -23,7 +18,7 @@ from typing_inspect import is_union_type from docarray.array.any_array import AnyDocArray -from docarray.array.doc_list.io import IOMixinArray, _LazyRequestReader +from docarray.array.doc_list.io import IOMixinArray from docarray.array.doc_list.pushpull import PushPullMixin from docarray.array.doc_list.sequence_indexing_mixin import ( IndexingSequenceMixin, @@ -33,7 +28,6 @@ from docarray.typing import NdArray if TYPE_CHECKING: - import pandas as pd from pydantic import BaseConfig from pydantic.fields import ModelField @@ -314,289 +308,3 @@ def __getitem__(self: T, item: IndexIterType) -> T: def __getitem__(self, item): return super().__getitem__(item) - - ######################################################################################################################################################## - ### this section is just for documentation purposes will be removed later once https://github.com/mkdocstrings/griffe/issues/138 is fixed ############## - ######################################################################################################################################################## - - def to_protobuf(self) -> 'DocListProto': - """Convert DocList into a Protobuf message""" - return 
super(DocList, self).to_protobuf() - - @classmethod - def from_bytes( - cls: Type[T], - data: bytes, - protocol: str = 'protobuf-array', - compress: Optional[str] = None, - show_progress: bool = False, - ) -> T: - """Deserialize bytes into a DocList. - - :param data: Bytes from which to deserialize - :param protocol: protocol that was used to serialize - :param compress: compression algorithm that was used to serialize between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` - :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - :return: the deserialized DocList - """ - return super(DocList, cls).from_bytes( - data, protocol=protocol, compress=compress, show_progress=show_progress - ) - - def to_binary_stream( - self, - protocol: str = 'protobuf', - compress: Optional[str] = None, - show_progress: bool = False, - ) -> Iterator[bytes]: - return super().to_binary_stream( - protocol=protocol, compress=compress, show_progress=show_progress - ) - - def to_bytes( - self, - protocol: str = 'protobuf-array', - compress: Optional[str] = None, - file_ctx: Optional[BinaryIO] = None, - show_progress: bool = False, - ) -> Optional[bytes]: - """Serialize itself into bytes. - - For more Pythonic code, please use ``bytes(...)``. - - :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` - :param file_ctx: File or filename or serialized bytes where the data is stored. 
- :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - :return: the binary serialization in bytes or None if file_ctx is passed where to store - """ - return super().to_bytes( - protocol=protocol, - compress=compress, - file_ctx=file_ctx, - show_progress=show_progress, - ) - - @classmethod - def from_base64( - cls: Type[T], - data: str, - protocol: str = 'protobuf-array', - compress: Optional[str] = None, - show_progress: bool = False, - ) -> T: - """Deserialize base64 strings into a DocList. - - :param data: Base64 string to deserialize - :param protocol: protocol that was used to serialize - :param compress: compress algorithm that was used to serialize - :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - :return: the deserialized DocList - """ - return super(DocList, cls).from_base64( - data, protocol=protocol, compress=compress, show_progress=show_progress - ) - - def to_base64( - self, - protocol: str = 'protobuf-array', - compress: Optional[str] = None, - show_progress: bool = False, - ) -> str: - """Serialize itself into base64 encoded string. - - :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` - :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - :return: the binary serialization in bytes or None if file_ctx is passed where to store - """ - return super().to_base64( - protocol=protocol, compress=compress, show_progress=show_progress - ) - - @classmethod - def from_json( - cls: Type[T], - file: Union[str, bytes, bytearray], - ) -> T: - """Deserialize JSON strings or bytes into a DocList. 
- - :param file: JSON object from where to deserialize a DocList - :return: the deserialized DocList - """ - return super(DocList, cls).from_json(file) - - def to_json(self) -> bytes: - """Convert the object into JSON bytes. Can be loaded via :meth:`.from_json`. - :return: JSON serialization of DocList - """ - return super().to_json() - - @classmethod - def from_csv( - cls, - file_path: str, - encoding: str = 'utf-8', - dialect: Union[str, csv.Dialect] = 'excel', - ) -> 'DocList': - """ - Load a DocList from a csv file following the schema defined in the - :attr:`~docarray.DocList.doc_type` attribute. - Every row of the csv file will be mapped to one document in the doc_list. - The column names (defined in the first row) have to match the field names - of the Document type. - For nested fields use "__"-separated access paths, such as 'image__url'. - - List-like fields (including field of type DocList) are not supported. - - :param file_path: path to csv file to load DocList from. - :param encoding: encoding used to read the csv file. Defaults to 'utf-8'. - :param dialect: defines separator and how to handle whitespaces etc. - Can be a csv.Dialect instance or one string of: - 'excel' (for comma seperated values), - 'excel-tab' (for tab separated values), - 'unix' (for csv file generated on UNIX systems). - :return: DocList - """ - return super(DocList, cls).from_csv( - file_path, encoding=encoding, dialect=dialect - ) - - def to_csv( - self, file_path: str, dialect: Union[str, csv.Dialect] = 'excel' - ) -> None: - """ - Save a DocList to a csv file. - The field names will be stored in the first row. Each row corresponds to the - information of one Document. - Columns for nested fields will be named after the "__"-seperated access paths, - such as `"image__url"` for `image.url`. - - :param file_path: path to a csv file. - :param dialect: defines separator and how to handle whitespaces etc. 
- Can be a csv.Dialect instance or one string of: - 'excel' (for comma seperated values), - 'excel-tab' (for tab separated values), - 'unix' (for csv file generated on UNIX systems). - """ - return super().to_csv(file_path, dialect=dialect) - - @classmethod - def from_dataframe(cls, df: 'pd.DataFrame') -> 'DocList': - """ - Load a DocList from a `pandas.DataFrame` following the schema - defined in the :attr:`~docarray.DocList.doc_type` attribute. - Every row of the dataframe will be mapped to one Document in the doc_list. - The column names of the dataframe have to match the field names of the - Document type. - For nested fields use "__"-separated access paths as column names, - such as 'image__url'. - - List-like fields (including field of type DocList) are not supported. - - - --- - - ```python - import pandas as pd - - from docarray import BaseDoc, DocList - - - class Person(BaseDoc): - name: str - follower: int - - - df = pd.DataFrame( - data=[['Maria', 12345], ['Jake', 54321]], columns=['name', 'follower'] - ) - - docs = DocList[Person].from_dataframe(df) - - assert docs.name == ['Maria', 'Jake'] - assert docs.follower == [12345, 54321] - ``` - - --- - - :param df: pandas.DataFrame to extract Document's information from - :return: DocList where each Document contains the information of one - corresponding row of the `pandas.DataFrame`. - """ - return super(DocList, cls).from_dataframe(df) - - def to_dataframe(self) -> 'pd.DataFrame': - """ - Save a DocList to a `pandas.DataFrame`. - The field names will be stored as column names. Each row of the dataframe corresponds - to the information of one Document. - Columns for nested fields will be named after the "__"-seperated access paths, - such as `"image__url"` for `image.url`. 
- - :return: pandas.DataFrame - """ - return super().to_dataframe() - - @classmethod - def load_binary( - cls: Type[T], - file: Union[str, bytes, pathlib.Path, io.BufferedReader, _LazyRequestReader], - protocol: str = 'protobuf-array', - compress: Optional[str] = None, - show_progress: bool = False, - streaming: bool = False, - ) -> Union[T, Generator['T_doc', None, None]]: - """Load doc_list elements from a compressed binary file. - - :param file: File or filename or serialized bytes where the data is stored. - :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use between 'lz4', 'gzip', 'bz2', 'zstd', 'lzma' - :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - :param streaming: if `True` returns a generator over `Document` objects. - In case protocol is pickle the `Documents` are streamed from disk to save memory usage - :return: a DocList object - - .. note:: - If `file` is `str` it can specify `protocol` and `compress` as file extensions. - This functionality assumes `file=file_name.$protocol.$compress` where `$protocol` and `$compress` refer to a - string interpolation of the respective `protocol` and `compress` methods. - For example if `file=my_docarray.protobuf.lz4` then the binary data will be loaded assuming `protocol=protobuf` - and `compress=lz4`. - """ - return super().load_binary( - file, protocol=protocol, compress=compress, show_progress=show_progress - ) - - def save_binary( - self, - file: Union[str, pathlib.Path], - protocol: str = 'protobuf-array', - compress: Optional[str] = None, - show_progress: bool = False, - ) -> None: - """Save DocList into a binary file. - - It will use the protocol to pick how to save the DocList. - If used 'picke-doc_list` and `protobuf-array` the DocList will be stored - and compressed at complete level using `pickle` or `protobuf`. 
- When using `protobuf` or `pickle` as protocol each Document in DocList - will be stored individually and this would make it available for streaming. - - !! note - If `file` is `str` it can specify `protocol` and `compress` as file extensions. - This functionality assumes `file=file_name.$protocol.$compress` where `$protocol` and `$compress` refer to a - string interpolation of the respective `protocol` and `compress` methods. - For example if `file=my_docarray.protobuf.lz4` then the binary data will be created using `protocol=protobuf` - and `compress=lz4`. - - :param file: File or filename to which the data is saved. - :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use between : `lz4`, `bz2`, `lzma`, `zlib`, `gzip` - :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - - - """ - return super().save_binary( - file, protocol=protocol, compress=compress, show_progress=show_progress - ) diff --git a/docs/api_references/array/da.md b/docs/api_references/array/da.md index 21a206a9537..e1f5b33f008 100644 --- a/docs/api_references/array/da.md +++ b/docs/api_references/array/da.md @@ -1,3 +1,5 @@ # DocList ::: docarray.array.doc_list.doc_list.DocList +::: docarray.array.doc_list.io.IOMixinArray +::: docarray.array.doc_list.pushpull.PushPullMixin diff --git a/docs/user_guide/sending/ser/send_doc.md b/docs/user_guide/sending/ser/send_doc.md index caa93c4fdb1..dd77557dbba 100644 --- a/docs/user_guide/sending/ser/send_doc.md +++ b/docs/user_guide/sending/ser/send_doc.md @@ -7,8 +7,8 @@ You need to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] before you can ## Serialization to protobuf -You can use [`to_protobuf`][docarray.base_doc.doc.BaseDoc.to_protobuf] to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] to a protobuf message object -and use [`from_protobuf`][docarray.base_doc.doc.BaseDoc.from_protobuf] to deserialize it. 
+You can use [`to_protobuf`][docarray.base_doc.mixins.io.IOMixin.to_protobuf] to serialize a [BaseDoc][docarray.base_doc.doc.BaseDoc] to a protobuf message object +and use [`from_protobuf`][docarray.base_doc.mixins.io.IOMixin.from_protobuf] to deserialize it. ```python from typing import List diff --git a/docs/user_guide/sending/ser/send_doclist.md b/docs/user_guide/sending/ser/send_doclist.md index 33c0a7b1718..70b1789ca5f 100644 --- a/docs/user_guide/sending/ser/send_doclist.md +++ b/docs/user_guide/sending/ser/send_doclist.md @@ -2,7 +2,7 @@ When sending or storing [`DocList`][docarray.array.doc_list.doc_list.DocList], you need to use serialization. [DocList][docarray.array.doc_list.doc_list.DocList] supports multiple ways to serialize the data. ## JSON -You can use [`to_json()`][docarray.array.doc_list.doc_list.DocList.to_json] and [`from_json()`][docarray.array.doc_list.doc_list.DocList.from_json] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: +You can use [`to_json()`][docarray.array.doc_list.io.IOMixinArray.to_json] and [`from_json()`][docarray.array.doc_list.io.IOMixinArray.from_json] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: ```python from docarray import BaseDoc, DocList @@ -24,14 +24,14 @@ with open('simple-dl.json', 'r') as f: print(dl_load_from_json) ``` -[to_json()][docarray.array.doc_list.doc_list.DocList.to_json] returns the binary representation of the json object. [from_json()][docarray.array.doc_list.doc_list.DocList.from_json] can load from either `str` or `binary` representation of the json object. +[to_json()][docarray.array.doc_list.io.IOMixinArray.to_json] returns the binary representation of the json object. [from_json()][docarray.array.doc_list.io.IOMixinArray.from_json] can load from either `str` or `binary` representation of the json object. 
```output b'[{"id":"5540e72d407ae81abb2390e9249ed066","text":"doc 0"},{"id":"fbe9f80d2fa03571e899a2887af1ac1b","text":"doc 1"}]' ``` ## Protobuf -To serialize a DocList with `protobuf`, you can use [`to_protobuf()`][docarray.array.doc_list.doc_list.DocList.to_protobuf] and [`from_protobuf()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: +To serialize a DocList with `protobuf`, you can use [`to_protobuf()`][docarray.array.doc_list.io.IOMixinArray.to_protobuf] and [`from_protobuf()`][docarray.array.doc_list.io.IOMixinArray.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]: ```python from docarray import BaseDoc, DocList @@ -49,13 +49,13 @@ print(type(proto_message_dl)) print(dl_from_proto) ``` -[to_protobuf()][docarray.array.doc_list.doc_list.DocList.to_protobuf] returns a protobuf object of `docarray_pb2.DocListProto` class. [from_protobuf()][docarray.array.doc_list.doc_list.DocList.from_protobuf] accepts a protobuf message object to construct a [DocList][docarray.array.doc_list.doc_list.DocList]. +[to_protobuf()][docarray.array.doc_list.io.IOMixinArray.to_protobuf] returns a protobuf object of `docarray_pb2.DocListProto` class. [from_protobuf()][docarray.array.doc_list.io.IOMixinArray.from_protobuf] accepts a protobuf message object to construct a [DocList][docarray.array.doc_list.doc_list.DocList]. ## Base64 When transferring over the network, you can choose `Base64` format to serialize the [`DocList`][docarray.array.doc_list.doc_list.DocList]. Serializing a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64 supports both `pickle` and `protobuf` protocols. Besides, you can choose different compression methods. 
-To serialize a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64, you can use [`to_base64()`][docarray.array.doc_list.doc_list.DocList.to_base64] and [`from_base64()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.from_base64]:
+To serialize a [DocList][docarray.array.doc_list.doc_list.DocList] in Base64, you can use [`to_base64()`][docarray.array.doc_list.io.IOMixinArray.to_base64] and [`from_base64()`][docarray.array.doc_list.io.IOMixinArray.from_base64] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]:
 
 We support multiple compression methods. (namely : `lz4`, `bz2`, `lzma`, `zlib`, `gzip`)
 
@@ -80,7 +80,7 @@ dl_from_base64 = DocList[SimpleDoc].from_base64(
 ## Binary
 Similar to `Base64` serialization, `Binary` serialization also supports different protocols and compression methods.
 
-To save a [DocList][docarray.array.doc_list.doc_list.DocList] into a binary file, you can use [`save_binary()`][docarray.array.doc_list.doc_list.DocList.to_base64] and [`load_binary()`][docarray.array.doc_list.doc_list.DocList.from_protobuf] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.from_base64]:
+To save a [DocList][docarray.array.doc_list.doc_list.DocList] into a binary file, you can use [`save_binary()`][docarray.array.doc_list.io.IOMixinArray.save_binary] and [`load_binary()`][docarray.array.doc_list.io.IOMixinArray.load_binary] to serialize and deserialize a [DocList][docarray.array.doc_list.doc_list.DocList]:
 
 ```python
 from docarray import BaseDoc, DocList
@@ -102,7 +102,7 @@ dl_from_binary = DocList[SimpleDoc].load_binary(
 The [DocList][docarray.array.doc_list.doc_list.DocList] is stored at `simple-dl.pickle` file.
### Bytes
-Under the hood, [save_binary()][docarray.array.doc_list.doc_list.DocList.to_base64] prepares the file object and calls [to_bytes()][docarray.array.doc_list.doc_list.DocList.to_bytes] function to convert the [DocList][docarray.array.doc_list.doc_list.DocList] into a byte object. You can use [to_bytes()][docarray.array.doc_list.doc_list.DocList.to_bytes] function directly and use [from_bytes()][docarray.array.doc_list.doc_list.DocList.from_bytes] to load the [DocList][docarray.array.doc_list.doc_list.DocList] from a byte object. You can use `protocol` to choose between `pickle` and `protobuf`. Besides, [to_bytes()][docarray.array.doc_list.doc_list.DocList.to_bytes] and [save_bytes()][docarray.array.doc_list.doc_list.DocList.save_bytes] support multiple options for `compress` as well.
+Under the hood, [save_binary()][docarray.array.doc_list.io.IOMixinArray.save_binary] prepares the file object and calls [to_bytes()][docarray.array.doc_list.io.IOMixinArray.to_bytes] function to convert the [DocList][docarray.array.doc_list.doc_list.DocList] into a byte object. You can use [to_bytes()][docarray.array.doc_list.io.IOMixinArray.to_bytes] function directly and use [from_bytes()][docarray.array.doc_list.io.IOMixinArray.from_bytes] to load the [DocList][docarray.array.doc_list.doc_list.DocList] from a byte object. You can use `protocol` to choose between `pickle` and `protobuf`. Besides, [to_bytes()][docarray.array.doc_list.io.IOMixinArray.to_bytes] and [save_binary()][docarray.array.doc_list.io.IOMixinArray.save_binary] support multiple options for `compress` as well.
 
 ```python
 from docarray import BaseDoc, DocList
@@ -123,7 +123,7 @@
 
 ## CSV
 
-You can use [`from_csv()`][docarray.array.doc_list.doc_list.DocList.from_csv] and [`to_csv()`][docarray.array.doc_list.doc_list.DocList.to_csv] to de-/serializae and deserialize the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a CSV file. 
Use the `dialect` parameter to choose the dialect of the CSV format: +You can use [`from_csv()`][docarray.array.doc_list.io.IOMixinArray.from_csv] and [`to_csv()`][docarray.array.doc_list.io.IOMixinArray.to_csv] to de-/serializae and deserialize the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a CSV file. Use the `dialect` parameter to choose the dialect of the CSV format: ```python from docarray import BaseDoc, DocList @@ -142,7 +142,7 @@ print(dl_from_csv) ## Pandas.Dataframe -You can use [`from_dataframe()`][docarray.array.doc_list.doc_list.DocList.from_dataframe] and [`to_dataframe()`][docarray.array.doc_list.doc_list.DocList.to_dataframe] to load/save the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a pandas DataFrame: +You can use [`from_dataframe()`][docarray.array.doc_list.io.IOMixinArray.from_dataframe] and [`to_dataframe()`][docarray.array.doc_list.io.IOMixinArray.to_dataframe] to load/save the [DocList][docarray.array.doc_list.doc_list.DocList] from/to a pandas DataFrame: ```python from docarray import BaseDoc, DocList From a6c9aa928ebe0f040697215263c308fd5d80977e Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 13 Apr 2023 16:03:26 +0200 Subject: [PATCH 30/33] fix: fix update docstring Signed-off-by: samsja --- docarray/base_doc/mixins/update.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docarray/base_doc/mixins/update.py b/docarray/base_doc/mixins/update.py index 5a21738a7d4..fd962237b02 100644 --- a/docarray/base_doc/mixins/update.py +++ b/docarray/base_doc/mixins/update.py @@ -25,7 +25,8 @@ def update(self, other: T): Updates self with the content of other. Changes are applied to self. 
Updating one Document with another consists in the following: - setting data properties of the second Document to the first Document - if they are not None + if they are not None: + - Concatenating lists and updating sets - Updating recursively Documents and DocArrays - Updating Dictionaries of the left with the right From 70c0f45737dffa87d6526ba82d63a63905200995 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 13 Apr 2023 16:14:06 +0200 Subject: [PATCH 31/33] fix: fix ruff Signed-off-by: samsja --- docarray/base_doc/doc.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index 4ba3ae117c1..0ed39bd0d49 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -27,7 +27,6 @@ from pydantic.typing import AbstractSetIntStr, MappingIntStrAny from docarray.array.doc_vec.column_storage import ColumnStorageView - from docarray.proto import DocProto _console: Console = Console() From 2828cf21b93c97d5e4d9f4a525ec33204956ad13 Mon Sep 17 00:00:00 2001 From: samsja Date: Thu, 13 Apr 2023 16:26:22 +0200 Subject: [PATCH 32/33] fix: fix smth Signed-off-by: samsja --- docarray/array/doc_list/io.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py index 688d0310bee..9f153e2f1bd 100644 --- a/docarray/array/doc_list/io.py +++ b/docarray/array/doc_list/io.py @@ -702,13 +702,7 @@ def load_binary( ) -> Union[T, Generator['T_doc', None, None]]: """Load doc_list elements from a compressed binary file. - :param file: File or filename or serialized bytes where the data is stored. - :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' - :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` - :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` - :param streaming: if `True` returns a generator over `Document` objects. 
In case protocol is pickle the `Documents` are streamed from disk to save memory usage - :return: a `DocList` object !!! note If `file` is `str` it can specify `protocol` and `compress` as file extensions. @@ -716,6 +710,15 @@ def load_binary( string interpolation of the respective `protocol` and `compress` methods. For example if `file=my_docarray.protobuf.lz4` then the binary data will be loaded assuming `protocol=protobuf` and `compress=lz4`. + + :param file: File or filename or serialized bytes where the data is stored. + :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf' + :param compress: compress algorithm to use between `lz4`, `bz2`, `lzma`, `zlib`, `gzip` + :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf` + :param streaming: if `True` returns a generator over `Document` objects. + + :return: a `DocList` object + """ load_protocol: Optional[str] = protocol load_compress: Optional[str] = compress From 228ddffeec3648831aecce2857d3e6719224451a Mon Sep 17 00:00:00 2001 From: samsja <55492238+samsja@users.noreply.github.com> Date: Thu, 13 Apr 2023 16:34:13 +0200 Subject: [PATCH 33/33] feat: apply charllote suggestion Co-authored-by: Charlotte Gerhaher Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> --- docarray/base_doc/mixins/update.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docarray/base_doc/mixins/update.py b/docarray/base_doc/mixins/update.py index fd962237b02..471e97483ba 100644 --- a/docarray/base_doc/mixins/update.py +++ b/docarray/base_doc/mixins/update.py @@ -43,8 +43,9 @@ def update(self, other: T): --- ```python + from typing import List, Optional + from docarray import BaseDoc - from docarray.documents import Text class MyDocument(BaseDoc):