docarray · samsja · Apr 13, 2023 · Apr 10, 2023 · Apr 10, 2023 · Apr 10, 2023
diff --git a/docarray/array/any_array.py b/docarray/array/any_array.py
@@ -121,7 +121,7 @@ def _set_data_column(
         field: str,
         values: Union[List, T, 'AbstractTensor'],
     ):
-        """Set all Documents in this [`DocList`][docarray.typing.DocList] using the passed values
+        """Set all Documents in this [`DocList`][docarray.array.doc_list.doc_list.DocList] using the passed values
 
         :param field: name of the fields to extract
         :values: the values to set at the DocList level
@@ -140,7 +140,7 @@ def to_protobuf(self) -> 'DocListProto':
         ...
 
     def _to_node_protobuf(self) -> 'NodeProto':
-        """Convert a [`DocList`][docarray.typing.DocList] into a NodeProto protobuf message.
+        """Convert a [`DocList`][docarray.array.doc_list.doc_list.DocList] into a NodeProto protobuf message.
          This function should be called when a DocList
         is nested into another Document that need to be converted into a protobuf
 
@@ -157,13 +157,11 @@ def traverse_flat(
     ) -> Union[List[Any], 'AbstractTensor']:
         """
         Return a List of the accessed objects when applying the `access_path`. If this
-        results in a nested list or list of [`DocList`s][docarray.typing.DocList], the list will be flattened
+        results in a nested list or list of [`DocList`s][docarray.array.doc_list.doc_list.DocList], the list will be flattened
         on the first level. The access path is a string that consists of attribute
         names, concatenated and `"__"`-separated. It describes the path from the first
         level to an arbitrary one, e.g. `'content__image__url'`.
 
-        :param access_path: a string that represents the access path (`"__"`-separated).
-        :return: list of the accessed objects, flattened if nested.
 
         ```python
         from docarray import BaseDoc, DocList, Text
@@ -210,7 +208,8 @@ class Book(BaseDoc):
         chapters = docs.traverse_flat(access_path='chapters')  # list of 30 strings
         ```
 
-        If your [`DocList`][docarray.typing.DocList] is in doc_vec mode, and you want to access a field of
+
+        If your [`DocList`][docarray.array.doc_list.doc_list.DocList] is in doc_vec mode, and you want to access a field of
         type [`AnyTensor`][docarray.typing.AnyTensor], the doc_vec tensor will be returned instead of a list:
 
         ```python
@@ -232,6 +231,9 @@ class Image(BaseDoc):
             access_path='tensor'
         )  # tensor of shape (2, 3, 224, 224)
         ```
+
+        :param access_path: a string that represents the access path (`"__"`-separated).
+        :return: list of the accessed objects, flattened if nested.
         """
         ...
 
@@ -263,7 +265,7 @@ def _flatten_one_level(sequence: List[Any]) -> List[Any]:
 
     def summary(self):
         """
-        Print a summary of this [`DocList`][docarray.typing.DocList] object and a summary of the schema of its
+        Print a summary of this [`DocList`][docarray.array.doc_list.doc_list.DocList] object and a summary of the schema of its
         Document type.
         """
         DocArraySummary(self).summary()
@@ -275,13 +277,13 @@ def _batch(
         show_progress: bool = False,
     ) -> Generator[T, None, None]:
         """
-        Creates a `Generator` that yields [`DocList`][docarray.typing.DocList] of size `batch_size`.
+        Creates a `Generator` that yields [`DocList`][docarray.array.doc_list.doc_list.DocList] of size `batch_size`.
         Note, that the last batch might be smaller than `batch_size`.
 
         :param batch_size: Size of each generated batch.
         :param shuffle: If set, shuffle the Documents before dividing into minibatches.
         :param show_progress: if set, show a progress bar when batching documents.
-        :yield: a Generator of [`DocList`][docarray.typing.DocList], each in the length of `batch_size`
+        :yield: a Generator of [`DocList`][docarray.array.doc_list.doc_list.DocList], each of length `batch_size` (the last one may be smaller)
         """
         from rich.progress import track
 

diff --git a/docarray/array/doc_list/doc_list.py b/docarray/array/doc_list/doc_list.py
@@ -96,6 +96,7 @@ class Image(BaseDoc):
 
     # You can also set fields, with `docs.tensor = np.random.random([10, 100])`:
 
+
     import numpy as np
 
     docs.tensor = np.random.random([10, 100])

diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py
@@ -141,7 +141,7 @@ def from_bytes(
 
         :param data: Bytes from which to deserialize
         :param protocol: protocol that was used to serialize
-        :param compress: compress algorithm that was used to serialize
+        :param compress: compression algorithm that was used to serialize; one of `lz4`, `bz2`, `lzma`, `zlib`, `gzip`
         :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
         :return: the deserialized `DocList`
         """
@@ -247,7 +247,7 @@ def to_bytes(
         For more Pythonic code, please use ``bytes(...)``.
 
         :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf'
-        :param compress: compress algorithm to use
+        :param compress: compression algorithm to use; one of `lz4`, `bz2`, `lzma`, `zlib`, `gzip`
         :param file_ctx: File or filename or serialized bytes where the data is stored.
         :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
         :return: the binary serialization in bytes or None if file_ctx is passed where to store
@@ -277,7 +277,7 @@ def from_base64(
 
         :param data: Base64 string to deserialize
         :param protocol: protocol that was used to serialize
-        :param compress: compress algorithm that was used to serialize
+        :param compress: compression algorithm that was used to serialize; one of `lz4`, `bz2`, `lzma`, `zlib`, `gzip`
         :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
         :return: the deserialized `DocList`
         """
@@ -297,7 +297,7 @@ def to_base64(
         """Serialize itself into base64 encoded string.
 
         :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf'
-        :param compress: compress algorithm to use
+        :param compress: compression algorithm to use; one of `lz4`, `bz2`, `lzma`, `zlib`, `gzip`
         :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
         :return: the binary serialization in bytes or None if file_ctx is passed where to store
         """
@@ -566,7 +566,7 @@ def _load_binary_all(
     ):
         """Read a `DocList` object from a binary file
         :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf'
-        :param compress: compress algorithm to use
+        :param compress: compression algorithm to use; one of `lz4`, `bz2`, `lzma`, `zlib`, `gzip`
         :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
         :return: a `DocList`
         """
@@ -646,7 +646,7 @@ def _load_binary_stream(
         """Yield `Document` objects from a binary file
 
         :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
-        :param compress: compress algorithm to use
+        :param compress: compression algorithm to use; one of `lz4`, `bz2`, `lzma`, `zlib`, `gzip`
         :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
         :return: a generator of `Document` objects
         """
@@ -702,20 +702,23 @@ def load_binary(
     ) -> Union[T, Generator['T_doc', None, None]]:
         """Load doc_list elements from a compressed binary file.
 
-        :param file: File or filename or serialized bytes where the data is stored.
-        :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf'
-        :param compress: compress algorithm to use
-        :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
-        :param streaming: if `True` returns a generator over `Document` objects.
         In case protocol is pickle the `Documents` are streamed from disk to save memory usage
-        :return: a `DocList` object
 
         !!! note
             If `file` is `str` it can specify `protocol` and `compress` as file extensions.
             This functionality assumes `file=file_name.$protocol.$compress` where `$protocol` and `$compress` refer to a
             string interpolation of the respective `protocol` and `compress` methods.
             For example if `file=my_docarray.protobuf.lz4` then the binary data will be loaded assuming `protocol=protobuf`
             and `compress=lz4`.
+
+        :param file: File or filename or serialized bytes where the data is stored.
+        :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf'
+        :param compress: compression algorithm to use; one of `lz4`, `bz2`, `lzma`, `zlib`, `gzip`
+        :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
+        :param streaming: if `True` returns a generator over `Document` objects.
+
+        :return: a `DocList` object, or a generator over `Document` objects if `streaming` is `True`
+
         """
         load_protocol: Optional[str] = protocol
         load_compress: Optional[str] = compress
@@ -765,7 +768,7 @@ def save_binary(
 
         :param file: File or filename to which the data is saved.
         :param protocol: protocol to use. It can be 'pickle-array', 'protobuf-array', 'pickle' or 'protobuf'
-        :param compress: compress algorithm to use
+        :param compress: compression algorithm to use; one of `lz4`, `bz2`, `lzma`, `zlib`, `gzip`
         :param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
 
          !!! note

diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py
@@ -1,5 +1,15 @@
 import os
-from typing import TYPE_CHECKING, Any, Dict, Optional, Type, TypeVar
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Dict,
+    Optional,
+    Type,
+    TypeVar,
+    Union,
+    no_type_check,
+)
 
 import orjson
 from pydantic import BaseModel, Field
@@ -12,11 +22,16 @@
 from docarray.typing.tensor.abstract_tensor import AbstractTensor
 
 if TYPE_CHECKING:
+    from pydantic import Protocol
+    from pydantic.types import StrBytes
+    from pydantic.typing import AbstractSetIntStr, MappingIntStrAny
+
     from docarray.array.doc_vec.column_storage import ColumnStorageView
 
 _console: Console = Console()
 
 T = TypeVar('T', bound='BaseDoc')
+T_update = TypeVar('T_update', bound='UpdateMixin')
 
 
 class BaseDoc(BaseModel, IOMixin, UpdateMixin, BaseNode):
@@ -141,3 +156,67 @@ def _docarray_to_json_compatible(self) -> Dict:
         :return: A dictionary of the BaseDoc object
         """
         return self.dict()
+
+    ########################################################################################################################################################
+    ### this section is just for documentation purposes and will be removed later once https://github.com/mkdocstrings/griffe/issues/138 is fixed ##########
+    ########################################################################################################################################################
+
+    def json(
+        self,
+        *,
+        include: Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']] = None,
+        exclude: Optional[Union['AbstractSetIntStr', 'MappingIntStrAny']] = None,
+        by_alias: bool = False,
+        skip_defaults: Optional[bool] = None,
+        exclude_unset: bool = False,
+        exclude_defaults: bool = False,
+        exclude_none: bool = False,
+        encoder: Optional[Callable[[Any], Any]] = None,
+        models_as_dict: bool = True,
+        **dumps_kwargs: Any,
+    ) -> str:
+        """
+        Generate a JSON representation of the model, `include` and `exclude` arguments as per `dict()`.
+
+        `encoder` is an optional function to supply as `default` to json.dumps(), other arguments as per `json.dumps()`.
+        """
+        return super().json(
+            include=include,
+            exclude=exclude,
+            by_alias=by_alias,
+            skip_defaults=skip_defaults,
+            exclude_unset=exclude_unset,
+            exclude_defaults=exclude_defaults,
+            exclude_none=exclude_none,
+            encoder=encoder,
+            models_as_dict=models_as_dict,
+            **dumps_kwargs,
+        )
+
+    @no_type_check
+    @classmethod
+    def parse_raw(
+        cls: Type[T],
+        b: 'StrBytes',
+        *,
+        content_type: str = None,
+        encoding: str = 'utf8',
+        proto: 'Protocol' = None,
+        allow_pickle: bool = False,
+    ) -> T:
+        """
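+        Parse a raw string or bytes into a base doc.
+
+        :param b: the raw string or bytes to parse
+        :param content_type: content type of the data (e.g. `application/json`), used to infer the format when `proto` is not given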
+        :param encoding: the encoding to use when parsing a string, defaults to 'utf8'
+        :param proto: protocol to use.
+        :param allow_pickle: allow pickle protocol
+        :return: a document
+        """
+        return super(BaseDoc, cls).parse_raw(
+            b,
+            content_type=content_type,
+            encoding=encoding,
+            proto=proto,
+            allow_pickle=allow_pickle,
+        )
diff --git a/docarray/base_doc/mixins/io.py b/docarray/base_doc/mixins/io.py
@@ -138,7 +138,7 @@ def to_bytes(
         For more Pythonic code, please use ``bytes(...)``.
 
         :param protocol: protocol to use. It can be 'pickle' or 'protobuf'
-        :param compress: compress algorithm to use
+        :param compress: compression algorithm to use
         :return: the binary serialization in bytes
         """
         import pickle

diff --git a/docarray/base_doc/mixins/update.py b/docarray/base_doc/mixins/update.py
@@ -25,7 +25,8 @@ def update(self, other: T):
         Updates self with the content of other. Changes are applied to self.
         Updating one Document with another consists in the following:
          - setting data properties of the second Document to the first Document
-         if they are not None
+         if they are not None:
+
          - Concatenating lists and updating sets
          - Updating recursively Documents and DocArrays
          - Updating Dictionaries of the left with the right
@@ -38,30 +39,33 @@ def update(self, other: T):
         so they behave as regular types and the value of `self` is updated
         with the value of `other`
 
-            EXAMPLE USAGE
 
-            .. code-block:: python
+        ---
+
+        ```python
+        from typing import List, Optional
 
-                from docarray import BaseDoc
-                from docarray.documents import Text
+        from docarray import BaseDoc
 
 
-                class MyDocument(BaseDoc):
-                    content: str
-                    title: Optional[str] = None
-                    tags_: List
+        class MyDocument(BaseDoc):
+            content: str
+            title: Optional[str] = None
+            tags_: List
 
 
-                doc1 = MyDocument(
-                    content='Core content of the document', title='Title', tags_=['python', 'AI']
-                )
-                doc2 = MyDocument(content='Core content updated', tags_=['docarray'])
+        doc1 = MyDocument(
+            content='Core content of the document', title='Title', tags_=['python', 'AI']
+        )
+        doc2 = MyDocument(content='Core content updated', tags_=['docarray'])
 
-                doc1.update(doc2)
-                assert doc1.content == 'Core content updated'
-                assert doc1.title == 'Title'
-                assert doc1.tags_ == ['python', 'AI', 'docarray']
+        doc1.update(doc2)
+        assert doc1.content == 'Core content updated'
+        assert doc1.title == 'Title'
+        assert doc1.tags_ == ['python', 'AI', 'docarray']
+        ```
 
+        ---
         :param other: The Document with which to update the contents of this
         """
         if type(self) != type(other):

diff --git a/docs/api_references/array/da.md b/docs/api_references/array/da.md
@@ -2,3 +2,4 @@
 
 ::: docarray.array.doc_list.doc_list.DocList
 ::: docarray.array.doc_list.io.IOMixinArray
+::: docarray.array.doc_list.pushpull.PushPullMixin
diff --git a/docs/api_references/base_doc/base_doc.md b/docs/api_references/base_doc/base_doc.md
@@ -1,3 +1,6 @@
 # BaseDoc
 
 ::: docarray.base_doc.doc.BaseDoc
+::: docarray.base_doc.mixins.io.IOMixin
+::: docarray.base_doc.mixins.update.UpdateMixin
+
diff --git a/docs/integrations/fastapi.md → docs/user_guide/sending/api/fastAPI.md b/docs/integrations/fastapi.md → docs/user_guide/sending/api/fastAPI.md
@@ -1,9 +1,15 @@
-# Use DocArray with FastAPI
+# FastAPI
 
-FastAPI is a high-performance web framework for building APIs with Python. It's designed to be easy to use and supports asynchronous programming. 
-Since [`DocArray` documents are Pydantic Models (with a twist)](../user_guide/representing/first_step.md) they can be easily integrated with FastAPI, 
+[FastAPI](https://fastapi.tiangolo.com/) is a high-performance web framework for building APIs with Python based on Python type hints. It's designed to be easy to use and supports asynchronous programming. 
+Since [`DocArray` documents are Pydantic Models (with a twist)](../../representing/first_step.md) they can be easily integrated with FastAPI, 
 and provide a seamless and efficient way to work with multimodal data in FastAPI-powered APIs.
 
+!!! note
+    You need to install FastAPI to follow this section:
+    ``` 
+    pip install fastapi
+    ```
+
 
 First, you should define schemas for your input and/or output Documents:
 ```python

diff --git a/docs/how_to/audio2text.md → docs/user_guide/sending/api/jina.md b/docs/how_to/audio2text.md → docs/user_guide/sending/api/jina.md
@@ -1,3 +1,5 @@
+# Jina
+
 # Create an audio to text app with Jina and DocArray V2
 
 This is how you can build an Audio to Text app using Jina, DocArray and Whisper.
Original file line number	Diff line number	Diff line change
Expand Up		@@ -96,6 +96,7 @@ class Image(BaseDoc):

		# You can also set fields, with `docs.tensor = np.random.random([10, 100])`:


		import numpy as np

		docs.tensor = np.random.random([10, 100])
Expand Down
Original file line number	Diff line number	Diff line change
Expand Up		@@ -2,3 +2,4 @@

		::: docarray.array.doc_list.doc_list.DocList
		::: docarray.array.doc_list.io.IOMixinArray
		::: docarray.array.doc_list.pushpull.PushPullMixin