docarray · samsja · Apr 14, 2023 · Apr 14, 2023 · Apr 14, 2023 · Apr 14, 2023
diff --git a/docarray/array/any_array.py b/docarray/array/any_array.py
@@ -210,7 +210,7 @@ class Book(BaseDoc):
 
 
         If your [`DocList`][docarray.array.doc_list.doc_list.DocList] is in doc_vec mode, and you want to access a field of
-        type [`AnyTensor`][docarray.typing.AnyTensor], the doc_vec tensor will be returned instead of a list:
+        type `AnyTensor`, the doc_vec tensor will be returned instead of a list:
 
         ```python
         class Image(BaseDoc):

diff --git a/docarray/array/doc_list/io.py b/docarray/array/doc_list/io.py
@@ -358,10 +358,9 @@ def from_csv(
         :param dialect: defines separator and how to handle whitespaces etc.
             Can be a [`csv.Dialect`](https://docs.python.org/3/library/csv.html#csv.Dialect)
             instance or one string of:
-
-                - 'excel' (for comma separated values),
-                - 'excel-tab' (for tab separated values),
-                - 'unix' (for csv file generated on UNIX systems).
+            `'excel'` (for comma separated values),
+            `'excel-tab'` (for tab separated values),
+            `'unix'` (for csv file generated on UNIX systems).
 
         :return: `DocList` object
         """
@@ -428,10 +427,10 @@ def to_csv(
         :param dialect: defines separator and how to handle whitespaces etc.
             Can be a [`csv.Dialect`](https://docs.python.org/3/library/csv.html#csv.Dialect)
             instance or one string of:
+            `'excel'` (for comma separated values),
+            `'excel-tab'` (for tab separated values),
+            `'unix'` (for csv file generated on UNIX systems).
 
-                - 'excel' (for comma seperated values),
-                - 'excel-tab' (for tab separated values),
-                - 'unix' (for csv file generated on UNIX systems).
         """
         fields = self.doc_type._get_access_paths()
 

diff --git a/docarray/array/doc_vec/doc_vec.py b/docarray/array/doc_vec/doc_vec.py
@@ -59,32 +59,34 @@ class DocVec(AnyDocArray[T_doc]):
     computation that require batches of data (ex: matrix multiplication, distance
     calculation, deep learning forward pass)
 
-    A DocVec has a similar interface as
-    {class}`~docarray.array.DocList` but with an underlying implementation that is
-    column based instead of row based. Each field
-    of the schema of the DocVec
-    (the :attr:`~docarray.array.doc_vec.DocVec.doc_type` which is a
-    `BaseDoc`) will be stored in a column. If the field is a tensor, the data from all Documents will be stored as a single, doc_vec (torch/np/tf) tensor.
-    If the tensor field
-    is `AnyTensor` or a Union of tensor types, the
-    :attr:`~docarray.array.doc_vec.DocVec.tensor_type` will be used to determine
-    the type of the doc_vec column.
-
-    If the field is another `BasedDoc` the column will be another DocVec that follows the
-    schema of the nested Document.
-    If the field is a `DocList` or
-    `DocVec` then the column will be a list of `DocVec`.
+    A DocVec has a similar interface as [`DocList`][docarray.array.DocList]
+    but with an underlying implementation that is column based instead of row based.
+    Each field of the schema of the `DocVec` (the `.doc_type` which is a
+    [`BaseDoc`][docarray.BaseDoc]) will be stored in a column.
+
+    If the field is a tensor, the data from all Documents will be stored as a single
+    doc_vec (torch/np/tf) tensor.
+
+    If the tensor field is `AnyTensor` or a Union of tensor types, the
+    `.tensor_type` will be used to determine the type of the doc_vec column.
+
+    If the field is another [`BaseDoc`][docarray.BaseDoc] the column will be another
+    `DocVec` that follows the schema of the nested Document.
+
+    If the field is a [`DocList`][docarray.DocList] or `DocVec` then the column will
+    be a list of `DocVec`.
+
     For any other type the column is a Python list.
 
-    Every `Document` inside a `DocVec` is a view into the data columns stored at the `DocVec` level. The `BaseDoc`  does
-     not hold any data itself. The behavior of
-     this Document "view" is similar to the behavior of `view = tensor[i]` in
-     numpy/PyTorch.
+    Every `Document` inside a `DocVec` is a view into the data columns stored at the
+    `DocVec` level. The `BaseDoc` does not hold any data itself. The behavior of
+    this Document "view" is similar to the behavior of `view = tensor[i]` in
+    numpy/PyTorch.
 
-    :param docs: a homogeneous sequence of BaseDoc
+    :param docs: a homogeneous sequence of `BaseDoc`
     :param tensor_type: Tensor Class used to wrap the doc_vec tensors. This is useful
-    if the BaseDoc of this DocVec has some undefined tensor type like
-    AnyTensor or Union of NdArray and TorchTensor
+        if the BaseDoc of this DocVec has some undefined tensor type like
+        AnyTensor or Union of NdArray and TorchTensor
     """
 
     doc_type: Type[T_doc]

diff --git a/docarray/base_doc/mixins/update.py b/docarray/base_doc/mixins/update.py
@@ -24,9 +24,9 @@ def update(self, other: T):
         """
         Updates self with the content of other. Changes are applied to self.
         Updating one Document with another consists in the following:
-         - setting data properties of the second Document to the first Document
-         if they are not None:
 
+         - Setting data properties of the second Document to the first Document
+         if they are not None
          - Concatenating lists and updating sets
          - Updating recursively Documents and DocArrays
          - Updating Dictionaries of the left with the right
@@ -35,9 +35,9 @@ def update(self, other: T):
         it is applied to a static schema type, the presence of the field is
         given by the field not having a None value and that DocArrays,
         lists and sets are concatenated. It is worth mentioning that Tuples
-        are not merged together since they are meant to be inmutable,
+        are not merged together since they are meant to be immutable,
         so they behave as regular types and the value of `self` is updated
-        with the value of `other`
+        with the value of `other`.
 
 
         ---

diff --git a/docarray/data/torch_dataset.py b/docarray/data/torch_dataset.py
@@ -14,30 +14,31 @@ class MultiModalDataset(Dataset, Generic[T_doc]):
     A dataset that can be used inside a PyTorch DataLoader.
     In other words, it implements the PyTorch Dataset interface.
 
-    :param docs: the DocList to be used as the dataset
-    :param preprocessing: a dictionary of field names and preprocessing functions
-
     The preprocessing dictionary passed to the constructor consists of keys that are
     field names and values that are functions that take a single argument and return
     a single argument.
 
-    EXAMPLE USAGE
-    .. code-block:: python
+    ---
+
+    ```python
     from torch.utils.data import DataLoader
     from docarray import DocList
     from docarray.data import MultiModalDataset
-    from docarray.documents import Text
+    from docarray.documents import TextDoc
 
 
     def prepend_number(text: str):
         return f"Number {text}"
 
 
-    docs = DocList[Text](Text(text=str(i)) for i in range(16))
-    ds = MultiModalDataset[Text](docs, preprocessing={'text': prepend_number})
-    loader = DataLoader(ds, batch_size=4, collate_fn=MultiModalDataset[Text].collate_fn)
+    docs = DocList[TextDoc](TextDoc(text=str(i)) for i in range(16))
+    ds = MultiModalDataset[TextDoc](docs, preprocessing={'text': prepend_number})
+    loader = DataLoader(ds, batch_size=4, collate_fn=MultiModalDataset[TextDoc].collate_fn)
     for batch in loader:
         print(batch.text)
+    ```
+
+    ---
 
     Nested fields can be accessed by using dot notation.
     The document itself can be accessed using the empty string as the key.
@@ -47,24 +48,25 @@ def prepend_number(text: str):
 
     The transformations will be applied according to their order in the dictionary.
 
-    EXAMPLE USAGE
-    .. code-block:: python
+    ---
+
+    ```python
     import torch
     from torch.utils.data import DataLoader
     from docarray import DocList, BaseDoc
     from docarray.data import MultiModalDataset
-    from docarray.documents import Text
+    from docarray.documents import TextDoc
 
 
     class Thesis(BaseDoc):
-        title: Text
+        title: TextDoc
 
 
     class Student(BaseDoc):
         thesis: Thesis
 
 
-    def embed_title(title: Text):
+    def embed_title(title: TextDoc):
         title.embedding = torch.ones(4)
 
 
@@ -90,6 +92,12 @@ def add_nonsense(student: Student):
     loader = DataLoader(ds, batch_size=4, collate_fn=ds.collate_fn)
     for batch in loader:
         print(batch.thesis.title.embedding)
+    ```
+
+    ---
+
+    :param docs: the `DocList` to be used as the dataset
+    :param preprocessing: a dictionary of field names and preprocessing functions
     """
 
     doc_type: Optional[Type[BaseDoc]] = None

diff --git a/docarray/typing/tensor/audio/abstract_audio_tensor.py b/docarray/typing/tensor/audio/abstract_audio_tensor.py
@@ -16,7 +16,7 @@
 class AbstractAudioTensor(AbstractTensor, ABC):
     def to_bytes(self) -> 'AudioBytes':
         """
-        Convert audio tensor to AudioBytes.
+        Convert audio tensor to [`AudioBytes`][docarray.typing.AudioBytes].
         """
         from docarray.typing.bytes.audio_bytes import AudioBytes
 

diff --git a/docarray/typing/tensor/image/abstract_image_tensor.py b/docarray/typing/tensor/image/abstract_image_tensor.py
@@ -15,7 +15,7 @@
 class AbstractImageTensor(AbstractTensor, ABC):
     def to_bytes(self, format: str = 'PNG') -> 'ImageBytes':
         """
-        Convert image tensor to ImageBytes.
+        Convert image tensor to [`ImageBytes`][docarray.typing.ImageBytes].
 
         :param format: the image format use to store the image, can be 'PNG' , 'JPG' ...
         :return: an ImageBytes object

diff --git a/docarray/typing/tensor/image/image_tensorflow_tensor.py b/docarray/typing/tensor/image/image_tensorflow_tensor.py
@@ -14,7 +14,8 @@ class ImageTensorFlowTensor(
     """
     Subclass of [`TensorFlowTensor`][docarray.typing.TensorFlowTensor],
     to represent an image tensor. Adds image-specific features to the tensor.
-    For instance the ability convert the tensor back to image bytes which are
+    For instance the ability to convert the tensor back to
+    [`ImageBytes`][docarray.typing.ImageBytes] which are
     optimized to send over the wire.
 
 

diff --git a/docarray/typing/tensor/image/image_torch_tensor.py b/docarray/typing/tensor/image/image_torch_tensor.py
@@ -12,7 +12,8 @@ class ImageTorchTensor(AbstractImageTensor, TorchTensor, metaclass=metaTorchAndN
     """
     Subclass of [`TorchTensor`][docarray.typing.TorchTensor], to represent an image tensor.
     Adds image-specific features to the tensor.
-    For instance the ability convert the tensor back to image bytes which are
+    For instance the ability to convert the tensor back to
+    [`ImageBytes`][docarray.typing.ImageBytes] which are
     optimized to send over the wire.
 
 

diff --git a/docarray/typing/tensor/video/video_tensor_mixin.py b/docarray/typing/tensor/video/video_tensor_mixin.py
@@ -135,7 +135,7 @@ def to_bytes(
         audio_format: str = 'fltp',
     ) -> 'VideoBytes':
         """
-        Convert video tensor to VideoBytes.
+        Convert video tensor to [`VideoBytes`][docarray.typing.VideoBytes].
 
         :param audio_tensor: AudioTensor containing the video's soundtrack.
         :param video_frame_rate: video frames per second.

diff --git a/docarray/typing/url/url_3d/mesh_url.py b/docarray/typing/url/url_3d/mesh_url.py
@@ -26,33 +26,33 @@ def load(
         trimesh_args: Optional[Dict[str, Any]] = None,
     ) -> 'VerticesAndFaces':
         """
-         Load the data from the url into a VerticesAndFaces object containing
-         vertices and faces information.
+        Load the data from the url into a [`VerticesAndFaces`][docarray.documents.VerticesAndFaces]
+        object containing vertices and faces information.
 
         ---
 
-         ```python
-         from docarray import BaseDoc
+        ```python
+        from docarray import BaseDoc
 
-         from docarray.typing import Mesh3DUrl, NdArray
+        from docarray.typing import Mesh3DUrl, NdArray
 
 
-         class MyDoc(BaseDoc):
-             mesh_url: Mesh3DUrl
+        class MyDoc(BaseDoc):
+            mesh_url: Mesh3DUrl
 
 
-         doc = MyDoc(mesh_url="toydata/tetrahedron.obj")
+        doc = MyDoc(mesh_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")
 
-         tensors = doc.mesh_url.load()
-         assert isinstance(tensors.vertices, NdArray)
-         assert isinstance(tensors.faces, NdArray)
-         ```
+        tensors = doc.mesh_url.load()
+        assert isinstance(tensors.vertices, NdArray)
+        assert isinstance(tensors.faces, NdArray)
+        ```
 
-         ---
-         :param skip_materials: Skip materials if True, else skip.
-         :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
-             or `trimesh.load_remote()`.
-         :return: VerticesAndFaces object containing vertices and faces information.
+
+        :param skip_materials: Skip materials if True, else load them.
+        :param trimesh_args: dictionary of additional arguments for `trimesh.load()`
+            or `trimesh.load_remote()`.
+        :return: VerticesAndFaces object containing vertices and faces information.
         """
         from docarray.documents.mesh.vertices_and_faces import VerticesAndFaces
 

diff --git a/docarray/typing/url/url_3d/point_cloud_url.py b/docarray/typing/url/url_3d/point_cloud_url.py
@@ -29,7 +29,7 @@ def load(
         trimesh_args: Optional[Dict[str, Any]] = None,
     ) -> 'PointsAndColors':
         """
-        Load the data from the url into an NdArray containing point cloud information.
+        Load the data from the url into an `NdArray` containing point cloud information.
 
 
         ---
@@ -45,7 +45,7 @@ class MyDoc(BaseDoc):
             point_cloud_url: PointCloud3DUrl
 
 
-        doc = MyDoc(point_cloud_url="toydata/tetrahedron.obj")
+        doc = MyDoc(point_cloud_url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")
 
         # point_cloud = doc.point_cloud_url.load(samples=100)
 
@@ -96,20 +96,24 @@ def display(
         First, it loads the point cloud into a `PointsAndColors` object, and then
         calls display on it. The following is therefore equivalent:
 
-        .. code-block:: python
+        ---
 
-            import numpy as np
-            from docarray import BaseDoc
+        ```python
+        import numpy as np
+        from docarray import BaseDoc
 
-            from docarray.documents import PointCloud3D
+        from docarray.documents import PointCloud3D
 
-            pc = PointCloud3D("toydata/tetrahedron.obj")
+        pc = PointCloud3D(url="https://people.sc.fsu.edu/~jburkardt/data/obj/al.obj")
 
-            # option 1
-            pc.url.display()
+        # option 1
+        # pc.url.display()
 
-            # option 2 (equivalent)
-            pc.url.load(samples=10000).display()
+        # option 2 (equivalent)
+        # pc.url.load(samples=10000).display()
+        ```
+
+        ---
 
         :param samples: number of points to sample from the mesh.
         """

diff --git a/docarray/typing/url/video_url.py b/docarray/typing/url/video_url.py
@@ -73,8 +73,7 @@ class MyDoc(BaseDoc):
         ---
 
         :param kwargs: supports all keyword arguments that are being supported by
-            av.open() as described in:
-            https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open
+            av.open() as described [here](https://pyav.org/docs/stable/api/_globals.html?highlight=open#av.open)
 
         :return: [`AudioNdArray`][docarray.typing.AudioNdArray] representing the audio content,
             [`VideoNdArray`][docarray.typing.VideoNdArray] representing the images of the video,