docarray · samsja · Apr 4, 2023 · Apr 3, 2023 · Apr 3, 2023 · Apr 3, 2023
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -61,7 +61,7 @@ jobs:
           poetry install --without dev
           poetry run pip install tensorflow==2.11.0
       - name: Test basic import
-        run: poetry run python -c 'from docarray import DocArray, BaseDoc'
+        run: poetry run python -c 'from docarray import DocList, BaseDoc'
 
 
   check-mypy:

diff --git a/README.md b/README.md
@@ -77,9 +77,10 @@ doc = MultiModalDocument(
 )
 ```
 
-### Collect multiple `Documents` into a `DocArray`:
+### Collect multiple `Documents` into a `DocList`:
+
 ```python
-from docarray import DocArray, BaseDoc
+from docarray import DocList, BaseDoc
 from docarray.typing import AnyTensor, ImageUrl
 import numpy as np
 
@@ -90,9 +91,9 @@ class Image(BaseDoc):
 ```
 
 ```python
-from docarray import DocArray
+from docarray import DocList
 
-da = DocArray[Image](
+da = DocList[Image](
     [
         Image(
             url="https://upload.wikimedia.org/wikipedia/commons/2/2f/Alpamayo.jpg",
@@ -150,16 +151,16 @@ Image.from_protobuf(doc.to_protobuf())
 
 ```python
 # NOTE: DocumentStores are not yet implemented in version 2
-from docarray import DocArray
+from docarray import DocList
 from docarray.documents import ImageDoc
 from docarray.stores import DocumentStore
 import numpy as np
 
-da = DocArray([ImageDoc(embedding=np.zeros((128,))) for _ in range(1000)])
+da = DocList([ImageDoc(embedding=np.zeros((128,))) for _ in range(1000)])
 store = DocumentStore[ImageDoc](
     storage='qdrant'
 )  # create a DocumentStore with Qdrant as backend
-store.insert(da)  # insert the DocArray into the DocumentStore
+store.insert(da)  # insert the DocList into the DocumentStore
 # find the 10 most similar images based on the 'embedding' field
 match = store.find(ImageDoc(embedding=np.zeros((128,))), field='embedding', top_k=10)
 ```
@@ -233,7 +234,7 @@ Not very easy on the eyes if you ask us. And even worse, if you need to add one
 So, now let's see what the same code looks like with DocArray:
 
 ```python
-from docarray import DocArray, BaseDoc
+from docarray import DocList, BaseDoc
 from docarray.documents import ImageDoc, TextDoc, AudioDoc
 from docarray.typing import TorchTensor
 
@@ -258,18 +259,18 @@ class MyPodcastModel(nn.Module):
         self.image_encoder = ImageEncoder()
         self.text_encoder = TextEncoder()
 
-    def forward_podcast(self, da: DocArray[Podcast]) -> DocArray[Podcast]:
-        da.audio.embedding = self.audio_encoder(da.audio.tensor)
-        da.text.embedding = self.text_encoder(da.text.tensor)
-        da.image.embedding = self.image_encoder(da.image.tensor)
+    def forward_podcast(self, docs: DocList[Podcast]) -> DocList[Podcast]:
+        docs.audio.embedding = self.audio_encoder(docs.audio.tensor)
+        docs.text.embedding = self.text_encoder(docs.text.tensor)
+        docs.image.embedding = self.image_encoder(docs.image.tensor)
 
-        return da
+        return docs
 
-    def forward(self, da: DocArray[PairPodcast]) -> DocArray[PairPodcast]:
-        da.left = self.forward_podcast(da.left)
-        da.right = self.forward_podcast(da.right)
+    def forward(self, docs: DocList[PairPodcast]) -> DocList[PairPodcast]:
+        docs.left = self.forward_podcast(docs.left)
+        docs.right = self.forward_podcast(docs.right)
 
-        return da
+        return docs
 ```
 
 Looks much better, doesn't it?
@@ -297,7 +298,7 @@ This would look like the following:
 ```python
 from typing import Optional
 
-from docarray import DocArray, BaseDoc
+from docarray import DocList, BaseDoc
 
 import tensorflow as tf
 
@@ -312,7 +313,7 @@ class MyPodcastModel(tf.keras.Model):
         super().__init__()
         self.audio_encoder = AudioEncoder()
 
-    def call(self, inputs: DocArray[Podcast]) -> DocArray[Podcast]:
+    def call(self, inputs: DocList[Podcast]) -> DocList[Podcast]:
         inputs.audio_tensor.embedding = self.audio_encoder(
             inputs.audio_tensor.tensor
         )  # access audio_tensor's .tensor attribute
@@ -407,7 +408,7 @@ store it there, and thus make it searchable:
 
 ```python
 # NOTE: DocumentStores are not yet implemented in version 2
-from docarray import DocArray, BaseDoc
+from docarray import DocList, BaseDoc
 from docarray.stores import DocumentStore
 from docarray.documents import ImageDoc, TextDoc
 import numpy as np
@@ -427,7 +428,7 @@ def _random_my_doc():
     )
 
 
-da = DocArray([_random_my_doc() for _ in range(1000)])  # create some data
+da = DocList([_random_my_doc() for _ in range(1000)])  # create some data
 store = DocumentStore[MyDoc](
     storage='qdrant'
 )  # create a DocumentStore with Qdrant as backend

diff --git a/docarray/__init__.py b/docarray/__init__.py
@@ -2,10 +2,10 @@
 
 import logging
 
-from docarray.array import DocArray, DocArrayStacked
+from docarray.array import DocList, DocVec
 from docarray.base_doc.doc import BaseDoc
 
-__all__ = ['BaseDoc', 'DocArray', 'DocArrayStacked']
+__all__ = ['BaseDoc', 'DocList', 'DocVec']
 
 logger = logging.getLogger('docarray')
 

diff --git a/docarray/array/__init__.py b/docarray/array/__init__.py
@@ -1,4 +1,5 @@
-from docarray.array.array.array import DocArray
-from docarray.array.stacked.array_stacked import DocArrayStacked
+from docarray.array.any_array import AnyDocArray
+from docarray.array.doc_list.doc_list import DocList
+from docarray.array.doc_vec.doc_vec import DocVec
 
-__all__ = ['DocArray', 'DocArrayStacked']
+__all__ = ['DocList', 'DocVec', 'AnyDocArray']
diff --git a/docarray/array/abstract_array.py → docarray/array/any_array.py b/docarray/array/abstract_array.py → docarray/array/any_array.py
@@ -25,7 +25,7 @@
 from docarray.utils._internal._typing import change_cls_name
 
 if TYPE_CHECKING:
-    from docarray.proto import DocumentArrayProto, NodeProto
+    from docarray.proto import DocListProto, NodeProto
     from docarray.typing.tensor.abstract_tensor import AbstractTensor
 
 T = TypeVar('T', bound='AnyDocArray')
@@ -34,7 +34,7 @@
 
 
 class AnyDocArray(Sequence[T_doc], Generic[T_doc], AbstractType):
-    document_type: Type[BaseDoc]
+    doc_type: Type[BaseDoc]
     __typed_da__: Dict[Type['AnyDocArray'], Dict[Type[BaseDoc], Type]] = {}
 
     def __repr__(self):
@@ -58,9 +58,9 @@ def __class_getitem__(cls, item: Union[Type[BaseDoc], TypeVar, str]):
             global _DocArrayTyped
 
             class _DocArrayTyped(cls):  # type: ignore
-                document_type: Type[BaseDoc] = cast(Type[BaseDoc], item)
+                doc_type: Type[BaseDoc] = cast(Type[BaseDoc], item)
 
-            for field in _DocArrayTyped.document_type.__fields__.keys():
+            for field in _DocArrayTyped.doc_type.__fields__.keys():
 
                 def _property_generator(val: str):
                     def _getter(self):
@@ -121,34 +121,34 @@ def _set_data_column(
         field: str,
         values: Union[List, T, 'AbstractTensor'],
     ):
-        """Set all Documents in this DocArray using the passed values
+        """Set all Documents in this DocList using the passed values
 
         :param field: name of the fields to extract
-        :values: the values to set at the DocArray level
+        :param values: the values to set at the DocList level
         """
         ...
 
     @classmethod
     @abstractmethod
-    def from_protobuf(cls: Type[T], pb_msg: 'DocumentArrayProto') -> T:
+    def from_protobuf(cls: Type[T], pb_msg: 'DocListProto') -> T:
         """create a Document from a protobuf message"""
         ...
 
     @abstractmethod
-    def to_protobuf(self) -> 'DocumentArrayProto':
-        """Convert DocArray into a Protobuf message"""
+    def to_protobuf(self) -> 'DocListProto':
+        """Convert DocList into a Protobuf message"""
         ...
 
     def _to_node_protobuf(self) -> 'NodeProto':
-        """Convert a DocArray into a NodeProto protobuf message.
-         This function should be called when a DocArray
+        """Convert a DocList into a NodeProto protobuf message.
+        This function should be called when a DocList
         is nested into another Document that need to be converted into a protobuf
 
         :return: the nested item protobuf message
         """
         from docarray.proto import NodeProto
 
-        return NodeProto(document_array=self.to_protobuf())
+        return NodeProto(doc_array=self.to_protobuf())
 
     @abstractmethod
     def traverse_flat(
@@ -157,7 +157,7 @@ def traverse_flat(
     ) -> Union[List[Any], 'AbstractTensor']:
         """
         Return a List of the accessed objects when applying the `access_path`. If this
-        results in a nested list or list of DocArrays, the list will be flattened
+        results in a nested list or list of DocLists, the list will be flattened
         on the first level. The access path is a string that consists of attribute
         names, concatenated and "__"-separated. It describes the path from the first
         level to an arbitrary one, e.g. 'content__image__url'.
@@ -167,7 +167,7 @@ def traverse_flat(
 
         EXAMPLE USAGE
         .. code-block:: python
-            from docarray import BaseDoc, DocArray, Text
+            from docarray import BaseDoc, DocList, Text
 
 
             class Author(BaseDoc):
@@ -179,49 +179,47 @@ class Book(BaseDoc):
                 content: Text
 
 
-            da = DocArray[Book](
+            docs = DocList[Book](
                 Book(author=Author(name='Jenny'), content=Text(text=f'book_{i}'))
                 for i in range(10)  # noqa: E501
             )
 
-            books = da.traverse_flat(access_path='content')  # list of 10 Text objs
+            books = docs.traverse_flat(access_path='content')  # list of 10 Text objs
 
-            authors = da.traverse_flat(access_path='author__name')  # list of 10 strings
+            authors = docs.traverse_flat(access_path='author__name')  # list of 10 strings
 
         If the resulting list is a nested list, it will be flattened:
 
         EXAMPLE USAGE
         .. code-block:: python
-            from docarray import BaseDoc, DocArray
+            from docarray import BaseDoc, DocList
 
 
             class Chapter(BaseDoc):
                 content: str
 
 
             class Book(BaseDoc):
-                chapters: DocArray[Chapter]
+                chapters: DocList[Chapter]
 
 
-            da = DocArray[Book](
-                Book(
-                    chapters=DocArray[Chapter]([Chapter(content='some_content') for _ in range(3)])
-                )
+            docs = DocList[Book](
+                Book(chapters=DocList[Chapter]([Chapter(content='some_content') for _ in range(3)]))
                 for _ in range(10)
             )
 
-            chapters = da.traverse_flat(access_path='chapters')  # list of 30 strings
+            chapters = docs.traverse_flat(access_path='chapters')  # list of 30 Chapter objs
 
-        If your DocArray is in stacked mode, and you want to access a field of
-        type AnyTensor, the stacked tensor will be returned instead of a list:
+        If your documents are stored in a DocVec, and you want to access a field of
+        type AnyTensor, the stacked tensor will be returned instead of a list:
 
         EXAMPLE USAGE
         .. code-block:: python
             class Image(BaseDoc):
                 tensor: TorchTensor[3, 224, 224]
 
 
-            batch = DocArray[Image](
+            batch = DocList[Image](
                 [
                     Image(
                         tensor=torch.zeros(3, 224, 224),
@@ -243,9 +241,9 @@ def _traverse(node: Any, access_path: str):
         if access_path:
             curr_attr, _, path_attrs = access_path.partition('__')
 
-            from docarray.array import DocArray
+            from docarray.array import DocList
 
-            if isinstance(node, (DocArray, list)):
+            if isinstance(node, (DocList, list)):
                 for n in node:
                     x = getattr(n, curr_attr)
                     yield from AnyDocArray._traverse(x, path_attrs)
@@ -257,16 +255,16 @@ def _traverse(node: Any, access_path: str):
 
     @staticmethod
     def _flatten_one_level(sequence: List[Any]) -> List[Any]:
-        from docarray import DocArray
+        from docarray import DocList
 
-        if len(sequence) == 0 or not isinstance(sequence[0], (list, DocArray)):
+        if len(sequence) == 0 or not isinstance(sequence[0], (list, DocList)):
             return sequence
         else:
             return [item for sublist in sequence for item in sublist]
 
     def summary(self):
         """
-        Print a summary of this DocArray object and a summary of the schema of its
+        Print a summary of this DocList object and a summary of the schema of its
         Document type.
         """
         DocArraySummary(self).summary()
@@ -278,13 +276,13 @@ def _batch(
         show_progress: bool = False,
     ) -> Generator[T, None, None]:
         """
-        Creates a `Generator` that yields `DocArray` of size `batch_size`.
+        Creates a `Generator` that yields `DocList` of size `batch_size`.
         Note, that the last batch might be smaller than `batch_size`.
 
         :param batch_size: Size of each generated batch.
         :param shuffle: If set, shuffle the Documents before dividing into minibatches.
         :param show_progress: if set, show a progress bar when batching documents.
-        :yield: a Generator of `DocArray`, each in the length of `batch_size`
+        :yield: a Generator of `DocList`, each of length `batch_size`
         """
         from rich.progress import track
 

diff --git a/docarray/array/array/__init__.py → docarray/array/doc_list/__init__.py b/docarray/array/array/__init__.py → docarray/array/doc_list/__init__.py