Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docarray/array/any_array.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,10 @@ def to_protobuf(self) -> 'DocListProto':
...

def _to_node_protobuf(self) -> 'NodeProto':
"""Convert a [`DocList`][docarray.array.doc_list.doc_list.DocList] into a NodeProto protobuf message.
This function should be called when a DocList
is nested into another Document that need to be converted into a protobuf
"""Convert a [`DocList`][docarray.array.doc_list.doc_list.DocList] into a NodeProto
protobuf message.
This function should be called when a DocList is nested into
another Document that needs to be converted into a protobuf.

:return: the nested item protobuf message
"""
Expand Down Expand Up @@ -208,7 +209,6 @@ class Book(BaseDoc):
chapters = docs.traverse_flat(access_path='chapters') # list of 30 strings
```


If your [`DocList`][docarray.array.doc_list.doc_list.DocList] is in doc_vec mode, and you want to access a field of
type `AnyTensor`, the doc_vec tensor will be returned instead of a list:

Expand Down
26 changes: 13 additions & 13 deletions docarray/array/doc_list/pushpull.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,10 +86,10 @@ def push(
show_progress: bool = False,
branding: Optional[Dict] = None,
) -> Dict:
"""Push this DocList object to the specified url.
"""Push this `DocList` object to the specified url.

:param url: url specifying the protocol and save name of the DocList. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param public: Only used by ``jac`` protocol. If true, anyone can pull a DocList if they know its name.
:param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param public: Only used by ``jac`` protocol. If true, anyone can pull a `DocList` if they know its name.
Setting this to false will restrict access to only the creator.
:param show_progress: If true, a progress bar will be displayed.
:param branding: Only used by ``jac`` protocol. A dictionary of branding information to be sent to Jina AI Cloud. {"icon": "emoji", "background": "#fff"}
Expand All @@ -112,8 +112,8 @@ def push_stream(
"""Push a stream of documents to the specified url.

:param docs: a stream of documents
:param url: url specifying the protocol and save name of the DocList. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param public: Only used by ``jac`` protocol. If true, anyone can pull a DocList if they know its name.
:param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param public: Only used by ``jac`` protocol. If true, anyone can pull a `DocList` if they know its name.
:param show_progress: If true, a progress bar will be displayed.
:param branding: Only used by ``jac`` protocol. A dictionary of branding information to be sent to Jina AI Cloud. {"icon": "emoji", "background": "#fff"}
"""
Expand All @@ -130,19 +130,19 @@ def pull(
show_progress: bool = False,
local_cache: bool = True,
) -> 'DocList':
"""Pull a :class:`DocList` from the specified url.
"""Pull a `DocList` from the specified url.

:param url: url specifying the protocol and save name of the DocList. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param show_progress: if true, display a progress bar.
:param local_cache: store the downloaded DocList to local folder
:return: a :class:`DocList` object
:param local_cache: store the downloaded `DocList` to local folder
:return: a `DocList` object
"""
from docarray.base_doc import AnyDoc

if cls.doc_type == AnyDoc:
raise TypeError(
'There is no document schema defined. '
'Please specify the DocList\'s Document type using `DocList[MyDoc]`.'
'Please specify the `DocList`\'s Document type using `DocList[MyDoc]`.'
)

logging.info(f'Pulling {url}')
Expand All @@ -160,17 +160,17 @@ def pull_stream(
) -> Iterator['BaseDoc']:
"""Pull a stream of Documents from the specified url.

:param url: url specifying the protocol and save name of the DocList. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param url: url specifying the protocol and save name of the `DocList`. Should be of the form ``protocol://namespace/name``. e.g. ``s3://bucket/path/to/namespace/name``, ``file:///path/to/folder/name``
:param show_progress: if true, display a progress bar.
:param local_cache: store the downloaded DocList to local folder
:param local_cache: store the downloaded `DocList` to local folder
:return: Iterator of Documents
"""
from docarray.base_doc import AnyDoc

if cls.doc_type == AnyDoc:
raise TypeError(
'There is no document schema defined. '
'Please specify the DocList\'s Document type using `DocList[MyDoc]`.'
'Please specify the `DocList`\'s Document type using `DocList[MyDoc]`.'
)

logging.info(f'Pulling Document stream from {url}')
Expand Down
27 changes: 16 additions & 11 deletions docarray/store/file.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,15 @@


class FileDocStore(AbstractDocStore):
"""Class to push and pull [`DocList`][docarray.DocList] on-disk."""

@staticmethod
def _abs_filepath(name: str) -> Path:
"""Resolve a name to an absolute path.
If it is not a path, the cache directoty is prepended.
If it is a path, it is resolved to an absolute path.

:param name: If it is not a path, the cache directory is prepended.
If it is a path, it is resolved to an absolute path.
:return: Path
"""
if not (name.startswith('/') or name.startswith('~') or name.startswith('.')):
name = str(_get_cache_path() / name)
Expand All @@ -32,11 +36,11 @@ def _abs_filepath(name: str) -> Path:
def list(
cls: Type[SelfFileDocStore], namespace: str, show_table: bool
) -> List[str]:
"""List all DocArrays in a directory.
"""List all [`DocList`s][docarray.DocList] in a directory.

:param namespace: The directory to list.
:param show_table: If True, print a table of the files in the directory.
:return: A list of the names of the DocArrays in the directory.
:return: A list of the names of the `DocList`s in the directory.
"""
namespace_dir = cls._abs_filepath(namespace)
if not namespace_dir.exists():
Expand All @@ -51,7 +55,7 @@ def list(
from rich.table import Table

table = Table(
title=f'You have {len(da_files)} DocArrays in file://{namespace_dir}',
title=f'You have {len(da_files)} DocLists in file://{namespace_dir}',
box=box.SIMPLE,
highlight=True,
)
Expand All @@ -74,9 +78,9 @@ def list(
def delete(
cls: Type[SelfFileDocStore], name: str, missing_ok: bool = False
) -> bool:
"""Delete a DocList from the local filesystem.
"""Delete a [`DocList`][docarray.DocList] from the local filesystem.

:param name: The name of the DocList to delete.
:param name: The name of the `DocList` to delete.
:param missing_ok: If True, do not raise an exception if the file does not exist. Defaults to False.
:return: True if the file was deleted, False if it did not exist.
"""
Expand All @@ -98,8 +102,9 @@ def push(
show_progress: bool,
branding: Optional[Dict],
) -> Dict:
"""Push this DocList object to the specified file path.
"""Push this [`DocList`][docarray.DocList] object to the specified file path.

:param docs: The `DocList` to push.
:param name: The file path to push to.
:param public: Not used by the ``file`` protocol.
:param show_progress: If true, a progress bar will be displayed.
Expand Down Expand Up @@ -150,12 +155,12 @@ def pull(
show_progress: bool,
local_cache: bool,
) -> 'DocList':
"""Pull a :class:`DocList` from the specified url.
"""Pull a [`DocList`][docarray.DocList] from the specified url.

:param name: The file path to pull from.
:param show_progress: if true, display a progress bar.
:param local_cache: store the downloaded DocList to local folder
:return: a :class:`DocList` object
:param local_cache: store the downloaded `DocList` to local folder
:return: a `DocList` object
"""

return docs_cls(
Expand Down
30 changes: 16 additions & 14 deletions docarray/store/jac.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ def _get_raw_summary(self: 'DocList') -> List[Dict[str, Any]]:


class JACDocStore(AbstractDocStore):
"""Class to push and pull DocList to and from Jina AI Cloud."""
"""Class to push and pull [`DocList`][docarray.DocList] to and from Jina AI Cloud."""

@staticmethod
@hubble.login_required
Expand Down Expand Up @@ -135,7 +135,7 @@ def list(namespace: str = '', show_table: bool = False) -> List[str]:
@hubble.login_required
def delete(name: str, missing_ok: bool = True) -> bool:
"""
Delete a DocList from the cloud.
Delete a [`DocList`][docarray.DocList] from the cloud.
:param name: the name of the DocList to delete.
:param missing_ok: if true, do not raise an error if the DocList does not exist.
:return: True if the DocList was deleted, False if it did not exist.
Expand All @@ -158,17 +158,18 @@ def push(
show_progress: bool = False,
branding: Optional[Dict] = None,
) -> Dict:
"""Push this DocList object to Jina AI Cloud
"""Push this [`DocList`][docarray.DocList] object to Jina AI Cloud

.. note::
!!! note
- Pushing with the same ``name`` will overwrite the existing content.
- Kinda like a public clipboard where everyone can overwrite anyone's content.
So to make your content survive longer, you may want to use a longer & more complicated name.
- The lifetime of the content is not promised atm, could be a day, could be a week. Do not use it for
persistence. Only use this for temporary transmission/storage/clipboard.

:param name: A name that can later be used to retrieve this :class:`DocList`.
:param public: By default, anyone can pull a DocList if they know its name.
:param docs: The `DocList` to push.
:param name: A name that can later be used to retrieve this `DocList`.
:param public: By default, anyone can pull a `DocList` if they know its name.
Setting this to false will restrict access to only the creator.
:param show_progress: If true, a progress bar will be displayed.
:param branding: A dictionary of branding information to be sent to Jina Cloud. e.g. {"icon": "emoji", "background": "#fff"}
Expand Down Expand Up @@ -245,15 +246,16 @@ def push_stream(
) -> Dict:
"""Push a stream of documents to Jina AI Cloud

.. note::
!!! note
- Pushing with the same ``name`` will overwrite the existing content.
- Kinda like a public clipboard where everyone can overwrite anyone's content.
So to make your content survive longer, you may want to use a longer & more complicated name.
- The lifetime of the content is not promised atm, could be a day, could be a week. Do not use it for
persistence. Only use this for temporary transmission/storage/clipboard.

:param name: A name that can later be used to retrieve this :class:`DocList`.
:param public: By default, anyone can pull a DocList if they know its name.
:param docs: a stream of documents
:param name: A name that can later be used to retrieve this `DocList`.
:param public: By default, anyone can pull a `DocList` if they know its name.
Setting this to false will restrict access to only the creator.
:param show_progress: If true, a progress bar will be displayed.
:param branding: A dictionary of branding information to be sent to Jina Cloud. e.g. {"icon": "emoji", "background": "#fff"}
Expand All @@ -278,12 +280,12 @@ def pull(
show_progress: bool = False,
local_cache: bool = True,
) -> 'DocList':
"""Pull a :class:`DocList` from Jina AI Cloud to local.
"""Pull a [`DocList`][docarray.DocList] from Jina AI Cloud to local.

:param name: the upload name set during :meth:`.push`
:param name: the upload name set during `.push`
:param show_progress: if true, display a progress bar.
:param local_cache: store the downloaded DocList to local folder
:return: a :class:`DocList` object
:return: a [`DocList`][docarray.DocList] object
"""
from docarray import DocList

Expand All @@ -299,9 +301,9 @@ def pull_stream(
show_progress: bool = False,
local_cache: bool = False,
) -> Iterator['BaseDoc']:
"""Pull a :class:`DocList` from Jina AI Cloud to local.
"""Pull a [`DocList`][docarray.DocList] from Jina AI Cloud to local.

:param name: the upload name set during :meth:`.push`
:param name: the upload name set during `.push`
:param show_progress: if true, display a progress bar.
:param local_cache: store the downloaded DocList to local folder
:return: An iterator of Documents
Expand Down
18 changes: 9 additions & 9 deletions docarray/store/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,15 +48,15 @@ def close(self):


class S3DocStore(AbstractDocStore):
"""Class to push and pull DocList to and from S3."""
"""Class to push and pull [`DocList`][docarray.DocList] to and from S3."""

@staticmethod
def list(namespace: str, show_table: bool = False) -> List[str]:
"""List all DocArrays in the specified bucket and namespace.
"""List all [`DocList`s][docarray.DocList] in the specified bucket and namespace.

:param namespace: The bucket and namespace to list. e.g. my_bucket/my_namespace
:param show_table: If true, a rich table will be printed to the console.
:return: A list of DocList names.
:return: A list of `DocList` names.
"""
bucket, namespace = namespace.split('/', 1)
s3 = boto3.resource('s3')
Expand All @@ -74,7 +74,7 @@ def list(namespace: str, show_table: bool = False) -> List[str]:
from rich.table import Table

table = Table(
title=f'You have {len(da_files)} DocArrays in bucket s3://{bucket} under the namespace "{namespace}"',
title=f'You have {len(da_files)} DocLists in bucket s3://{bucket} under the namespace "{namespace}"',
box=box.SIMPLE,
highlight=True,
)
Expand All @@ -94,7 +94,7 @@ def list(namespace: str, show_table: bool = False) -> List[str]:

@staticmethod
def delete(name: str, missing_ok: bool = True) -> bool:
"""Delete the DocList object at the specified bucket and key.
"""Delete the [`DocList`][docarray.DocList] object at the specified bucket and key.

:param name: The bucket and key to delete. e.g. my_bucket/my_key
:param missing_ok: If true, no error will be raised if the object does not exist.
Expand Down Expand Up @@ -125,9 +125,9 @@ def push(
show_progress: bool = False,
branding: Optional[Dict] = None,
) -> Dict:
"""Push this DocList object to the specified bucket and key.
"""Push this [`DocList`][docarray.DocList] object to the specified bucket and key.

:param docs: The DocList to push.
:param docs: The `DocList` to push.
:param name: The bucket and key to push to. e.g. my_bucket/my_key
:param public: Not used by the ``s3`` protocol.
:param show_progress: If true, a progress bar will be displayed.
Expand Down Expand Up @@ -182,12 +182,12 @@ def pull(
show_progress: bool = False,
local_cache: bool = False,
) -> 'DocList':
"""Pull a :class:`DocList` from the specified bucket and key.
"""Pull a [`DocList`][docarray.DocList] from the specified bucket and key.

:param name: The bucket and key to pull from. e.g. my_bucket/my_key
:param show_progress: if true, display a progress bar.
:param local_cache: store the downloaded DocList to local cache
:return: a :class:`DocList` object
:return: a `DocList` object
"""
docs = docs_cls( # type: ignore
cls.pull_stream(
Expand Down
3 changes: 3 additions & 0 deletions docs/api_references/doc_store/doc_store.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# DocStore

::: docarray.store.abstract_doc_store.AbstractDocStore
3 changes: 3 additions & 0 deletions docs/api_references/doc_store/file_doc_store.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# FileDocStore

::: docarray.store.file.FileDocStore
3 changes: 3 additions & 0 deletions docs/api_references/doc_store/jac_doc_store.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# JACDocStore

::: docarray.store.jac.JACDocStore
3 changes: 3 additions & 0 deletions docs/api_references/doc_store/s3_doc_store.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# S3DocStore

::: docarray.store.s3.S3DocStore
Loading