Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ Requires Python 3.7+ and `numpy` only:
```
pip install docarray
```
[Additional features](https://docarray.jina.ai/#install) can be enabled by installing the full dependencies: `pip install docarray[full]`.
[Additional features](https://docarray.jina.ai/#install) can be enabled by installing the full dependencies: `pip install "docarray[full]"`.

## [Documentation](https://docarray.jina.ai)

Expand Down
3 changes: 3 additions & 0 deletions docarray/array/mixins/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ def evaluate(

binary_relevance = [1 if hash_fn(m) in desired else 0 for m in d.matches]

if 'max_rel' not in kwargs:
kwargs['max_rel'] = len(gd.matches)

r = metric_fn(binary_relevance, **kwargs)
d.evaluations[metric_name] = NamedScore(
value=r, op_name=str(metric_fn), ref_id=d.id
Expand Down
3 changes: 2 additions & 1 deletion docarray/array/mixins/getattr.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@ def get_attributes(self, *fields: str) -> List:
if b_index is None and e_index is None:
return contents

contents = [contents]
if len(fields) == 1:
contents = [contents]
if b_index is not None:
contents.insert(b_index, self.blobs)
if e_index is not None:
Expand Down
2 changes: 1 addition & 1 deletion docarray/array/mixins/group.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ class GroupMixin:
"""These helpers yield groups of :class:`DocumentArray` from
a source :class:`DocumentArray`."""

def split(self, tag: str) -> Dict[Any, 'DocumentArray']:
def split_by_tag(self, tag: str) -> Dict[Any, 'DocumentArray']:
"""Split the `DocumentArray` into multiple DocumentArray according to the tag value of each `Document`.

:param tag: the tag name to split stored in tags.
Expand Down
14 changes: 12 additions & 2 deletions docarray/array/mixins/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,13 @@ class PlotMixin:
"""Helper functions for plotting the arrays. """

def summary(self):
"""Print the structure and attribute summary of this DocumentArray object.

.. warning::
Calling {meth}`.summary` on large DocumentArray can be slow.

"""

from rich.table import Table
from rich.console import Console
from rich import box
Expand Down Expand Up @@ -74,10 +81,13 @@ def summary(self):
try:
_a = set(_a)
except:
pass
pass # intentional ignore as some fields are not hashable
_set_type_a = set(type(_aa).__name__ for _aa in _a)
attr_table.add_row(
_a_name, str(tuple(_set_type_a)), str(len(_a)), str(None in _a)
_a_name,
str(tuple(_set_type_a)),
str(len(_a)),
str(any(_aa is None for _aa in _a)),
)
console.print(table, attr_table)

Expand Down
24 changes: 15 additions & 9 deletions docarray/math/evaluation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def _check_k(k):
raise ValueError(f'`k` must be >=1 or `None`')


def r_precision(binary_relevance: List[int]) -> float:
def r_precision(binary_relevance: List[int], **kwargs) -> float:
"""R Precision after all relevant documents have been retrieved
Relevance is binary (nonzero is relevant).

Expand All @@ -28,7 +28,9 @@ def r_precision(binary_relevance: List[int]) -> float:
return float(np.mean(binary_relevance[: z[-1] + 1]))


def precision_at_k(binary_relevance: List[int], k: Optional[int] = None) -> float:
def precision_at_k(
binary_relevance: List[int], k: Optional[int] = None, **kwargs
) -> float:
"""Precision @K.

:param binary_relevance: binary relevancy in rank order
Expand All @@ -40,7 +42,7 @@ def precision_at_k(binary_relevance: List[int], k: Optional[int] = None) -> floa
return float(np.mean(binary_relevance))


def hit_at_k(binary_relevance: List[int], k: Optional[int] = None) -> int:
def hit_at_k(binary_relevance: List[int], k: Optional[int] = None, **kwargs) -> int:
"""Score is percentage of first relevant item in list that occur

:param binary_relevance: binary relevancy in rank order
Expand All @@ -51,7 +53,7 @@ def hit_at_k(binary_relevance: List[int], k: Optional[int] = None) -> int:
return 1 if np.sum(binary_relevance[:k]) > 0 else 0


def average_precision(binary_relevance: List[int]) -> float:
def average_precision(binary_relevance: List[int], **kwargs) -> float:
"""Score is average precision (area under PR curve)
Relevance is binary (nonzero is relevant).

Expand All @@ -65,7 +67,7 @@ def average_precision(binary_relevance: List[int]) -> float:
return float(np.mean(out))


def reciprocal_rank(binary_relevance: List[int]) -> float:
def reciprocal_rank(binary_relevance: List[int], **kwargs) -> float:
"""Score is reciprocal of the rank of the first relevant item

:param binary_relevance: binary relevancy in rank order
Expand All @@ -76,7 +78,7 @@ def reciprocal_rank(binary_relevance: List[int]) -> float:


def recall_at_k(
binary_relevance: List[int], max_rel: int, k: Optional[int] = None
binary_relevance: List[int], max_rel: int, k: Optional[int] = None, **kwargs
) -> float:
"""Score is recall after all relevant documents have been retrieved
Relevance is binary (nonzero is relevant).
Expand All @@ -94,7 +96,7 @@ def recall_at_k(


def f1_score_at_k(
binary_relevance: List[int], max_rel: int, k: Optional[int] = None
binary_relevance: List[int], max_rel: int, k: Optional[int] = None, **kwargs
) -> float:
"""Score is harmonic mean of precision and recall
Relevance is binary (nonzero is relevant).
Expand All @@ -113,7 +115,9 @@ def f1_score_at_k(
return 0.0


def dcg_at_k(relevance: List[float], method: int = 0, k: Optional[int] = None):
def dcg_at_k(
relevance: List[float], method: int = 0, k: Optional[int] = None, **kwargs
):
"""Score is discounted cumulative gain (dcg)
Relevance is positive real values. Can use binary
as the previous methods.
Expand All @@ -140,7 +144,9 @@ def dcg_at_k(relevance: List[float], method: int = 0, k: Optional[int] = None):
return 0.0


def ndcg_at_k(relevance: List[float], method: int = 0, k: Optional[int] = None):
def ndcg_at_k(
relevance: List[float], method: int = 0, k: Optional[int] = None, **kwargs
):
"""Score is normalized discounted cumulative gain (ndcg)
Relevance is positive real values. Can use binary
as the previous methods.
Expand Down
8 changes: 7 additions & 1 deletion docarray/math/ndarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,13 @@
def unravel(docs: Sequence['Document'], field: str) -> Optional['ArrayType']:
_first = getattr(docs[0], field)
if _first is None:
return None
# failed to unravel, return as a list
r = [getattr(d, field) for d in docs]
if any(_rr is not None for _rr in r):
return r
else:
return None

framework, is_sparse = get_array_type(_first)
all_fields = [getattr(d, field) for d in docs]
cls_type = type(_first)
Expand Down
Binary file added docs/_static/60fps.mp4
Binary file not shown.
Binary file added docs/_static/favicon.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/_static/hello.wav
Binary file not shown.
Binary file added docs/_static/mov_bbb.mp4
Binary file not shown.
Binary file added docs/_static/olleh.wav
Binary file not shown.
12 changes: 6 additions & 6 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@
html_js_files = ['https://cdn.jsdelivr.net/npm/vue@2/dist/vue.min.js', 'docbot.js']
htmlhelp_basename = slug
html_show_sourcelink = False
html_favicon = '_static/favicon.ico'
html_favicon = '_static/favicon.png'

latex_documents = [(master_doc, f'{slug}.tex', project, author, 'manual')]
man_pages = [(master_doc, slug, project, [author], 1)]
Expand Down Expand Up @@ -162,23 +162,23 @@
ogp_use_first_image = True
ogp_description_length = 300
ogp_type = 'website'
ogp_site_name = f'Jina {os.environ.get("SPHINX_MULTIVERSION_VERSION", version)} Documentation'
ogp_site_name = f'DocArray {os.environ.get("SPHINX_MULTIVERSION_VERSION", version)} Documentation'

ogp_custom_meta_tags = [
'<meta name="twitter:card" content="summary_large_image">',
'<meta name="twitter:site" content="@JinaAI_">',
'<meta name="twitter:creator" content="@JinaAI_">',
'<meta name="description" content="Jina is the cloud-native neural search solution powered by the state-of-the-art AI and deep learning">',
'<meta property="og:description" content="Jina is the cloud-native neural search solution powered by the state-of-the-art AI and deep learning">',
'<meta name="description" content="DocArray is a library for nested, unstructured data such as text, image, audio, video, 3D mesh.">',
'<meta property="og:description" content="DocArray is a library for nested, unstructured data such as text, image, audio, video, 3D mesh. It allows deep learning engineers to easily preprocess, embed, search, recommend and transfer the data.">',
'''
<!-- Global site tag (gtag.js) - Google Analytics -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-48ZDWC8GT6"></script>
<script async src="https://www.googletagmanager.com/gtag/js?id=G-48WE9V68SD"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());

gtag('config', 'G-48ZDWC8GT6');
gtag('config', 'G-48WE9V68SD');
</script>

<script async defer src="https://buttons.github.io/buttons.js"></script>
Expand Down
61 changes: 61 additions & 0 deletions docs/datatypes/audio/index.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
(audio-type)=
# {octicon}`unmute` Audio

## Load `.wav` file

To load a `.wav` file as a Document:

```python
from docarray import Document

d = Document(uri='toy.wav').load_uri_to_audio_blob()

print(d.blob.shape, d.blob.dtype)
```

```text
(30833,) float32
```

## Save as `.wav` file

You can save Document `.blob` as a `.wav` file:

```python
d.save_audio_blob_to_file('toy.wav')
```


## Example

Let's load the "hello" audio file, reverse it and finally save it.

```python
from docarray import Document

d = Document(uri='hello.wav').load_uri_to_audio_blob()
d.blob = d.blob[::-1]
d.save_audio_blob_to_file('olleh.wav')
```

<table>
<tr>
<th>hello.wav</th>
<th>olleh.wav</th>
</tr>
<tr>
<td><audio controls><source src="../../_static/hello.wav" type="audio/wav"></audio></td>
<td><audio controls><source src="../../_static/olleh.wav" type="audio/wav"></audio></td>
</tr>
</table>


## Other tools & libraries for audio data

By no means are you restricted to using DocArray's native methods for audio processing. Here are some command-line tools, programs and libraries for more advanced handling of audio data:

- [`FFmpeg`](https://ffmpeg.org) is a free, open-source project for handling multimedia files and streams.
- [`pydub`](https://github.com/jiaaro/pydub): manipulate audio with a simple and easy high-level interface.
- [`librosa`](https://librosa.github.io/librosa/): a Python package for music and audio analysis.
- [`pyAudioAnalysis`](https://github.com/tyiannak/pyAudioAnalysis): for IO or for more advanced feature extraction and signal analysis.

Binary file added docs/datatypes/image/apple-proc.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/datatypes/image/apple.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added docs/datatypes/image/complicated-image.jpeg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading