docarray · hanxiao · Jan 7, 2022 · Jan 7, 2022 · Jan 7, 2022 · Jan 7, 2022
diff --git a/README.md b/README.md
@@ -25,7 +25,7 @@ Requires Python 3.7+ and `numpy` only:
 ```
 pip install docarray
 ```
-[Additional features](https://docarray.jina.ai/#install) can be enabled by installing the full dependencies: `pip install docarray[full]`.
+[Additional features](https://docarray.jina.ai/#install) can be enabled by installing the full dependencies: `pip install "docarray[full]"`.
 
 ## [Documentation](https://docarray.jina.ai)
 

diff --git a/docarray/array/mixins/evaluation.py b/docarray/array/mixins/evaluation.py
@@ -74,6 +74,9 @@ def evaluate(
 
             binary_relevance = [1 if hash_fn(m) in desired else 0 for m in d.matches]
 
+            if 'max_rel' not in kwargs:
+                kwargs['max_rel'] = len(gd.matches)
+
             r = metric_fn(binary_relevance, **kwargs)
             d.evaluations[metric_name] = NamedScore(
                 value=r, op_name=str(metric_fn), ref_id=d.id

diff --git a/docarray/array/mixins/getattr.py b/docarray/array/mixins/getattr.py
@@ -31,7 +31,8 @@ def get_attributes(self, *fields: str) -> List:
             if b_index is None and e_index is None:
                 return contents
 
-            contents = [contents]
+            if len(fields) == 1:
+                contents = [contents]
             if b_index is not None:
                 contents.insert(b_index, self.blobs)
             if e_index is not None:

diff --git a/docarray/array/mixins/group.py b/docarray/array/mixins/group.py
@@ -12,7 +12,7 @@ class GroupMixin:
     """These helpers yield groups of :class:`DocumentArray` from
     a source :class:`DocumentArray`."""
 
-    def split(self, tag: str) -> Dict[Any, 'DocumentArray']:
+    def split_by_tag(self, tag: str) -> Dict[Any, 'DocumentArray']:
         """Split the `DocumentArray` into multiple DocumentArray according to the tag value of each `Document`.
 
         :param tag: the tag name to split stored in tags.

diff --git a/docarray/array/mixins/plot.py b/docarray/array/mixins/plot.py
@@ -15,6 +15,13 @@ class PlotMixin:
     """Helper functions for plotting the arrays. """
 
     def summary(self):
+        """Print the structure and attribute summary of this DocumentArray object.
+
+        .. warning::
+            Calling {meth}`.summary` on large DocumentArray can be slow.
+
+        """
+
         from rich.table import Table
         from rich.console import Console
         from rich import box
@@ -74,10 +81,13 @@ def summary(self):
             try:
                 _a = set(_a)
             except:
-                pass
+                pass  # intentional ignore as some fields are not hashable
             _set_type_a = set(type(_aa).__name__ for _aa in _a)
             attr_table.add_row(
-                _a_name, str(tuple(_set_type_a)), str(len(_a)), str(None in _a)
+                _a_name,
+                str(tuple(_set_type_a)),
+                str(len(_a)),
+                str(any(_aa is None for _aa in _a)),
             )
         console.print(table, attr_table)
 

diff --git a/docarray/math/evaluation.py b/docarray/math/evaluation.py
@@ -11,7 +11,7 @@ def _check_k(k):
         raise ValueError(f'`k` must be >=1 or `None`')
 
 
-def r_precision(binary_relevance: List[int]) -> float:
+def r_precision(binary_relevance: List[int], **kwargs) -> float:
     """R Precision after all relevant documents have been retrieved
     Relevance is binary (nonzero is relevant).
 
@@ -28,7 +28,9 @@ def r_precision(binary_relevance: List[int]) -> float:
     return float(np.mean(binary_relevance[: z[-1] + 1]))
 
 
-def precision_at_k(binary_relevance: List[int], k: Optional[int] = None) -> float:
+def precision_at_k(
+    binary_relevance: List[int], k: Optional[int] = None, **kwargs
+) -> float:
     """Precision @K.
 
     :param binary_relevance: binary relevancy in rank order
@@ -40,7 +42,7 @@ def precision_at_k(binary_relevance: List[int], k: Optional[int] = None) -> floa
     return float(np.mean(binary_relevance))
 
 
-def hit_at_k(binary_relevance: List[int], k: Optional[int] = None) -> int:
+def hit_at_k(binary_relevance: List[int], k: Optional[int] = None, **kwargs) -> int:
     """Score is percentage of first relevant item in list that occur
 
     :param binary_relevance: binary relevancy in rank order
@@ -51,7 +53,7 @@ def hit_at_k(binary_relevance: List[int], k: Optional[int] = None) -> int:
     return 1 if np.sum(binary_relevance[:k]) > 0 else 0
 
 
-def average_precision(binary_relevance: List[int]) -> float:
+def average_precision(binary_relevance: List[int], **kwargs) -> float:
     """Score is average precision (area under PR curve)
     Relevance is binary (nonzero is relevant).
 
@@ -65,7 +67,7 @@ def average_precision(binary_relevance: List[int]) -> float:
     return float(np.mean(out))
 
 
-def reciprocal_rank(binary_relevance: List[int]) -> float:
+def reciprocal_rank(binary_relevance: List[int], **kwargs) -> float:
     """Score is reciprocal of the rank of the first relevant item
 
     :param binary_relevance: binary relevancy in rank order
@@ -76,7 +78,7 @@ def reciprocal_rank(binary_relevance: List[int]) -> float:
 
 
 def recall_at_k(
-    binary_relevance: List[int], max_rel: int, k: Optional[int] = None
+    binary_relevance: List[int], max_rel: int, k: Optional[int] = None, **kwargs
 ) -> float:
     """Score is recall after all relevant documents have been retrieved
     Relevance is binary (nonzero is relevant).
@@ -94,7 +96,7 @@ def recall_at_k(
 
 
 def f1_score_at_k(
-    binary_relevance: List[int], max_rel: int, k: Optional[int] = None
+    binary_relevance: List[int], max_rel: int, k: Optional[int] = None, **kwargs
 ) -> float:
     """Score is harmonic mean of precision and recall
     Relevance is binary (nonzero is relevant).
@@ -113,7 +115,9 @@ def f1_score_at_k(
         return 0.0
 
 
-def dcg_at_k(relevance: List[float], method: int = 0, k: Optional[int] = None):
+def dcg_at_k(
+    relevance: List[float], method: int = 0, k: Optional[int] = None, **kwargs
+):
     """Score is discounted cumulative gain (dcg)
     Relevance is positive real values. Can use binary
     as the previous methods.
@@ -140,7 +144,9 @@ def dcg_at_k(relevance: List[float], method: int = 0, k: Optional[int] = None):
     return 0.0
 
 
-def ndcg_at_k(relevance: List[float], method: int = 0, k: Optional[int] = None):
+def ndcg_at_k(
+    relevance: List[float], method: int = 0, k: Optional[int] = None, **kwargs
+):
     """Score is normalized discounted cumulative gain (ndcg)
     Relevance is positive real values.  Can use binary
     as the previous methods.

diff --git a/docarray/math/ndarray.py b/docarray/math/ndarray.py
@@ -10,7 +10,13 @@
 def unravel(docs: Sequence['Document'], field: str) -> Optional['ArrayType']:
     _first = getattr(docs[0], field)
     if _first is None:
-        return None
+        # failed to unravel, return as a list
+        r = [getattr(d, field) for d in docs]
+        if any(_rr is not None for _rr in r):
+            return r
+        else:
+            return None
+
     framework, is_sparse = get_array_type(_first)
     all_fields = [getattr(d, field) for d in docs]
     cls_type = type(_first)

diff --git a/docs/_static/60fps.mp4 b/docs/_static/60fps.mp4
diff --git a/docs/_static/favicon.png b/docs/_static/favicon.png
diff --git a/docs/_static/hello.wav b/docs/_static/hello.wav
diff --git a/docs/_static/mov_bbb.mp4 b/docs/_static/mov_bbb.mp4
diff --git a/docs/_static/olleh.wav b/docs/_static/olleh.wav
diff --git a/docs/conf.py b/docs/conf.py
@@ -79,7 +79,7 @@
 html_js_files = ['https://cdn.jsdelivr.net/npm/vue@2/dist/vue.min.js', 'docbot.js']
 htmlhelp_basename = slug
 html_show_sourcelink = False
-html_favicon = '_static/favicon.ico'
+html_favicon = '_static/favicon.png'
 
 latex_documents = [(master_doc, f'{slug}.tex', project, author, 'manual')]
 man_pages = [(master_doc, slug, project, [author], 1)]
@@ -162,23 +162,23 @@
 ogp_use_first_image = True
 ogp_description_length = 300
 ogp_type = 'website'
-ogp_site_name = f'Jina {os.environ.get("SPHINX_MULTIVERSION_VERSION", version)} Documentation'
+ogp_site_name = f'DocArray {os.environ.get("SPHINX_MULTIVERSION_VERSION", version)} Documentation'
 
 ogp_custom_meta_tags = [
     '<meta name="twitter:card" content="summary_large_image">',
     '<meta name="twitter:site" content="@JinaAI_">',
     '<meta name="twitter:creator" content="@JinaAI_">',
-    '<meta name="description" content="Jina is the cloud-native neural search solution powered by the state-of-the-art AI and deep learning">',
-    '<meta property="og:description" content="Jina is the cloud-native neural search solution powered by the state-of-the-art AI and deep learning">',
+    '<meta name="description" content="DocArray is a library for nested, unstructured data such as text, image, audio, video, 3D mesh.">',
+    '<meta property="og:description" content="DocArray is a library for nested, unstructured data such as text, image, audio, video, 3D mesh. It allows deep learning engineers to easily preprocess, embed, search, recommend and transfer the data.">',
     '''
     <!-- Global site tag (gtag.js) - Google Analytics -->
-<script async src="https://www.googletagmanager.com/gtag/js?id=G-48ZDWC8GT6"></script>
+<script async src="https://www.googletagmanager.com/gtag/js?id=G-48WE9V68SD"></script>
 <script>
   window.dataLayer = window.dataLayer || [];
   function gtag(){dataLayer.push(arguments);}
   gtag('js', new Date());
 
-  gtag('config', 'G-48ZDWC8GT6');
+  gtag('config', 'G-48WE9V68SD');
 </script>
 
 <script async defer src="https://buttons.github.io/buttons.js"></script>

diff --git a/docs/datatypes/audio/index.md b/docs/datatypes/audio/index.md
@@ -0,0 +1,61 @@
+(audio-type)=
+# {octicon}`unmute` Audio
+
+## Load `.wav` file 
+
+To load a `.wav` file as a Document:
+
+```python
+from docarray import Document
+
+d = Document(uri='toy.wav').load_uri_to_audio_blob()
+
+print(d.blob.shape, d.blob.dtype)
+```
+
+```text
+(30833,) float32
+```
+
+## Save as `.wav` file
+
+You can save a Document's `.blob` as a `.wav` file:
+
+```python
+d.save_audio_blob_to_file('toy.wav')
+```
+
+
+## Example
+
+Let's load the "hello" audio file, reverse it and finally save it.
+
+```python
+from docarray import Document
+
+d = Document(uri='hello.wav').load_uri_to_audio_blob()
+d.blob = d.blob[::-1]
+d.save_audio_blob_to_file('olleh.wav')
+```
+
+<table>
+  <tr>
+    <th>hello.wav</th>
+    <th>olleh.wav</th>
+  </tr>
+  <tr>
+    <td><audio controls><source src="../../_static/hello.wav" type="audio/wav"></audio></td>
+    <td><audio controls><source src="../../_static/olleh.wav" type="audio/wav"></audio></td>
+  </tr>
+</table>
+
+
+## Other tools & libraries for audio data
+
+You are by no means restricted to DocArray's native methods for audio processing. Here are some command-line tools, programs and libraries for more advanced handling of audio data:
+
+- [`FFmpeg`](https://ffmpeg.org): a free, open-source project for handling multimedia files and streams.
+- [`pydub`](https://github.com/jiaaro/pydub): manipulate audio with a simple and easy high-level interface.
+- [`librosa`](https://librosa.github.io/librosa/): a Python package for music and audio analysis.
+- [`pyAudioAnalysis`](https://github.com/tyiannak/pyAudioAnalysis): for IO or for more advanced feature extraction and signal analysis.
+
diff --git a/docs/datatypes/image/apple-proc.png b/docs/datatypes/image/apple-proc.png
diff --git a/docs/datatypes/image/apple.png b/docs/datatypes/image/apple.png
diff --git a/docs/datatypes/image/complicated-image.jpeg b/docs/datatypes/image/complicated-image.jpeg