From d01414830560df729166ce654a2c6db03a701420 Mon Sep 17 00:00:00 2001 From: Han Xiao Date: Fri, 25 Feb 2022 22:39:43 +0100 Subject: [PATCH] fix(array): storage info in summary --- docarray/array/mixins/plot.py | 12 +++-- docarray/array/storage/base/backend.py | 4 +- docarray/array/storage/memory/backend.py | 5 -- docarray/array/storage/pqlite/backend.py | 4 +- docarray/array/storage/qdrant/backend.py | 4 +- docarray/array/storage/sqlite/backend.py | 4 +- docarray/array/storage/weaviate/backend.py | 4 +- .../documentarray/serialization.md | 48 +++++++++++++------ 8 files changed, 46 insertions(+), 39 deletions(-) diff --git a/docarray/array/mixins/plot.py b/docarray/array/mixins/plot.py index 816ae8322d0..ec778ad05c8 100644 --- a/docarray/array/mixins/plot.py +++ b/docarray/array/mixins/plot.py @@ -98,13 +98,15 @@ def summary(self): ) tables.append(attr_table) - storage_table = Table(box=box.SIMPLE, title='Storage Summary') - storage_table.show_header = False storage_infos = self._get_storage_infos() - for k, v in storage_infos.items(): - storage_table.add_row(k, v) + if storage_infos: + storage_table = Table(box=box.SIMPLE, title='Storage Summary') + storage_table.show_header = False - tables.append(storage_table) + for k, v in storage_infos.items(): + storage_table.add_row(k, v) + + tables.append(storage_table) console.print(*tables) diff --git a/docarray/array/storage/base/backend.py b/docarray/array/storage/base/backend.py index 10093a53228..0341dbb886a 100644 --- a/docarray/array/storage/base/backend.py +++ b/docarray/array/storage/base/backend.py @@ -19,5 +19,5 @@ def _init_storage( ): self._load_offset2ids() - def _get_storage_infos(self) -> Dict: - return {'Class': self.__class__.__name__} + def _get_storage_infos(self) -> Optional[Dict]: + ... 
diff --git a/docarray/array/storage/memory/backend.py b/docarray/array/storage/memory/backend.py index 693f0059819..c7ec93e4dda 100644 --- a/docarray/array/storage/memory/backend.py +++ b/docarray/array/storage/memory/backend.py @@ -49,8 +49,3 @@ def _init_storage( self.append(Document(_docs, copy=True)) else: self.append(_docs) - - def _get_storage_infos(self) -> Dict: - storage_infos = super()._get_storage_infos() - storage_infos['Backend'] = 'In Memory' - return storage_infos diff --git a/docarray/array/storage/pqlite/backend.py b/docarray/array/storage/pqlite/backend.py index 6b079abc1e3..6b983e21a30 100644 --- a/docarray/array/storage/pqlite/backend.py +++ b/docarray/array/storage/pqlite/backend.py @@ -90,11 +90,9 @@ def __setstate__(self, state): self._pqlite = PQLite(n_dim, lock=False, **config) def _get_storage_infos(self) -> Dict: - storage_infos = super()._get_storage_infos() return { - 'Backend': 'PQLite (https://github.com/jina-ai/pqlite)', + 'Backend': 'PQLite', 'Distance Metric': self._pqlite.metric.name, 'Data Path': self._config.data_path, 'Serialization Protocol': self._config.serialize_config.get('protocol'), - **storage_infos, } diff --git a/docarray/array/storage/qdrant/backend.py b/docarray/array/storage/qdrant/backend.py index ec1139e82a3..19c42570f11 100644 --- a/docarray/array/storage/qdrant/backend.py +++ b/docarray/array/storage/qdrant/backend.py @@ -173,13 +173,11 @@ def _update_offset2ids_meta(self): ) def _get_storage_infos(self) -> Dict: - storage_infos = super()._get_storage_infos() return { - 'Backend': 'Qdrant (https://qdrant.tech)', + 'Backend': 'Qdrant', 'Host': self._config.host, 'Port': str(self._config.port), 'Collection Name': self.collection_name, 'Distance': self._config.distance, 'Serialization Protocol': self._config.serialize_config.get('protocol'), - **storage_infos, } diff --git a/docarray/array/storage/sqlite/backend.py b/docarray/array/storage/sqlite/backend.py index adfb4824998..28f2fa1a92c 100644 --- 
a/docarray/array/storage/sqlite/backend.py +++ b/docarray/array/storage/sqlite/backend.py @@ -143,11 +143,9 @@ def __setstate__(self, state): ) def _get_storage_infos(self) -> Dict: - storage_infos = super()._get_storage_infos() return { - 'Backend': 'SQLite (https://www.sqlite.org)', + 'Backend': 'SQLite', 'Connection': self._config.connection, 'Table Name': self._table_name, 'Serialization Protocol': self._config.serialize_config.get('protocol'), - **storage_infos, } diff --git a/docarray/array/storage/weaviate/backend.py b/docarray/array/storage/weaviate/backend.py index 56654fc22ae..4f0f34c7b71 100644 --- a/docarray/array/storage/weaviate/backend.py +++ b/docarray/array/storage/weaviate/backend.py @@ -316,11 +316,9 @@ def _wmap(self, doc_id: str): return str(uuid.uuid5(uuid.NAMESPACE_URL, doc_id + self._class_name)) def _get_storage_infos(self) -> Dict: - storage_infos = super()._get_storage_infos() return { - 'Backend': 'Weaviate (www.semi.technology/developers/weaviate)', + 'Backend': 'Weaviate', 'Hostname': self._config.client, 'Schema Name': self._config.name, 'Serialization Protocol': self._config.serialize_config.get('protocol'), - **storage_infos, } diff --git a/docs/fundamentals/documentarray/serialization.md b/docs/fundamentals/documentarray/serialization.md index 50bdd01c18f..a5fc2d75b5f 100644 --- a/docs/fundamentals/documentarray/serialization.md +++ b/docs/fundamentals/documentarray/serialization.md @@ -163,7 +163,7 @@ Afterwards, `doc1_bytes` describes how many bytes are used to serialize `doc1`, The pattern `dock_bytes` and `dock.to_bytes` is repeated `len(docs)` times. -### From/to Disk +### From/to disk If you want to store a `DocumentArray` to disk you can use `.save_binary(filename, protocol, compress)` where `protocol` and `compress` refer to the protocol and compression methods used to serialize the data. If you want to load a `DocumentArray` from disk you can use `.load_binary(filename, protocol, compress)`. 
@@ -177,31 +177,49 @@ da = DocumentArray([Document(text='hello'), Document(text='world')]) da.save_binary('my_docarray.bin', protocol='protobuf', compress='lz4') da_rec = DocumentArray.load_binary('my_docarray.bin', protocol='protobuf', compress='lz4') -da_rec == da +da_rec.summary() ``` -Note that in the previous code snippet the user needs to remember the protol and compression methods used to store the data in order to load it back correctly. `DocArray` allows you to specify `protocol` and `compress` as file extensions. -By doing so you can forget later on which protocol and compression methods were used to serialize the data to disk. -This functionality assumes `.save_binary` and `.load_binary` are called with `filename` following the form `file_name.$protocol.$compress`, where `$protocol` and `$compress` refer to a string interpolation of the respective `protocol` and `compress` methods. +```text + Documents Summary + + Length 2 + Homogenous Documents True + Common Attributes ('id', 'mime_type', 'text') + + Attributes Summary + + Attribute Data type #Unique values Has empty value + ────────────────────────────────────────────────────────── + id ('str',) 2 False + mime_type ('str',) 1 False + text ('str',) 2 False + +``` -For example if `file=my_docarray.protobuf.lz4` then the binary data will be created using `protocol=protobuf` and `compress=lz4`. -The previous code snippet can be simplified to +Users do not need to remember the protocol and compression methods when loading. You can simply specify `protocol` and `compress` in the file extension via: -```python -from docarray import DocumentArray, Document +```text +filename.protobuf.gzip + ~~~~~~~~ ^^^^ + | | + | |-- compress + | + |-- protocol +``` -da = DocumentArray([Document(text='hello'), Document(text='world')]) +When a filename is given in the above format in `.save_binary`, you can simply load it back with `.load_binary` without specifying the protocol and compression method again.
+ + +The previous code snippet can be simplified to + +```python da.save_binary('my_docarray.protobuf.lz4') da_rec = DocumentArray.load_binary('my_docarray.protobuf.lz4') -da_rec == da ``` -```{tip} -If you don't want to specify and remember `protocol` and `compress` to store/load to/from disk, save your `DocumentArray` `da` using -`da.save_binary('file_name.$protocol.$compress')` so that it can be loaded back with `DocumentArray.load_binary('file_name.$protocol.$compress')` -``` ### Stream large binary serialization from disk