From 7d844d80e73783de1b5a7bdbbf756a12d7487b2d Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Fri, 14 Oct 2022 18:31:20 +0800 Subject: [PATCH 01/16] fix(array): have exiting context manager call sync --- docarray/array/document.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docarray/array/document.py b/docarray/array/document.py index d119d2f2983..89e513060d9 100644 --- a/docarray/array/document.py +++ b/docarray/array/document.py @@ -144,10 +144,9 @@ def __enter__(self): def __exit__(self, *args, **kwargs): """ - Ensures that offset2ids are stored in the db after - operations in the DocumentArray are performed. + Ensures that we sync the data to the storage backend when exiting the context manager """ - self._save_offset2ids() + self.sync() def __new__(cls, *args, storage: str = 'memory', **kwargs): if cls is DocumentArray: From 4a1a8a80de7df7e43b531f57daa57ab2e1b55191 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Fri, 14 Oct 2022 18:38:42 +0800 Subject: [PATCH 02/16] fix(array): change all del to sync --- docarray/array/storage/annlite/getsetdel.py | 4 ++-- docarray/array/storage/base/getsetdel.py | 2 +- docarray/array/storage/elastic/seqlike.py | 7 ------- docarray/array/storage/sqlite/seqlike.py | 4 ++-- docarray/array/storage/weaviate/seqlike.py | 4 ++-- 5 files changed, 7 insertions(+), 14 deletions(-) diff --git a/docarray/array/storage/annlite/getsetdel.py b/docarray/array/storage/annlite/getsetdel.py index 584edf63c3a..c2114d04062 100644 --- a/docarray/array/storage/annlite/getsetdel.py +++ b/docarray/array/storage/annlite/getsetdel.py @@ -42,14 +42,14 @@ def _set_docs_by_ids(self, ids, docs: Iterable['Document'], mismatch_ids: Dict): def _del_docs_by_ids(self, ids): self._annlite.delete(ids) - def __del__(self) -> None: + def sync(self) -> None: if not self._persist: self._offset2ids.clear() self._annlite.clear() self._annlite.close() - super().__del__() + super().sync() def _load_offset2ids(self): self._offsetmapping = 
OffsetMapping( diff --git a/docarray/array/storage/base/getsetdel.py b/docarray/array/storage/base/getsetdel.py index ab49d0f4e22..682b6964cb2 100644 --- a/docarray/array/storage/base/getsetdel.py +++ b/docarray/array/storage/base/getsetdel.py @@ -325,6 +325,6 @@ def _load_offset2ids(self): def _save_offset2ids(self): ... - def __del__(self): + def sync(self): if hasattr(self, '_offset2ids'): self._save_offset2ids() diff --git a/docarray/array/storage/elastic/seqlike.py b/docarray/array/storage/elastic/seqlike.py index 3ca78acc0cc..9c2007b3c7b 100644 --- a/docarray/array/storage/elastic/seqlike.py +++ b/docarray/array/storage/elastic/seqlike.py @@ -45,13 +45,6 @@ def __contains__(self, x: Union[str, 'Document']): else: return False - def __del__(self): - """Delete this :class:`DocumentArrayElastic` object""" - self._save_offset2ids() - - # if not self._persist: - # self._offset2ids.clear() - def __repr__(self): """Return the string representation of :class:`DocumentArrayElastic` object :return: string representation of this object diff --git a/docarray/array/storage/sqlite/seqlike.py b/docarray/array/storage/sqlite/seqlike.py index 06fb4f0a732..651d6f34fe9 100644 --- a/docarray/array/storage/sqlite/seqlike.py +++ b/docarray/array/storage/sqlite/seqlike.py @@ -48,8 +48,8 @@ def _append(self, doc: 'Document', commit: bool = True, **kwargs) -> None: if commit: self._commit() - def __del__(self) -> None: - super().__del__() + def sync(self) -> None: + super().sync() if not self._persist: self._sql( 'DELETE FROM metadata WHERE table_name=? 
AND container_type=?', diff --git a/docarray/array/storage/weaviate/seqlike.py b/docarray/array/storage/weaviate/seqlike.py index f0a86cf2df6..33f746b9a30 100644 --- a/docarray/array/storage/weaviate/seqlike.py +++ b/docarray/array/storage/weaviate/seqlike.py @@ -54,9 +54,9 @@ def __contains__(self, x: Union[str, 'Document']): else: return False - def __del__(self): + def sync(self): """Delete this :class:`DocumentArrayWeaviate` object""" - super().__del__() + super().sync() if ( not self._persist and len(_REGISTRY[self.__class__.__name__][self._class_name]) == 1 From 017173592c8ac735d2eb67c2da250e39e5f712a1 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Fri, 14 Oct 2022 19:41:47 +0800 Subject: [PATCH 03/16] test(array): test sync excluding annlite --- docarray/array/storage/annlite/getsetdel.py | 4 ++-- tests/unit/array/test_advance_indexing.py | 20 +++++++++++--------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/docarray/array/storage/annlite/getsetdel.py b/docarray/array/storage/annlite/getsetdel.py index c2114d04062..73af7007961 100644 --- a/docarray/array/storage/annlite/getsetdel.py +++ b/docarray/array/storage/annlite/getsetdel.py @@ -46,11 +46,11 @@ def sync(self) -> None: if not self._persist: self._offset2ids.clear() self._annlite.clear() + super().sync() + def __del__(self): self._annlite.close() - super().sync() - def _load_offset2ids(self): self._offsetmapping = OffsetMapping( data_path=self._config.data_path, in_memory=False diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index 8cf7780c3ba..177377fbdcc 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -680,7 +680,7 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): [ ('sqlite', None), ('weaviate', WeaviateConfig(n_dim=123)), - ('annlite', AnnliteConfig(n_dim=123)), + # ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), 
('elasticsearch', ElasticConfig(n_dim=123)), ('redis', RedisConfig(n_dim=123)), @@ -689,13 +689,7 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): def test_offset2ids_persistence(storage, config, start_storage): da = DocumentArray(storage=storage, config=config) - da.extend( - [ - Document(id='0'), - Document(id='2'), - Document(id='4'), - ] - ) + da.extend([Document(id=i) for i in '024']) da.insert(1, Document(id='1')) da.insert(3, Document(id='3')) @@ -703,12 +697,20 @@ def test_offset2ids_persistence(storage, config, start_storage): da_ids = da[:, 'id'] assert da_ids == [str(i) for i in range(5)] da._persist = True - da.__del__() + da.sync() da = DocumentArray(storage=storage, config=config) assert da[:, 'id'] == da_ids + with da: + da.extend([Document(id=i) for i in 'abc']) + da_ids2 = da[:, 'id'] + assert len(da) == 8 + + da2 = DocumentArray(storage=storage, config=config) + assert da2[:, 'id'] == da[:, 'id'] + def test_dam_conflicting_ids(): docs = [ From 34b63d2fe83a5d626cb33d1e35c027f513e17d8c Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 18:46:27 +0800 Subject: [PATCH 04/16] fix(array): fixed bad logic with managing annlite tmp file --- docarray/array/storage/annlite/getsetdel.py | 6 +++--- tests/unit/array/test_advance_indexing.py | 1 - tests/unit/array/test_sequence.py | 6 ++++-- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/docarray/array/storage/annlite/getsetdel.py b/docarray/array/storage/annlite/getsetdel.py index 73af7007961..7bd55907e8c 100644 --- a/docarray/array/storage/annlite/getsetdel.py +++ b/docarray/array/storage/annlite/getsetdel.py @@ -43,12 +43,12 @@ def _del_docs_by_ids(self, ids): self._annlite.delete(ids) def sync(self) -> None: + super().sync() + + def __del__(self) -> None: if not self._persist: self._offset2ids.clear() self._annlite.clear() - super().sync() - - def __del__(self): self._annlite.close() def _load_offset2ids(self): diff --git 
a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index 177377fbdcc..ea7f1d871b9 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -696,7 +696,6 @@ def test_offset2ids_persistence(storage, config, start_storage): config = da._config da_ids = da[:, 'id'] assert da_ids == [str(i) for i in range(5)] - da._persist = True da.sync() da = DocumentArray(storage=storage, config=config) diff --git a/tests/unit/array/test_sequence.py b/tests/unit/array/test_sequence.py index 458e563f7c5..92b04995357 100644 --- a/tests/unit/array/test_sequence.py +++ b/tests/unit/array/test_sequence.py @@ -104,8 +104,10 @@ def test_context_manager_from_disk(storage, config, start_storage, tmpdir, tmpfi assert len(da2) == 2 assert len(da2._offset2ids.ids) == 2 - del da - del da2 + # Cleanup modifications made in test + with da: + del da[0] + del da[0] @pytest.mark.parametrize( From 76b49aca8fe74d2702cde8b0e958d63b9a24fbab Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 19:07:16 +0800 Subject: [PATCH 05/16] fix: take out uneccessary override --- docarray/array/storage/annlite/getsetdel.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/docarray/array/storage/annlite/getsetdel.py b/docarray/array/storage/annlite/getsetdel.py index 7bd55907e8c..57a88db383f 100644 --- a/docarray/array/storage/annlite/getsetdel.py +++ b/docarray/array/storage/annlite/getsetdel.py @@ -42,9 +42,6 @@ def _set_docs_by_ids(self, ids, docs: Iterable['Document'], mismatch_ids: Dict): def _del_docs_by_ids(self, ids): self._annlite.delete(ids) - def sync(self) -> None: - super().sync() - def __del__(self) -> None: if not self._persist: self._offset2ids.clear() From 312169b32cc5420178fa8ec8704d9afd903505f6 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 21:41:00 +0800 Subject: [PATCH 06/16] test: make annlite temp data persist for test --- tests/unit/array/test_advance_indexing.py | 32 
+++++++++++++++-------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index ea7f1d871b9..fefb176fc5b 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -680,7 +680,7 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): [ ('sqlite', None), ('weaviate', WeaviateConfig(n_dim=123)), - # ('annlite', AnnliteConfig(n_dim=123)), + ('annlite', AnnliteConfig(n_dim=123)), ('qdrant', QdrantConfig(n_dim=123)), ('elasticsearch', ElasticConfig(n_dim=123)), ('redis', RedisConfig(n_dim=123)), @@ -689,26 +689,36 @@ def test_edge_case_two_strings(storage, config_gen, start_storage): def test_offset2ids_persistence(storage, config, start_storage): da = DocumentArray(storage=storage, config=config) - da.extend([Document(id=i) for i in '024']) - da.insert(1, Document(id='1')) - da.insert(3, Document(id='3')) + with da: + da.extend( + [ + Document(id='0'), + Document(id='2'), + Document(id='4'), + ] + ) + da.insert(1, Document(id='1')) + da.insert(3, Document(id='3')) config = da._config da_ids = da[:, 'id'] assert da_ids == [str(i) for i in range(5)] da.sync() - da = DocumentArray(storage=storage, config=config) + # Keep the tmp file around + if storage == 'annlite': + da._persist = True - assert da[:, 'id'] == da_ids + da1 = DocumentArray(storage=storage, config=config) - with da: - da.extend([Document(id=i) for i in 'abc']) - da_ids2 = da[:, 'id'] - assert len(da) == 8 + assert da1[:, 'id'] == da_ids + + with da1: + da1.extend([Document(id=i) for i in 'abc']) + assert len(da1) == 8 da2 = DocumentArray(storage=storage, config=config) - assert da2[:, 'id'] == da[:, 'id'] + assert da2[:, 'id'] == da1[:, 'id'] def test_dam_conflicting_ids(): From 0d2e29f0e8bbccfabeb2480e3807f773a44093c5 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 22:51:19 +0800 Subject: [PATCH 07/16] fix: remove persist flag in 
annlite --- docarray/array/storage/annlite/backend.py | 3 +-- docarray/array/storage/annlite/getsetdel.py | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/docarray/array/storage/annlite/backend.py b/docarray/array/storage/annlite/backend.py index 874c4ab6f0e..e2cfde9cb5c 100644 --- a/docarray/array/storage/annlite/backend.py +++ b/docarray/array/storage/annlite/backend.py @@ -88,8 +88,7 @@ def _init_storage( elif isinstance(config, dict): config = dataclass_from_dict(AnnliteConfig, config) - self._persist = bool(config.data_path) - if not self._persist: + if config.data_path is None: from tempfile import TemporaryDirectory config.data_path = TemporaryDirectory().name diff --git a/docarray/array/storage/annlite/getsetdel.py b/docarray/array/storage/annlite/getsetdel.py index 57a88db383f..d153175fe03 100644 --- a/docarray/array/storage/annlite/getsetdel.py +++ b/docarray/array/storage/annlite/getsetdel.py @@ -43,9 +43,6 @@ def _del_docs_by_ids(self, ids): self._annlite.delete(ids) def __del__(self) -> None: - if not self._persist: - self._offset2ids.clear() - self._annlite.clear() self._annlite.close() def _load_offset2ids(self): From 722c31e2fceadb641088f570d662398cec84bc62 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 22:51:35 +0800 Subject: [PATCH 08/16] fix: remove persist flag in elastic --- docarray/array/storage/elastic/backend.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/docarray/array/storage/elastic/backend.py b/docarray/array/storage/elastic/backend.py index 28bb90f07e6..7519c94ed73 100644 --- a/docarray/array/storage/elastic/backend.py +++ b/docarray/array/storage/elastic/backend.py @@ -83,11 +83,8 @@ def _init_storage( config = dataclass_from_dict(ElasticConfig, config) if config.index_name is None: - self._persist = False id = uuid.uuid4().hex config.index_name = 'index_name__' + id - else: - self._persist = True self._index_name_offset2id = 'offset2id__' + config.index_name self._config = config From 
a9f995c318827290b54dd60e81794d4a80013945 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 22:51:48 +0800 Subject: [PATCH 09/16] fix: remove persist flag in qdrant --- docarray/array/storage/qdrant/backend.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/docarray/array/storage/qdrant/backend.py b/docarray/array/storage/qdrant/backend.py index 5561def8801..d153d65aafa 100644 --- a/docarray/array/storage/qdrant/backend.py +++ b/docarray/array/storage/qdrant/backend.py @@ -86,7 +86,6 @@ def _init_storage( self._client = QdrantClient(host=config.host, port=config.port) self._config = config - self._persist = bool(self._config.collection_name) self._config.columns = self._normalize_columns(self._config.columns) @@ -96,7 +95,6 @@ def _init_storage( else self._config.collection_name ) - self._persist = self._config.collection_name self._initialize_qdrant_schema() super()._init_storage() From 614e32279309c87d84ef9ffe31fbe4287576d650 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 22:52:06 +0800 Subject: [PATCH 10/16] fix: remove persist flag in sqlite --- docarray/array/storage/sqlite/backend.py | 1 - docarray/array/storage/sqlite/seqlike.py | 10 ---------- 2 files changed, 11 deletions(-) diff --git a/docarray/array/storage/sqlite/backend.py b/docarray/array/storage/sqlite/backend.py index 7422738a1e7..043d00f5151 100644 --- a/docarray/array/storage/sqlite/backend.py +++ b/docarray/array/storage/sqlite/backend.py @@ -91,7 +91,6 @@ def _init_storage( if config.table_name is None else _sanitize_table_name(config.table_name) ) - self._persist = bool(config.table_name) config.table_name = self._table_name initialize_table( self._table_name, self.__class__.__name__, self.schema_version, self._cursor diff --git a/docarray/array/storage/sqlite/seqlike.py b/docarray/array/storage/sqlite/seqlike.py index 651d6f34fe9..ec871a7b6f0 100644 --- a/docarray/array/storage/sqlite/seqlike.py +++ b/docarray/array/storage/sqlite/seqlike.py @@ -48,16 +48,6 
@@ def _append(self, doc: 'Document', commit: bool = True, **kwargs) -> None: if commit: self._commit() - def sync(self) -> None: - super().sync() - if not self._persist: - self._sql( - 'DELETE FROM metadata WHERE table_name=? AND container_type=?', - (self._table_name, self.__class__.__name__), - ) - self._sql(f'DROP TABLE IF EXISTS {self._table_name}') - self._commit() - def __contains__(self, item: Union[str, 'Document']): if isinstance(item, str): r = self._sql(f'SELECT 1 FROM {self._table_name} WHERE doc_id=?', (item,)) From c1353fb1bc0d0a4aa1dd4675e3716a178e3b3cee Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 22:52:27 +0800 Subject: [PATCH 11/16] fix: remove persist flag in weaviate --- docarray/array/storage/weaviate/backend.py | 2 -- docarray/array/storage/weaviate/seqlike.py | 11 ----------- 2 files changed, 13 deletions(-) diff --git a/docarray/array/storage/weaviate/backend.py b/docarray/array/storage/weaviate/backend.py index 9d747908562..52b41908baa 100644 --- a/docarray/array/storage/weaviate/backend.py +++ b/docarray/array/storage/weaviate/backend.py @@ -111,8 +111,6 @@ def _init_storage( 'Please capitalize when declaring the name field in config.' 
) - self._persist = bool(config.name) - self._client = weaviate.Client( f'{config.protocol}://{config.host}:{config.port}', timeout_config=config.timeout_config, diff --git a/docarray/array/storage/weaviate/seqlike.py b/docarray/array/storage/weaviate/seqlike.py index 33f746b9a30..f480899ccab 100644 --- a/docarray/array/storage/weaviate/seqlike.py +++ b/docarray/array/storage/weaviate/seqlike.py @@ -54,17 +54,6 @@ def __contains__(self, x: Union[str, 'Document']): else: return False - def sync(self): - """Delete this :class:`DocumentArrayWeaviate` object""" - super().sync() - if ( - not self._persist - and len(_REGISTRY[self.__class__.__name__][self._class_name]) == 1 - ): - self._client.schema.delete_class(self._class_name) - self._client.schema.delete_class(self._meta_name) - _REGISTRY[self.__class__.__name__][self._class_name].remove(self) - def __repr__(self): """Return the string representation of :class:`DocumentArrayWeaviate` object :return: string representation of this object From fc91efd36a017e7c782f5ab609aa3b3884d39739 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Mon, 17 Oct 2022 22:52:52 +0800 Subject: [PATCH 12/16] test: remove persist flag in test --- tests/unit/array/test_advance_indexing.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index fefb176fc5b..666ae8596b9 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -705,10 +705,6 @@ def test_offset2ids_persistence(storage, config, start_storage): assert da_ids == [str(i) for i in range(5)] da.sync() - # Keep the tmp file around - if storage == 'annlite': - da._persist = True - da1 = DocumentArray(storage=storage, config=config) assert da1[:, 'id'] == da_ids From 1ccb3c2e45d8c17f7de0a6c60d65dabfc79dbcb1 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Tue, 18 Oct 2022 19:46:49 +0800 Subject: [PATCH 13/16] docs: add warning about tmp index --- 
docs/advanced/document-store/index.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 5db58353f22..da73c0758b0 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -70,7 +70,7 @@ The procedures for creating, retrieving, updating, and deleting Documents are id ## Construct -There are two ways for initializing a DocumentArray with an external storage backend. +There are two ways to initialize a DocumentArray with an external storage backend. ````{tab} Specify storage @@ -145,6 +145,14 @@ da = DocumentArray( Using dataclass gives you better type-checking in IDE but requires an extra import; using dict is more flexible but can be error-prone. You can choose the style that fits best to your context. +```{admonition} Creating DocumentArrays without specifying index +:class: warning +When you specify an index (table name for SQL stores) in the config, the index will be used to persist the DocumentArray in the document store. +If you create a DocumentArray but do not specify an index, a randomized placeholder index will be created to persist the data. + +Creating DocumentArrays without index is useful during prototyping but should not be used in a production setting as randomized placeholder data will be persisted in the document store unnecessarily. 
+``` + ## Feature summary From 6ece8fff45ff8a1d3e790b48b7c691232b0ff751 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Tue, 18 Oct 2022 19:57:09 +0800 Subject: [PATCH 14/16] docs: document explicit syncing --- docs/advanced/document-store/index.md | 36 +++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index da73c0758b0..18d2fffa7a8 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -357,8 +357,8 @@ array([[7., 7., 7.], ## Persistence, mutations and context manager Having DocumentArrays that are backed by a document store introduces an extra consideration into the way you think about DocumentArrays. -The DocumentArray object created in your Python program is now a view of the underlying implementation in the external store. -This means that your DocumentArray object in Python can be out of sync with what is persisted to the external store. +The DocumentArray object created in your Python program is now a view of the underlying implementation in the document store. +This means that your DocumentArray object in Python can be out of sync with what is persisted to the document store. **For example** ```python @@ -455,6 +455,38 @@ Length of da2 is 3 The append you made to the DocumentArray is now persisted properly. Hurray! +### Explicitly calling `sync` +You can explicitly call the `sync` method of the DocumentArray to save the data in the document store. 
+ +```python +from docarray import DocumentArray, Document + +da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="another_index")) +da1.append(Document()) +da.sync() +print(f"Length of da1 is {len(da1)}") + +da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="another_index")) +print(f"Length of da2 is {len(da2)}") +``` +**First run output** +```console +Length of da1 is 1 +Length of da2 is 1 +``` +**Second run output** +```console +Length of da1 is 2 +Length of da2 is 2 +``` +**Third run output** +```console +Length of da1 is 3 +Length of da2 is 3 +``` + +The append you made to the DocumentArray is now persisted properly. Hurray! + ## Known limitations From 988725fbe95cc82743fc7e9f20d0c59e4cfc2b43 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Tue, 18 Oct 2022 20:23:35 +0800 Subject: [PATCH 15/16] docs: refactor sync section into tabs --- docs/advanced/document-store/index.md | 31 +++++++++------------------ 1 file changed, 10 insertions(+), 21 deletions(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 18d2fffa7a8..267570936a7 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -423,8 +423,10 @@ Length of da1 is 3 ```` Now that you know the issue, let's explore what you should do to work with DocumentArrays backed by document store in a more predictable manner. -### Using Context Manager -The recommended way is to use the DocumentArray as a context manager like so: + +````{tab} Use with + +The data will be synced when the context manager is exited. 
```python from docarray import DocumentArray, Document @@ -437,38 +439,24 @@ print(f"Length of da1 is {len(da1)}") da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) print(f"Length of da2 is {len(da2)}") ``` -**First run output** -```console -Length of da1 is 1 -Length of da2 is 1 -``` -**Second run output** -```console -Length of da1 is 2 -Length of da2 is 2 -``` -**Third run output** -```console -Length of da1 is 3 -Length of da2 is 3 -``` +```` -The append you made to the DocumentArray is now persisted properly. Hurray! +````{tab} Use sync -### Explicitly calling `sync` -You can explicitly call the `sync` method of the DocumentArray to save the data in the document store. +Explicitly calling the `sync` method of the DocumentArray will save the data to the document store. ```python from docarray import DocumentArray, Document da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="another_index")) da1.append(Document()) -da.sync() +da1.sync() # Call the sync method print(f"Length of da1 is {len(da1)}") da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="another_index")) print(f"Length of da2 is {len(da2)}") ``` +```` **First run output** ```console Length of da1 is 1 @@ -487,6 +475,7 @@ Length of da2 is 3 The append you made to the DocumentArray is now persisted properly. Hurray! +The recommended way to sync data to the document store is to use the DocumentArray inside the `with` context manager. 
## Known limitations From aa24e2cf20828aa1fca5e427657b293e5860bb0d Mon Sep 17 00:00:00 2001 From: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Date: Tue, 18 Oct 2022 22:39:23 +0800 Subject: [PATCH 16/16] docs: minor fix Co-authored-by: Nicholas Dunham <11730795+NicholasDunham@users.noreply.github.com> Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> --- docs/advanced/document-store/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 267570936a7..b8c9dc7be9f 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -150,7 +150,7 @@ Using dataclass gives you better type-checking in IDE but requires an extra impo When you specify an index (table name for SQL stores) in the config, the index will be used to persist the DocumentArray in the document store. If you create a DocumentArray but do not specify an index, a randomized placeholder index will be created to persist the data. -Creating DocumentArrays without index is useful during prototyping but should not be used in a production setting as randomized placeholder data will be persisted in the document store unnecessarily. +Creating DocumentArrays without indexes is useful during prototyping but should not be used in a production setting as randomized placeholder data will be persisted in the document store unnecessarily. ```