From 3205c4de11db15e3b65c61dcb11359e8f227e12b Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Wed, 12 Oct 2022 00:28:12 +0800 Subject: [PATCH 1/7] docs(array): tidbit rewordings --- docs/advanced/document-store/index.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 3c5ed5e1dce..baa105594a3 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -15,11 +15,11 @@ benchmark ``` Documents inside a DocumentArray can live in a [document store](https://en.wikipedia.org/wiki/Document-oriented_database) instead of in memory, e.g. in SQLite, Redis. -Comparing to the in-memory storage, the benefit of using an external store is often about longer persistence and faster retrieval. +The benefit of using an external store over an in-memory store is often about longer persistence and faster retrieval. The look-and-feel of a DocumentArray with external store is **almost the same** as a regular in-memory DocumentArray. This allows users to easily switch between backends under the same DocArray idiom. -Take SQLite as an example, using it as the store backend of a DocumentArray is as simple as follows: +Take SQLite as an example, using it as the storage backend of a DocumentArray is as simple as follows: ```python from docarray import DocumentArray, Document @@ -58,19 +58,19 @@ da.summary() │ │ ╰────────────────────────────────────────────────────────────────────────────╯ ``` -Note that `da` was modified inside a `with` statement. This context manager ensures that the the `DocumentArray` indices, +Note that `da` was modified inside a `with` statement. This context manager ensures that the `DocumentArray` indices, which allow users to access the `DocumentArray` by position (allowing statements such as `da[1]`), are properly mapped and saved to the storage backend. 
This is the recommended default usage to modify a DocumentArray that lives on a document store to avoid unexpected behaviors that can yield to, for example, inaccessible elements by position. -Creating, retrieving, updating, deleting Documents are identical to the regular {ref}`DocumentArray`. All DocumentArray methods such as `.summary()`, `.embed()`, `.plot_embeddings()` should work out of the box. +Creating, retrieving, updating, and deleting Documents are identical to that of a regular {ref}`DocumentArray`. All DocumentArray methods such as `.summary()`, `.embed()`, `.plot_embeddings()` should also work out of the box. ## Construct -There are two ways for initializing a DocumentArray with a store backend. +There are two ways for initializing a DocumentArray with an external storage backend. ````{tab} Specify storage From 0d5aa3e57fe65b1b3c0d107a217ce5dd95b24816 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Wed, 12 Oct 2022 00:29:20 +0800 Subject: [PATCH 2/7] docs(array): add section on persistence mutation and context manager --- docs/advanced/document-store/index.md | 173 ++++++++++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index baa105594a3..094568563c0 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -346,6 +346,179 @@ array([[7., 7., 7.], [4., 4., 4.]]) ``` +## Persistence, mutations and context manager + +Having DocumentArrays that are backed by an external store does however introduce an extra consideration into the way one thinks about DocumentArrays. +The DocumentArray object created in our Python program is now a view of the underlying implementation in the external store. +This means that our DocumentArray object in Python can be out of sync with what is persisted to the external store. 
+ +**For example** +```python +from docarray import DocumentArray, Document + +da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) +da1.append(Document()) +print(f"Length of da1 is {len(da1)}") + +da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) +print(f"Length of da2 is {len(da2)}") +``` +**Output** +```console +Length of da1 is 1 +Length of da2 is 0 +``` + +Executing this script multiple times yields the same result. + +When we ran the line `da1.append(Document())`, we expect the DocumentArray with `index_name="my_index"` to now have a length of `1`. +However, when we try to create another view of the DocumentArray in `da2`, we get a fresh DocumentArray. + +We also expect the script to increment the length of the DocumentArrays every time we run it. +This is because the previous run should have saved the length of the DocumentArray with `index_name="my_index"` and our most recent run will append a new document, incrementing the length by `+1` each time. + +However, it seems like our append operation is also not being persisted. + +````{dropdown} What actually happened here? +The DocumentArray actually did persist. +But not in the way we might expect. +Since we did not use the `with` context manager or scope our mutation, the persistence logic is being evaluated when the program exits. +`da1` is destroyed first, persisting the DocumentArray of length `1`. +But when `da2` is destroyed, it persists a DocumentArray of length `0` to the same index in Redis as `da1`, overriding it's value. + +This means that if we had not created `da2`, the overriding would not have occured and the script will actually increment the length of the DocumentArray correctly. +You can prove this to yourself by commenting out the last 2 lines of the script and running the script repeatedly. 
+ +**Script** +```python +from docarray import DocumentArray, Document + +da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) +da1.append(Document()) +print(f"Length of da1 is {len(da1)}") + +# da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) +# print(f"Length of da2 is {len(da2)}") +``` + +**First run output** +```console +Length of da1 is 1 +``` +**Second run output** +```console +Length of da1 is 2 +``` +**Third run output** +```console +Length of da1 is 3 +``` +```` + +Now that we know the issue, let's explore some idioms we can use to work with DocumentArrays backed by external storage in a more predictable manner +### Using Context Manager +The recommended way is to use the DocumentArray as a context manager like so: + +```python +from docarray import DocumentArray, Document + +da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) +with da1: # Let's use the context manager to make sure we persist the mutation + da1.append(Document()) # +print(f"Length of da1 is {len(da1)}") + +da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) +print(f"Length of da2 is {len(da2)}") +``` +**First run output** +```console +Length of da1 is 1 +Length of da2 is 1 +``` +**Second run output** +```console +Length of da1 is 2 +Length of da2 is 2 +``` +**Third run output** +```console +Length of da1 is 3 +Length of da2 is 3 +``` + +The append we made to the DocumentArray is now persisted properly. Hurray! + +### Using Scope +Another method that is sometimes useful when multiple DocumentArrays are involed is to perform the mutation in a function scope. +The DocumentArrays will always persist when they fall out of scope. 
+ +```python +from docarray import DocumentArray, Document + +# Let's wrap our mutation logic in a function scope +def foo(): + da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) + da1.append(Document()) + print(f"Length of da1 is {len(da1)}") + + +foo() + +da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) +print(f"Length of da2 is {len(da2)}") +``` +**First run output** +```console +Length of da1 is 1 +Length of da2 is 1 +``` +**Second run output** +```console +Length of da1 is 2 +Length of da2 is 2 +``` +**Third run output** +```console +Length of da1 is 3 +Length of da2 is 3 +``` + +However, one needs to be extra cautious when opting for this method. +Using scopes in Python can be tricky because it is possible to accidentally promote the scope of a variable by creating a reference to it from an object of higher scope. + +**For example** +``` +from docarray import DocumentArray, Document + +list_outside_of_foo_scope = [] + +def foo(): + da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) + da1.append(Document()) + print(f"Length of da1 is {len(da1)}") + list_outside_of_foo_scope.append(da1) # Whoopsie! + +foo() + +da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) +print(f"Length of da2 is {len(da2)}") +``` +**First run output** +```console +Length of da1 is 1 +Length of da2 is 0 +``` +**Subsequent run outputs** +```console +Length of da1 is 1 +Length of da2 is 0 +``` + +The above example is somewhat contrived for simplicity. +However, when codebases get big and convoluted enough, it can be difficult to notice accidental promotions like this. + +It is thus recommended that you use the `with` context manager for most cases and only use the scoped method for quick and simple prototyping code. 
+ ## Known limitations From 72af96d3b8b5117fca6671dcadf9f56920313636 Mon Sep 17 00:00:00 2001 From: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Date: Wed, 12 Oct 2022 13:44:03 +0800 Subject: [PATCH 3/7] docs(array): minor grammatical changes Co-authored-by: Nicholas Dunham <11730795+NicholasDunham@users.noreply.github.com> Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> --- docs/advanced/document-store/index.md | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 094568563c0..44c96995751 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -19,7 +19,7 @@ The benefit of using an external store over an in-memory store is often about lo The look-and-feel of a DocumentArray with external store is **almost the same** as a regular in-memory DocumentArray. This allows users to easily switch between backends under the same DocArray idiom. -Take SQLite as an example, using it as the storage backend of a DocumentArray is as simple as follows: +Take SQLite as an example. Using it as the storage backend of a DocumentArray is as simple as follows: ```python from docarray import DocumentArray, Document @@ -65,7 +65,7 @@ This is the recommended default usage to modify a DocumentArray that lives on a unexpected behaviors that can yield to, for example, inaccessible elements by position. -Creating, retrieving, updating, and deleting Documents are identical to that of a regular {ref}`DocumentArray`. All DocumentArray methods such as `.summary()`, `.embed()`, `.plot_embeddings()` should also work out of the box. +The procedures for creating, retrieving, updating, and deleting Documents are identical to those for a regular {ref}`DocumentArray`. All DocumentArray methods such as `.summary()`, `.embed()`, `.plot_embeddings()` should also work out of the box. 
## Construct @@ -371,7 +371,7 @@ Length of da2 is 0 Executing this script multiple times yields the same result. -When we ran the line `da1.append(Document())`, we expect the DocumentArray with `index_name="my_index"` to now have a length of `1`. +When we run the line `da1.append(Document())`, we expect the DocumentArray with `index_name="my_index"` to now have a length of `1`. However, when we try to create another view of the DocumentArray in `da2`, we get a fresh DocumentArray. We also expect the script to increment the length of the DocumentArrays every time we run it. @@ -381,12 +381,12 @@ However, it seems like our append operation is also not being persisted. ````{dropdown} What actually happened here? The DocumentArray actually did persist. -But not in the way we might expect. +but not in the way we might expect. Since we did not use the `with` context manager or scope our mutation, the persistence logic is being evaluated when the program exits. `da1` is destroyed first, persisting the DocumentArray of length `1`. -But when `da2` is destroyed, it persists a DocumentArray of length `0` to the same index in Redis as `da1`, overriding it's value. +But when `da2` is destroyed, it persists a DocumentArray of length `0` to the same index in Redis as `da1`, overriding its value. -This means that if we had not created `da2`, the overriding would not have occured and the script will actually increment the length of the DocumentArray correctly. +This means that if we had not created `da2`, the overriding would not have occured and the script would actually increment the length of the DocumentArray correctly. You can prove this to yourself by commenting out the last 2 lines of the script and running the script repeatedly. 
**Script** @@ -415,7 +415,7 @@ Length of da1 is 3 ``` ```` -Now that we know the issue, let's explore some idioms we can use to work with DocumentArrays backed by external storage in a more predictable manner +Now that we know the issue, let's explore some idioms we can use to work with DocumentArrays backed by external storage in a more predictable manner. ### Using Context Manager The recommended way is to use the DocumentArray as a context manager like so: From 949bced48588be2bc5a7b329be6d07bf1b4f4b28 Mon Sep 17 00:00:00 2001 From: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Date: Wed, 12 Oct 2022 17:04:18 +0800 Subject: [PATCH 4/7] docs(array): use clearer and consistent wording Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> --- docs/advanced/document-store/index.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 44c96995751..41073058401 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -348,9 +348,9 @@ array([[7., 7., 7.], ## Persistence, mutations and context manager -Having DocumentArrays that are backed by an external store does however introduce an extra consideration into the way one thinks about DocumentArrays. -The DocumentArray object created in our Python program is now a view of the underlying implementation in the external store. -This means that our DocumentArray object in Python can be out of sync with what is persisted to the external store. +Having DocumentArrays that are backed by a document store introduces an extra consideration into the way you think about DocumentArrays. +The DocumentArray object created in your Python program is now a view of the underlying implementation in the external store. 
+This means that your DocumentArray object in Python can be out of sync with what is persisted to the external store. **For example** ```python @@ -371,7 +371,7 @@ Length of da2 is 0 Executing this script multiple times yields the same result. -When we run the line `da1.append(Document())`, we expect the DocumentArray with `index_name="my_index"` to now have a length of `1`. +When we run the line `da1.append(Document())`, we expect the DocumentArray with `index_name='my_index'` to now have a length of `1`. However, when we try to create another view of the DocumentArray in `da2`, we get a fresh DocumentArray. We also expect the script to increment the length of the DocumentArrays every time we run it. From 2daaa5f4f6eeac45a6502f10ea3889e85a8b935a Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Wed, 12 Oct 2022 17:43:34 +0800 Subject: [PATCH 5/7] docs(array): remove section on function scope --- docs/advanced/document-store/index.md | 72 +-------------------------- 1 file changed, 1 insertion(+), 71 deletions(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 41073058401..4cc683abb3b 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -415,7 +415,7 @@ Length of da1 is 3 ``` ```` -Now that we know the issue, let's explore some idioms we can use to work with DocumentArrays backed by external storage in a more predictable manner. +Now that you know the issue, let's explore what you should do to work with DocumentArrays backed by a document store in a more predictable manner. ### Using Context Manager The recommended way is to use the DocumentArray as a context manager like so: @@ -448,76 +448,6 @@ Length of da2 is 3 The append we made to the DocumentArray is now persisted properly. Hurray! -### Using Scope -Another method that is sometimes useful when multiple DocumentArrays are involed is to perform the mutation in a function scope. 
-The DocumentArrays will always persist when they fall out of scope. - -```python -from docarray import DocumentArray, Document - -# Let's wrap our mutation logic in a function scope -def foo(): - da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) - da1.append(Document()) - print(f"Length of da1 is {len(da1)}") - - -foo() - -da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) -print(f"Length of da2 is {len(da2)}") -``` -**First run output** -```console -Length of da1 is 1 -Length of da2 is 1 -``` -**Second run output** -```console -Length of da1 is 2 -Length of da2 is 2 -``` -**Third run output** -```console -Length of da1 is 3 -Length of da2 is 3 -``` - -However, one needs to be extra cautious when opting for this method. -Using scopes in Python can be tricky because it is possible to accidentally promote the scope of a variable by creating a reference to it from an object of higher scope. - -**For example** -``` -from docarray import DocumentArray, Document - -list_outside_of_foo_scope = [] - -def foo(): - da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) - da1.append(Document()) - print(f"Length of da1 is {len(da1)}") - list_outside_of_foo_scope.append(da1) # Whoopsie! - -foo() - -da2 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) -print(f"Length of da2 is {len(da2)}") -``` -**First run output** -```console -Length of da1 is 1 -Length of da2 is 0 -``` -**Subsequent run outputs** -```console -Length of da1 is 1 -Length of da2 is 0 -``` - -The above example is somewhat contrived for simplicity. -However, when codebases get big and convoluted enough, it can be difficult to notice accidental promotions like this. - -It is thus recommended that you use the `with` context manager for most cases and only use the scoped method for quick and simple prototyping code. 
## Known limitations From 5619e2caa8ac0287f23dbbf06d3ec837d9282562 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Wed, 12 Oct 2022 17:51:07 +0800 Subject: [PATCH 6/7] docs(array): increase usage of you in wording --- docs/advanced/document-store/index.md | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/docs/advanced/document-store/index.md b/docs/advanced/document-store/index.md index 4cc683abb3b..5db58353f22 100644 --- a/docs/advanced/document-store/index.md +++ b/docs/advanced/document-store/index.md @@ -100,7 +100,7 @@ da = DocumentArray() ```` -Depending on the context, you can choose the style that fits better. For example, if one wants to use class method such as `DocumentArray.empty(10)`, then explicit importing `DocumentArraySqlite` is the way to go. Of course, you can choose not to alias the imported class to make the code even more explicit. +Depending on the context, you can choose the style that fits better. For example, if you want to use a class method such as `DocumentArray.empty(10)`, then explicitly importing `DocumentArraySqlite` is the way to go. Of course, you can choose not to alias the imported class to make the code even more explicit. ```{admonition} Subindices :class: seealso @@ -116,7 +116,7 @@ To learn how to do that, see {ref}`here `. The config of a store backend is either store-specific dataclass object or a `dict` that can be parsed into the former. -One can pass the config in the constructor via `config`: +You can pass the config in the constructor via `config`: ````{tab} Use dataclass @@ -371,22 +371,21 @@ Length of da2 is 0 Executing this script multiple times yields the same result. -When we run the line `da1.append(Document())`, we expect the DocumentArray with `index_name='my_index'` to now have a length of `1`. -However, when we try to create another view of the DocumentArray in `da2`, we get a fresh DocumentArray. 
+When you run the line `da1.append(Document())`, you expect the DocumentArray with `index_name='my_index'` to now have a length of `1`. +However, when you try to create another view of the DocumentArray in `da2`, you get a fresh DocumentArray. -We also expect the script to increment the length of the DocumentArrays every time we run it. -This is because the previous run should have saved the length of the DocumentArray with `index_name="my_index"` and our most recent run will append a new document, incrementing the length by `+1` each time. +You also expect the script to increment the length of the DocumentArrays every time you run it. +This is because the previous run should have saved the length of the DocumentArray with `index_name='my_index'` and your most recent run will append a new document, incrementing the length by `+1` each time. -However, it seems like our append operation is also not being persisted. +However, it seems like your append operation is also not being persisted. ````{dropdown} What actually happened here? -The DocumentArray actually did persist. -but not in the way we might expect. -Since we did not use the `with` context manager or scope our mutation, the persistence logic is being evaluated when the program exits. +The DocumentArray actually did persist, but not in the way you might expect. +Since you did not use the `with` context manager or scope your mutation, the persistence logic is being evaluated when the program exits. `da1` is destroyed first, persisting the DocumentArray of length `1`. But when `da2` is destroyed, it persists a DocumentArray of length `0` to the same index in Redis as `da1`, overriding its value. -This means that if we had not created `da2`, the overriding would not have occured and the script would actually increment the length of the DocumentArray correctly. 
+This means that if you had not created `da2`, the overriding would not have occurred and the script would actually increment the length of the DocumentArray correctly. You can prove this to yourself by commenting out the last 2 lines of the script and running the script repeatedly. **Script** @@ -423,7 +422,7 @@ The recommended way is to use the DocumentArray as a context manager like so: from docarray import DocumentArray, Document da1 = DocumentArray(storage='redis', config=dict(n_dim=3, index_name="my_index")) -with da1: # Let's use the context manager to make sure we persist the mutation +with da1: # Use the context manager to make sure you persist the mutation da1.append(Document()) # print(f"Length of da1 is {len(da1)}") @@ -446,7 +445,7 @@ Length of da1 is 3 Length of da2 is 3 ``` -The append we made to the DocumentArray is now persisted properly. Hurray! +The append you made to the DocumentArray is now persisted properly. Hurray! ## Known limitations @@ -516,7 +515,7 @@ Take home message is, use the context manager and put your write operations into ### Out-of-array modification -One can not take a Document *out* from a DocumentArray and modify it, then expect its modification to be committed back to the DocumentArray. +You cannot take a Document *out* from a DocumentArray and modify it, then expect its modification to be committed back to the DocumentArray. 
Specifically, the pattern below is not supported by any external store backend: From b19479bc27393b3eef490164a7d50254fe4adcc6 Mon Sep 17 00:00:00 2001 From: Jackmin801 Date: Sat, 15 Oct 2022 00:54:32 +0800 Subject: [PATCH 7/7] docs: make sure all extends use context manager --- docs/advanced/document-store/elasticsearch.md | 63 ++++++----- docs/advanced/document-store/redis.md | 104 +++++++++--------- docs/advanced/document-store/weaviate.md | 45 ++++---- 3 files changed, 111 insertions(+), 101 deletions(-) diff --git a/docs/advanced/document-store/elasticsearch.md b/docs/advanced/document-store/elasticsearch.md index 67f069cd122..a6f83a22679 100644 --- a/docs/advanced/document-store/elasticsearch.md +++ b/docs/advanced/document-store/elasticsearch.md @@ -82,7 +82,8 @@ da = DocumentArray( config={'index_name': 'old_stuff', 'n_dim': 128}, ) -da.extend([Document() for _ in range(1000)]) +with da: + da.extend([Document() for _ in range(1000)]) da2 = DocumentArray( storage='elasticsearch', @@ -304,13 +305,14 @@ for those that have `pizza` in their text description. 
from docarray import DocumentArray, Document da = DocumentArray(storage='elasticsearch', config={'n_dim': 2, 'index_text': True}) -da.extend( - [ - Document(text='Person eating'), - Document(text='Person eating pizza'), - Document(text='Pizza restaurant'), - ] -) +with da: + da.extend( + [ + Document(text='Person eating'), + Document(text='Person eating pizza'), + Document(text='Pizza restaurant'), + ] + ) pizza_docs = da.find('pizza') pizza_docs[:, 'text'] @@ -336,28 +338,29 @@ from docarray import DocumentArray, Document da = DocumentArray( storage='elasticsearch', config={'n_dim': 32, 'tag_indices': ['food_type', 'price']} ) -da.extend( - [ - Document( - tags={ - 'food_type': 'Italian and Spanish food', - 'price': 'cheap but not that cheap', - }, - ), - Document( - tags={ - 'food_type': 'French and Italian food', - 'price': 'on the expensive side', - }, - ), - Document( - tags={ - 'food_type': 'chinese noddles', - 'price': 'quite cheap for what you get!', - }, - ), - ] -) +with da: + da.extend( + [ + Document( + tags={ + 'food_type': 'Italian and Spanish food', + 'price': 'cheap but not that cheap', + }, + ), + Document( + tags={ + 'food_type': 'French and Italian food', + 'price': 'on the expensive side', + }, + ), + Document( + tags={ + 'food_type': 'chinese noddles', + 'price': 'quite cheap for what you get!', + }, + ), + ] + ) results_cheap = da.find('cheap', index='price') print('searching "cheap" in :\n\t', results_cheap[:, 'tags__price']) diff --git a/docs/advanced/document-store/redis.md b/docs/advanced/document-store/redis.md index e17cf84fab6..2f131922fda 100644 --- a/docs/advanced/document-store/redis.md +++ b/docs/advanced/document-store/redis.md @@ -144,26 +144,27 @@ da = DocumentArray( }, ) -da.extend( - [ - Document( - id=f'{i}', - embedding=i * np.ones(n_dim), - tags={'price': i, 'color': 'blue', 'stock': i % 2 == 0}, - ) - for i in range(10) - ] -) -da.extend( - [ - Document( - id=f'{i+10}', - embedding=i * np.ones(n_dim), - tags={'price': i, 
'color': 'red', 'stock': i % 2 == 0}, - ) - for i in range(10) - ] -) +with da: + da.extend( + [ + Document( + id=f'{i}', + embedding=i * np.ones(n_dim), + tags={'price': i, 'color': 'blue', 'stock': i % 2 == 0}, + ) + for i in range(10) + ] + ) + da.extend( + [ + Document( + id=f'{i+10}', + embedding=i * np.ones(n_dim), + tags={'price': i, 'color': 'red', 'stock': i % 2 == 0}, + ) + for i in range(10) + ] + ) print('\nIndexed price, color and stock:\n') for doc in da: @@ -301,7 +302,8 @@ da = DocumentArray( }, ) -da.extend([Document(id=f'{i}', embedding=i * np.ones(n_dim)) for i in range(10)]) +with da: + da.extend([Document(id=f'{i}', embedding=i * np.ones(n_dim)) for i in range(10)]) np_query = np.ones(n_dim) * 8 n_limit = 5 @@ -367,13 +369,14 @@ The following example builds a `DocumentArray` with several documents containing from docarray import Document, DocumentArray da = DocumentArray(storage='redis', config={'n_dim': 2, 'index_text': True}) -da.extend( - [ - Document(id='1', text='token1 token2 token3'), - Document(id='2', text='token1 token2'), - Document(id='3', text='token2 token3 token4'), - ] -) +with da: + da.extend( + [ + Document(id='1', text='token1 token2 token3'), + Document(id='2', text='token1 token2'), + Document(id='3', text='token2 token3 token4'), + ] + ) results = da.find('token1') print(results[:, 'text']) @@ -420,28 +423,29 @@ da = DocumentArray( storage='redis', config={'n_dim': 32, 'tag_indices': ['food_type', 'price']}, ) -da.extend( - [ - Document( - tags={ - 'food_type': 'Italian and Spanish food', - 'price': 'cheap but not that cheap', - }, - ), - Document( - tags={ - 'food_type': 'French and Italian food', - 'price': 'on the expensive side', - }, - ), - Document( - tags={ - 'food_type': 'chinese noddles', - 'price': 'quite cheap for what you get!', - }, - ), - ] -) +with da: + da.extend( + [ + Document( + tags={ + 'food_type': 'Italian and Spanish food', + 'price': 'cheap but not that cheap', + }, + ), + Document( + tags={ + 
'food_type': 'French and Italian food', + 'price': 'on the expensive side', + }, + ), + Document( + tags={ + 'food_type': 'chinese noddles', + 'price': 'quite cheap for what you get!', + }, + ), + ] + ) results_cheap = da.find('cheap', index='price') print('searching "cheap" in :\n\t', results_cheap[:, 'tags__price']) diff --git a/docs/advanced/document-store/weaviate.md b/docs/advanced/document-store/weaviate.md index f94b341d685..87a75a12e54 100644 --- a/docs/advanced/document-store/weaviate.md +++ b/docs/advanced/document-store/weaviate.md @@ -127,13 +127,14 @@ Then, we can index some Documents: ```python from docarray import Document -da.extend( - [ - Document(text='Persist Documents with Weaviate.'), - Document(text='And enjoy fast nearest neighbor search.'), - Document(text='All while using DocArray API.'), - ] -) +with da: + da.extend( + [ + Document(text='Persist Documents with Weaviate.'), + Document(text='And enjoy fast nearest neighbor search.'), + Document(text='All while using DocArray API.'), + ] + ) ``` Now, we can generate embeddings inside the database using BERT model: @@ -426,13 +427,14 @@ da = DocumentArray( ) # load the dummy data -da.extend( - [ - Document(text='Persist Documents with Weaviate.'), - Document(text='And enjoy fast nearest neighbor search.'), - Document(text='All while using DocArray API.'), - ] -) +with da: + da.extend( + [ + Document(text='Persist Documents with Weaviate.'), + Document(text='And enjoy fast nearest neighbor search.'), + Document(text='All while using DocArray API.'), + ] + ) tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') model = AutoModel.from_pretrained('bert-base-uncased') @@ -493,13 +495,14 @@ da = DocumentArray( ) # load some dummy data -da.extend( - [ - Document(text='Persist Documents with Weaviate.'), - Document(text='And enjoy fast nearest neighbor search.'), - Document(text='All while using DocArray API.'), - ] -) +with da: + da.extend( + [ + Document(text='Persist Documents with 
Weaviate.'), + Document(text='And enjoy fast nearest neighbor search.'), + Document(text='All while using DocArray API.'), + ] + ) tokenizer = AutoTokenizer.from_pretrained('bert-base-uncased') model = AutoModel.from_pretrained('bert-base-uncased')