From 6952fa75b66b9c84151716b584a6a8be4cd73e55 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 17 Apr 2023 12:09:46 +0200 Subject: [PATCH 1/3] docs: fix doc store code snippet Signed-off-by: anna-charlotte --- README.md | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/README.md b/README.md index d86d6df1f71..a90ddea552c 100644 --- a/README.md +++ b/README.md @@ -328,28 +328,28 @@ But fret not! DocArray has you covered! The Document Store interface lets you push and pull Documents to and from multiple data sources, all with the same user interface. -As an example, let's take a look at how that would work with AWS S3 storage: +As an example, let's take a look at how that would work with Jina AI Cloud storage: ```python -from docarray import DocList -from docarray.documents import ImageDoc -import numpy as np +from docarray import BaseDoc, DocList +import os -dl = DocList[ImageDoc]( - [ - ImageDoc( - url="https://upload.wikimedia.org/wikipedia/commons/2/2f/Alpamayo.jpg", - tensor=np.zeros((3, 224, 224)), - ) - for _ in range(100) - ] -) -# push the DocList to S3 -dl.push('s3://my-bucket/my-documents', show_progress=True) +class SimpleDoc(BaseDoc): + text: str + + +# first create a personal access token here: https://cloud.jina.ai/settings/tokens +os.environ['JINA_AUTH_TOKEN'] = 'YOUR_PAT' + +DL_NAME = 'simple-dl' +dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(8)]) + +# push to Jina AI Cloud +dl.push(f'jac://{DL_NAME}') -# pull the DocList from S3 -dl_2 = DocList[ImageDoc].pull('s3://my-bucket/my-documents', show_progress=True) +# pull from Jina AI Cloud +dl_pull = DocList[SimpleDoc].pull(f'jac://{DL_NAME}') ``` From f242281e7052e8bfb8cb5483df7222b69316aade Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Mon, 17 Apr 2023 12:39:09 +0200 Subject: [PATCH 2/3] fix: on disk example instead of jina ai cloud Signed-off-by: anna-charlotte --- README.md | 16 ++++------------ 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index a90ddea552c..c4abd1b0903 100644 --- a/README.md +++ b/README.md @@ -328,28 +328,20 @@ But fret not! DocArray has you covered! The Document Store interface lets you push and pull Documents to and from multiple data sources, all with the same user interface. -As an example, let's take a look at how that would work with Jina AI Cloud storage: +As an example, let's take a look at how that would work with on-disk storage: ```python from docarray import BaseDoc, DocList -import os class SimpleDoc(BaseDoc): text: str -# first create a personal access token here: https://cloud.jina.ai/settings/tokens -os.environ['JINA_AUTH_TOKEN'] = 'YOUR_PAT' +docs = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(8)]) +docs.push('file://simple_docs') -DL_NAME = 'simple-dl' -dl = DocList[SimpleDoc]([SimpleDoc(text=f'doc {i}') for i in range(8)]) - -# push to Jina AI Cloud -dl.push(f'jac://{DL_NAME}') - -# pull from Jina AI Cloud -dl_pull = DocList[SimpleDoc].pull(f'jac://{DL_NAME}') +docs_pull = DocList[SimpleDoc].pull('file://simple_docs') ``` From 1fd5d97766b5a98e5637a8e653260f494016ee08 Mon Sep 17 00:00:00 2001 From: Charlotte Gerhaher Date: Mon, 17 Apr 2023 13:07:23 +0200 Subject: [PATCH 3/3] fix: apply alex suggestions Co-authored-by: Alex Cureton-Griffiths Signed-off-by: Charlotte Gerhaher --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c4abd1b0903..f307156dbba 100644 --- a/README.md +++ b/README.md @@ -328,7 +328,7 @@ But fret not! DocArray has you covered! The Document Store interface lets you push and pull Documents to and from multiple data sources, all with the same user interface. -As an example, let's take a look at how that would work with on-disk storage: +For example, let's see how that works with on-disk storage: ```python from docarray import BaseDoc, DocList