From 828aae0186e38e5b1f3779a219dfa738f2f83fea Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 29 Mar 2023 14:34:20 +0200 Subject: [PATCH 1/2] fix: fix utils Signed-off-by: samsja --- docs/api_references/utils/filter.md | 2 +- docs/api_references/utils/find.md | 2 +- docs/api_references/utils/maps_docs.md | 2 +- docs/api_references/utils/reduce.md | 8 ++++++++ 4 files changed, 11 insertions(+), 3 deletions(-) create mode 100644 docs/api_references/utils/reduce.md diff --git a/docs/api_references/utils/filter.md b/docs/api_references/utils/filter.md index 8eedc91509c..b40c59c510a 100644 --- a/docs/api_references/utils/filter.md +++ b/docs/api_references/utils/filter.md @@ -1,4 +1,4 @@ -# Filter +# filter ::: docarray.utils.filter.filter_docs diff --git a/docs/api_references/utils/find.md b/docs/api_references/utils/find.md index 097e7372d29..e94a9401149 100644 --- a/docs/api_references/utils/find.md +++ b/docs/api_references/utils/find.md @@ -1,4 +1,4 @@ -# Find +# find ::: docarray.utils.find.find ::: docarray.utils.find.find_batched diff --git a/docs/api_references/utils/maps_docs.md b/docs/api_references/utils/maps_docs.md index 3f663b114b3..9ccd971b1fd 100644 --- a/docs/api_references/utils/maps_docs.md +++ b/docs/api_references/utils/maps_docs.md @@ -1,4 +1,4 @@ -# Map +# map ::: docarray.utils.map.map_docs ::: docarray.utils.map.map_docs_batch diff --git a/docs/api_references/utils/reduce.md b/docs/api_references/utils/reduce.md new file mode 100644 index 00000000000..9b6db3eea02 --- /dev/null +++ b/docs/api_references/utils/reduce.md @@ -0,0 +1,8 @@ +# reduce + +::: docarray.utils.reduce.reduce +::: docarray.utils.reduce.reduce_all + + + + From 4de5f1e38fd4b798800c06f20939f2637e5f0cce Mon Sep 17 00:00:00 2001 From: samsja Date: Wed, 29 Mar 2023 14:39:54 +0200 Subject: [PATCH 2/2] fix: fix map Signed-off-by: samsja --- docarray/utils/map.py | 86 ++++++++++++++++++++++--------------------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/docarray/utils/map.py b/docarray/utils/map.py index 3f8af0f2b7a..8ce4d974b18 100644 --- a/docarray/utils/map.py +++ b/docarray/utils/map.py @@ -29,27 +29,29 @@ def map_docs( --- ```python - # from docarray import DocArray - # from docarray.documents import ImageDoc - # from docarray.utils.map import map_docs - # - # - # def load_url_to_tensor(img: ImageDoc) -> ImageDoc: - # img.tensor = img.url.load() - # return img - # - # - # url = 'https://github.com/docarray/artwork/blob/main/stacked/color/docarray-stacked-color.png' - # - # - # da = DocArray[ImageDoc]([ImageDoc(url=url) for _ in range(100)]) - # da = DocArray[ImageDoc]( - # list(map_docs(da, load_url_to_tensor, backend='thread')) - # ) # threading is usually a good option for IO-bound tasks such as loading an - # # ImageDoc from url - # - # for doc in da: - # assert doc.tensor is not None + from docarray import DocArray + from docarray.documents import ImageDoc + from docarray.utils.map import map_docs + + + def load_url_to_tensor(img: ImageDoc) -> ImageDoc: + img.tensor = img.url.load() + return img + + + url = ( + 'https://upload.wikimedia.org/wikipedia/commons/8/80/' + 'Dag_Sebastian_Ahlander_at_G%C3%B6teborg_Book_Fair_2012b.jpg' + ) + + da = DocArray[ImageDoc]([ImageDoc(url=url) for _ in range(100)]) + da = DocArray[ImageDoc]( + list(map_docs(da, load_url_to_tensor, backend='thread')) + ) # threading is usually a good option for IO-bound tasks such as loading an + # ImageDoc from url + + for doc in da: + assert doc.tensor is not None ``` --- @@ -119,27 +121,27 @@ def map_docs_batch( --- ```python - # from docarray import BaseDoc, DocArray - # from docarray.utils.map import map_docs_batch - # - # - # class MyDoc(BaseDoc): - # name: str - # - # - # def upper_case_name(da: DocArray[MyDoc]) -> DocArray[MyDoc]: - # da.name = [n.upper() for n in da.name] - # return da - # - # - # batch_size = 16 - # da = DocArray[MyDoc]([MyDoc(name='my orange cat') for _ in range(100)]) - # it = map_docs_batch(da, upper_case_name, batch_size=batch_size) - # for i, d in enumerate(it): - # da[i * batch_size : (i + 1) * batch_size] = d - # - # assert len(da) == 100 - # print(da.name[:3]) + from docarray import BaseDoc, DocArray + from docarray.utils.map import map_docs_batch + + + class MyDoc(BaseDoc): + name: str + + + def upper_case_name(da: DocArray[MyDoc]) -> DocArray[MyDoc]: + da.name = [n.upper() for n in da.name] + return da + + + batch_size = 16 + da = DocArray[MyDoc]([MyDoc(name='my orange cat') for _ in range(100)]) + it = map_docs_batch(da, upper_case_name, batch_size=batch_size) + for i, d in enumerate(it): + da[i * batch_size : (i + 1) * batch_size] = d + + assert len(da) == 100 + print(da.name[:3]) ``` ---