From 340d4eb62f7710950a392bd53f4340f338442447 Mon Sep 17 00:00:00 2001 From: anna-charlotte Date: Wed, 29 Mar 2023 15:51:45 +0200 Subject: [PATCH] refactor: map_docs_batch to map_docs_batched Signed-off-by: anna-charlotte --- docarray/utils/map.py | 8 ++++---- docs/api_references/utils/maps_docs.md | 2 +- tests/benchmark_tests/test_map.py | 10 +++++----- tests/units/util/test_map.py | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docarray/utils/map.py b/docarray/utils/map.py index 8ce4d974b18..31e93bc2175 100644 --- a/docarray/utils/map.py +++ b/docarray/utils/map.py @@ -1,4 +1,4 @@ -__all__ = ['map_docs', 'map_docs_batch'] +__all__ = ['map_docs', 'map_docs_batched'] from contextlib import nullcontext from math import ceil from multiprocessing.pool import Pool, ThreadPool @@ -103,7 +103,7 @@ def load_url_to_tensor(img: ImageDoc) -> ImageDoc: yield x -def map_docs_batch( +def map_docs_batched( da: T, func: Callable[[T], Union[T, T_doc]], batch_size: int, @@ -122,7 +122,7 @@ def map_docs_batch( ```python from docarray import BaseDoc, DocArray - from docarray.utils.map import map_docs_batch + from docarray.utils.map import map_docs_batched class MyDoc(BaseDoc): @@ -136,7 +136,7 @@ def upper_case_name(da: DocArray[MyDoc]) -> DocArray[MyDoc]: batch_size = 16 da = DocArray[MyDoc]([MyDoc(name='my orange cat') for _ in range(100)]) - it = map_docs_batch(da, upper_case_name, batch_size=batch_size) + it = map_docs_batched(da, upper_case_name, batch_size=batch_size) for i, d in enumerate(it): da[i * batch_size : (i + 1) * batch_size] = d diff --git a/docs/api_references/utils/maps_docs.md b/docs/api_references/utils/maps_docs.md index 9ccd971b1fd..da71bc867e9 100644 --- a/docs/api_references/utils/maps_docs.md +++ b/docs/api_references/utils/maps_docs.md @@ -1,7 +1,7 @@ # map ::: docarray.utils.map.map_docs -::: docarray.utils.map.map_docs_batch +::: docarray.utils.map.map_docs_batched diff --git a/tests/benchmark_tests/test_map.py b/tests/benchmark_tests/test_map.py index ace42dacfa7..d6018b9fdb0 100644 --- a/tests/benchmark_tests/test_map.py +++ b/tests/benchmark_tests/test_map.py @@ -7,7 +7,7 @@ from docarray import BaseDoc, DocArray from docarray.documents import ImageDoc from docarray.typing import NdArray -from docarray.utils.map import map_docs, map_docs_batch +from docarray.utils.map import map_docs, map_docs_batched from tests.units.typing.test_bytes import IMAGE_PATHS pytestmark = [pytest.mark.benchmark, pytest.mark.slow] @@ -56,7 +56,7 @@ def cpu_intensive_batch(da: DocArray[MyMatrix]) -> DocArray[MyMatrix]: return da -def test_map_docs_batch_multiprocessing(): +def test_map_docs_batched_multiprocessing(): if os.cpu_count() > 1: def time_multiprocessing(num_workers: int) -> float: @@ -66,7 +66,7 @@ def time_multiprocessing(num_workers: int) -> float: da = DocArray[MyMatrix]([MyMatrix(matrix=m) for m in matrices]) start_time = time() list( - map_docs_batch( + map_docs_batched( da=da, func=cpu_intensive_batch, batch_size=8, @@ -113,7 +113,7 @@ def io_intensive_batch(da: DocArray[ImageDoc]) -> DocArray[ImageDoc]: return da -def test_map_docs_batch_multithreading(): +def test_map_docs_batched_multithreading(): def time_multithreading_batch(num_workers: int) -> float: n_docs = 100 da = DocArray[ImageDoc]( @@ -121,7 +121,7 @@ def time_multithreading_batch(num_workers: int) -> float: ) start_time = time() list( - map_docs_batch( + map_docs_batched( da=da, func=io_intensive_batch, backend='thread', diff --git a/tests/units/util/test_map.py b/tests/units/util/test_map.py index 0e54aaa2732..c36ebc2f46e 100644 --- a/tests/units/util/test_map.py +++ b/tests/units/util/test_map.py @@ -5,7 +5,7 @@ from docarray import BaseDoc, DocArray from docarray.documents import ImageDoc from docarray.typing import ImageUrl, NdArray -from docarray.utils.map import map_docs, map_docs_batch +from docarray.utils.map import map_docs, map_docs_batched from tests.units.typing.test_bytes import IMAGE_PATHS N_DOCS = 2 @@ -73,10 +73,10 @@ class MyImage(BaseDoc): @pytest.mark.slow @pytest.mark.parametrize('n_docs,batch_size', [(10, 5), (10, 8)]) @pytest.mark.parametrize('backend', ['thread', 'process']) -def test_map_docs_batch(n_docs, batch_size, backend): +def test_map_docs_batched(n_docs, batch_size, backend): da = DocArray[MyImage]([MyImage(url=IMAGE_PATHS['png']) for _ in range(n_docs)]) - it = map_docs_batch( + it = map_docs_batched( da=da, func=load_from_da, batch_size=batch_size, backend=backend ) assert isinstance(it, Generator)