Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions docarray/utils/map.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
-__all__ = ['map_docs', 'map_docs_batch']
+__all__ = ['map_docs', 'map_docs_batched']
from contextlib import nullcontext
from math import ceil
from multiprocessing.pool import Pool, ThreadPool
Expand Down Expand Up @@ -103,7 +103,7 @@ def load_url_to_tensor(img: ImageDoc) -> ImageDoc:
yield x


-def map_docs_batch(
+def map_docs_batched(
da: T,
func: Callable[[T], Union[T, T_doc]],
batch_size: int,
Expand All @@ -122,7 +122,7 @@ def map_docs_batch(

```python
from docarray import BaseDoc, DocArray
-from docarray.utils.map import map_docs_batch
+from docarray.utils.map import map_docs_batched


class MyDoc(BaseDoc):
Expand All @@ -136,7 +136,7 @@ def upper_case_name(da: DocArray[MyDoc]) -> DocArray[MyDoc]:

batch_size = 16
da = DocArray[MyDoc]([MyDoc(name='my orange cat') for _ in range(100)])
-it = map_docs_batch(da, upper_case_name, batch_size=batch_size)
+it = map_docs_batched(da, upper_case_name, batch_size=batch_size)
for i, d in enumerate(it):
da[i * batch_size : (i + 1) * batch_size] = d

Expand Down
2 changes: 1 addition & 1 deletion docs/api_references/utils/maps_docs.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# map

::: docarray.utils.map.map_docs
-::: docarray.utils.map.map_docs_batch
+::: docarray.utils.map.map_docs_batched



Expand Down
10 changes: 5 additions & 5 deletions tests/benchmark_tests/test_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from docarray import BaseDoc, DocArray
from docarray.documents import ImageDoc
from docarray.typing import NdArray
-from docarray.utils.map import map_docs, map_docs_batch
+from docarray.utils.map import map_docs, map_docs_batched
from tests.units.typing.test_bytes import IMAGE_PATHS

pytestmark = [pytest.mark.benchmark, pytest.mark.slow]
Expand Down Expand Up @@ -56,7 +56,7 @@ def cpu_intensive_batch(da: DocArray[MyMatrix]) -> DocArray[MyMatrix]:
return da


-def test_map_docs_batch_multiprocessing():
+def test_map_docs_batched_multiprocessing():
if os.cpu_count() > 1:

def time_multiprocessing(num_workers: int) -> float:
Expand All @@ -66,7 +66,7 @@ def time_multiprocessing(num_workers: int) -> float:
da = DocArray[MyMatrix]([MyMatrix(matrix=m) for m in matrices])
start_time = time()
list(
-map_docs_batch(
+map_docs_batched(
da=da,
func=cpu_intensive_batch,
batch_size=8,
Expand Down Expand Up @@ -113,15 +113,15 @@ def io_intensive_batch(da: DocArray[ImageDoc]) -> DocArray[ImageDoc]:
return da


-def test_map_docs_batch_multithreading():
+def test_map_docs_batched_multithreading():
def time_multithreading_batch(num_workers: int) -> float:
n_docs = 100
da = DocArray[ImageDoc](
[ImageDoc(url=IMAGE_PATHS['png']) for _ in range(n_docs)]
)
start_time = time()
list(
-map_docs_batch(
+map_docs_batched(
da=da,
func=io_intensive_batch,
backend='thread',
Expand Down
6 changes: 3 additions & 3 deletions tests/units/util/test_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from docarray import BaseDoc, DocArray
from docarray.documents import ImageDoc
from docarray.typing import ImageUrl, NdArray
-from docarray.utils.map import map_docs, map_docs_batch
+from docarray.utils.map import map_docs, map_docs_batched
from tests.units.typing.test_bytes import IMAGE_PATHS

N_DOCS = 2
Expand Down Expand Up @@ -73,10 +73,10 @@ class MyImage(BaseDoc):
@pytest.mark.slow
@pytest.mark.parametrize('n_docs,batch_size', [(10, 5), (10, 8)])
@pytest.mark.parametrize('backend', ['thread', 'process'])
-def test_map_docs_batch(n_docs, batch_size, backend):
+def test_map_docs_batched(n_docs, batch_size, backend):

da = DocArray[MyImage]([MyImage(url=IMAGE_PATHS['png']) for _ in range(n_docs)])
-it = map_docs_batch(
+it = map_docs_batched(
da=da, func=load_from_da, batch_size=batch_size, backend=backend
)
assert isinstance(it, Generator)
Expand Down