From d365266cda4736a8962c9e364c3e97dfc7d40756 Mon Sep 17 00:00:00 2001 From: Alaeddine Abdessalem Date: Fri, 2 Sep 2022 11:44:06 +0100 Subject: [PATCH 1/2] perf: only check if field is set --- docarray/array/queryset/lookup.py | 4 +++- docarray/document/data.py | 36 ++++++++++++++++++------------- 2 files changed, 24 insertions(+), 16 deletions(-) diff --git a/docarray/array/queryset/lookup.py b/docarray/array/queryset/lookup.py index b31a503b822..b41643914a3 100644 --- a/docarray/array/queryset/lookup.py +++ b/docarray/array/queryset/lookup.py @@ -28,6 +28,8 @@ """ from typing import TYPE_CHECKING +from docarray.document.data import _is_not_empty + if TYPE_CHECKING: from docarray import Document import re @@ -121,7 +123,7 @@ def lookup(key, val, doc: 'Document') -> bool: return is_empty != val else: - return (get_key in doc.non_empty_fields) == val + return (_is_not_empty(get_key, value)) == val else: # return value == val raise ValueError( diff --git a/docarray/document/data.py b/docarray/document/data.py index 6b033da23a0..61b4681bfd3 100644 --- a/docarray/document/data.py +++ b/docarray/document/data.py @@ -35,6 +35,26 @@ _all_mime_types = set(mimetypes.types_map.values()) +def _is_not_empty(attribute, value): + if attribute not in default_values: + return True + else: + dv = default_values[attribute] + if dv in ( + 'ChunkArray', + 'MatchArray', + 'DocumentArray', + list, + dict, + 'Dict[str, NamedScore]', + ): + if value: + return True + elif value != dv: + return True + return False + + @dataclass(unsafe_hash=True, eq=False) class DocumentData: _reference_doc: 'Document' = field(hash=False, compare=False) @@ -68,22 +88,8 @@ def _non_empty_fields(self) -> Tuple[str]: if not f_name.startswith('_') or f_name == '_metadata': v = getattr(self, f_name) if v is not None: - if f_name not in default_values: + if _is_not_empty(f_name, v): r.append(f_name) - else: - dv = default_values[f_name] - if dv in ( - 'ChunkArray', - 'MatchArray', - 'DocumentArray', - list, - dict, - 'Dict[str, NamedScore]', - ): - if v: - r.append(f_name) - elif v != dv: - r.append(f_name) return tuple(r) From c7af65c883cc978ea8d4c471464b3292c9ca5de3 Mon Sep 17 00:00:00 2001 From: Alaeddine Abdessalem Date: Fri, 2 Sep 2022 12:05:26 +0100 Subject: [PATCH 2/2] fix: correct _is_not_empty implementation --- docarray/document/data.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/docarray/document/data.py b/docarray/document/data.py index 61b4681bfd3..c6e4f53f3ea 100644 --- a/docarray/document/data.py +++ b/docarray/document/data.py @@ -36,22 +36,23 @@ def _is_not_empty(attribute, value): - if attribute not in default_values: - return True - else: - dv = default_values[attribute] - if dv in ( - 'ChunkArray', - 'MatchArray', - 'DocumentArray', - list, - dict, - 'Dict[str, NamedScore]', - ): - if value: - return True - elif value != dv: + if value is not None: + if attribute not in default_values: return True + else: + dv = default_values[attribute] + if dv in ( + 'ChunkArray', + 'MatchArray', + 'DocumentArray', + list, + dict, + 'Dict[str, NamedScore]', + ): + if value: + return True + elif value != dv: + return True return False @@ -87,9 +88,8 @@ def _non_empty_fields(self) -> Tuple[str]: f_name = f.name if not f_name.startswith('_') or f_name == '_metadata': v = getattr(self, f_name) - if v is not None: - if _is_not_empty(f_name, v): - r.append(f_name) + if _is_not_empty(f_name, v): + r.append(f_name) return tuple(r)