From ab7be5fc5a40ddc02306ed5c96ed05b6c7692261 Mon Sep 17 00:00:00 2001 From: David Buchaca Prats Date: Thu, 3 Feb 2022 08:47:49 +0100 Subject: [PATCH 1/7] feat: allow empty to get kwargs --- docarray/array/mixins/empty.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docarray/array/mixins/empty.py b/docarray/array/mixins/empty.py index 9959562fdbb..92a95f96624 100644 --- a/docarray/array/mixins/empty.py +++ b/docarray/array/mixins/empty.py @@ -10,11 +10,11 @@ class EmptyMixin: """Helper functions for building arrays with empty Document.""" @classmethod - def empty(cls: Type['T'], size: int = 0) -> 'T': + def empty(cls: Type['T'], size: int = 0, *args, **kwargs) -> 'T': """Create a :class:`DocumentArray` object with :attr:`size` empty :class:`Document` objects. :param size: the number of empty Documents in this container :return: a :class:`DocumentArray` object """ - return cls(Document() for _ in range(size)) + return cls((Document() for _ in range(size)), *args, **kwargs) From ac1c9fda81023e5d5e44d36f3cf06fdd2ee4da3b Mon Sep 17 00:00:00 2001 From: David Buchaca Prats Date: Thu, 3 Feb 2022 09:58:07 +0100 Subject: [PATCH 2/7] feat: fix black --- docarray/array/mixins/io/binary.py | 27 +++++++++++++++++++++++---- docarray/array/mixins/io/csv.py | 2 ++ docarray/array/mixins/io/dataframe.py | 2 +- docarray/array/mixins/io/from_gen.py | 10 ++++++++++ docarray/array/mixins/io/json.py | 20 +++++++++++++++----- docarray/array/mixins/io/pushpull.py | 4 +--- docarray/array/mixins/match.py | 1 - docarray/array/mixins/pydantic.py | 5 ++--- docarray/document/mixins/porting.py | 19 +++++++++++++------ docarray/document/mixins/protobuf.py | 4 ++-- docarray/document/mixins/pydantic.py | 4 ++-- 11 files changed, 71 insertions(+), 27 deletions(-) diff --git a/docarray/array/mixins/io/binary.py b/docarray/array/mixins/io/binary.py index 3b7b772c1c6..6269e26150a 100644 --- a/docarray/array/mixins/io/binary.py +++ b/docarray/array/mixins/io/binary.py @@ -24,6 +24,8 @@ def load_binary( compress: Optional[str] = None, _show_progress: bool = False, streaming: bool = False, + *args, + **kwargs, ) -> Union['DocumentArray', Generator['Document', None, None]]: """Load array elements from a compressed binary file. @@ -49,9 +51,13 @@ def load_binary( protocol=protocol, compress=compress, _show_progress=_show_progress, + *args, + **kwargs, ) else: - return cls._load_binary_all(file_ctx, protocol, compress, _show_progress) + return cls._load_binary_all( + file_ctx, protocol, compress, _show_progress, *args, **kwargs + ) @classmethod def _load_binary_stream( @@ -60,6 +66,8 @@ def _load_binary_stream( protocol=None, compress=None, _show_progress=False, + *args, + **kwargs, ) -> Generator['Document', None, None]: """Yield `Document` objects from a binary file @@ -97,7 +105,9 @@ def _load_binary_stream( ) @classmethod - def _load_binary_all(cls, file_ctx, protocol, compress, show_progress): + def _load_binary_all( + cls, file_ctx, protocol, compress, show_progress, *args, **kwargs + ): """Read a `DocumentArray` object from a binary file :param protocol: protocol to use @@ -166,9 +176,14 @@ def from_bytes( protocol: str = 'pickle-array', compress: Optional[str] = None, _show_progress: bool = False, + *args, + **kwargs, ) -> 'T': return cls.load_binary( - data, protocol=protocol, compress=compress, _show_progress=_show_progress + data, + protocol=protocol, + compress=compress, + _show_progress=_show_progress, ) def save_binary( @@ -280,7 +295,9 @@ def to_protobuf(self) -> 'DocumentArrayProto': return dap @classmethod - def from_protobuf(cls: Type['T'], pb_msg: 'DocumentArrayProto') -> 'T': + def from_protobuf( + cls: Type['T'], pb_msg: 'DocumentArrayProto', *args, **kwargs + ) -> 'T': from .... import Document return cls(Document.from_protobuf(od) for od in pb_msg.docs) @@ -295,6 +312,8 @@ def from_base64( protocol: str = 'pickle-array', compress: Optional[str] = None, _show_progress: bool = False, + *args, + **kwargs, ) -> 'T': return cls.load_binary( base64.b64decode(data), diff --git a/docarray/array/mixins/io/csv.py b/docarray/array/mixins/io/csv.py index 9edca8820d2..9c68ce3fefe 100644 --- a/docarray/array/mixins/io/csv.py +++ b/docarray/array/mixins/io/csv.py @@ -88,6 +88,8 @@ def load_csv( cls: Type['T'], file: Union[str, TextIO], field_resolver: Optional[Dict[str, str]] = None, + *args, + **kwargs, ) -> 'T': """Load array elements from a binary file. diff --git a/docarray/array/mixins/io/dataframe.py b/docarray/array/mixins/io/dataframe.py index 65d2fd7637e..2ae729a0823 100644 --- a/docarray/array/mixins/io/dataframe.py +++ b/docarray/array/mixins/io/dataframe.py @@ -24,7 +24,7 @@ def to_dataframe(self, **kwargs) -> 'DataFrame': return DataFrame.from_dict(self.to_list(), **kwargs) @classmethod - def from_dataframe(cls: Type['T'], df: 'DataFrame') -> 'T': + def from_dataframe(cls: Type['T'], df: 'DataFrame', *args, **kwargs) -> 'T': """Import a :class:`DocumentArray` from a :class:`pandas.DataFrame` object. :param df: a :class:`pandas.DataFrame` object. diff --git a/docarray/array/mixins/io/from_gen.py b/docarray/array/mixins/io/from_gen.py index e038b526ffb..45f9f7f3889 100644 --- a/docarray/array/mixins/io/from_gen.py +++ b/docarray/array/mixins/io/from_gen.py @@ -36,6 +36,8 @@ def from_ndarray( axis: int = 0, size: Optional[int] = None, shuffle: bool = False, + *args, + **kwargs, ) -> 'T': """Build from a numpy array. @@ -65,6 +67,8 @@ def from_files( sampling_rate: Optional[float] = None, read_mode: Optional[str] = None, to_dataturi: bool = False, + *args, + **kwargs, ) -> 'T': """Build from a list of file path or the content of the files. @@ -98,6 +102,8 @@ def from_csv( size: Optional[int] = None, sampling_rate: Optional[float] = None, dialect: Union[str, 'csv.Dialect'] = 'excel', + *args, + **kwargs, ) -> 'T': """Build from CSV. @@ -171,6 +177,8 @@ def from_ndjson( field_resolver: Optional[Dict[str, str]] = None, size: Optional[int] = None, sampling_rate: Optional[float] = None, + *args, + **kwargs, ) -> 'T': """Build from line separated JSON. Yields documents. @@ -203,6 +211,8 @@ def from_lines( field_resolver: Optional[Dict[str, str]] = None, size: Optional[int] = None, sampling_rate: Optional[float] = None, + *args, + **kwargs, ) -> 'T': """Build from lines, json and csv. Yields documents or strings. diff --git a/docarray/array/mixins/io/json.py b/docarray/array/mixins/io/json.py index 2740bfa7fa0..3147c0fe1da 100644 --- a/docarray/array/mixins/io/json.py +++ b/docarray/array/mixins/io/json.py @@ -32,7 +32,11 @@ def save_json( @classmethod def load_json( - cls: Type['T'], file: Union[str, TextIO], protocol: str = 'jsonschema', **kwargs + cls: Type['T'], + file: Union[str, TextIO], + protocol: str = 'jsonschema', + *args, + **kwargs ) -> 'T': """Load array elements from a JSON file. @@ -57,17 +61,23 @@ def load_json( @classmethod def from_json( - cls: Type['T'], file: Union[str, TextIO], protocol: str = 'jsonschema', **kwargs + cls: Type['T'], + file: Union[str, TextIO], + protocol: str = 'jsonschema', + *args, + **kwargs ) -> 'T': - return cls.load_json(file, protocol=protocol, **kwargs) + return cls.load_json(file, protocol=protocol, *args, **kwargs) @classmethod def from_list( - cls: Type['T'], values: List, protocol: str = 'jsonschema', **kwargs + cls: Type['T'], values: List, protocol: str = 'jsonschema', *args, **kwargs ) -> 'T': from .... import Document - return cls(Document.from_dict(v, protocol=protocol, **kwargs) for v in values) + return cls( + Document.from_dict(v, protocol=protocol, *args, **kwargs) for v in values + ) def to_list(self, protocol: str = 'jsonschema', **kwargs) -> List: """Convert the object into a Python list. diff --git a/docarray/array/mixins/io/pushpull.py b/docarray/array/mixins/io/pushpull.py index 4461bffee3b..6688863c785 100644 --- a/docarray/array/mixins/io/pushpull.py +++ b/docarray/array/mixins/io/pushpull.py @@ -91,9 +91,7 @@ def read(self, n=-1): @classmethod def pull( - cls: Type['T'], - token: str, - show_progress: bool = False, + cls: Type['T'], token: str, show_progress: bool = False, *args, **kwargs ) -> 'T': """Pulling a :class:`DocumentArray` from Jina Cloud Service to local. diff --git a/docarray/array/mixins/match.py b/docarray/array/mixins/match.py index 6a8ae335df6..d880bd1a88c 100644 --- a/docarray/array/mixins/match.py +++ b/docarray/array/mixins/match.py @@ -148,7 +148,6 @@ def _match(self, darray, cdist, limit, normalization, metric_name): :param metric_name: if provided, then match result will be marked with this string. :return: distances and indices """ - x_mat = self.embeddings y_mat = darray.embeddings diff --git a/docarray/array/mixins/pydantic.py b/docarray/array/mixins/pydantic.py index 7673aba5133..1aa6c22a698 100644 --- a/docarray/array/mixins/pydantic.py +++ b/docarray/array/mixins/pydantic.py @@ -8,7 +8,7 @@ class PydanticMixin: @classmethod - def get_json_schema(cls, indent: int = 2) -> str: + def get_json_schema(cls, indent: int = 2, *args, **kwargs) -> str: """Return a JSON Schema of DocumentArray class.""" from pydantic import schema_json_of from ...document.pydantic_model import PydanticDocumentArray @@ -23,8 +23,7 @@ def to_pydantic_model(self) -> 'PydanticDocumentArray': @classmethod def from_pydantic_model( - cls: Type['T'], - model: List['BaseModel'], + cls: Type['T'], model: List['BaseModel'], *args, **kwargs ) -> 'T': """Convert a list of PydanticDocument into diff --git a/docarray/document/mixins/porting.py b/docarray/document/mixins/porting.py index 9880e9c6055..13393c5b10b 100644 --- a/docarray/document/mixins/porting.py +++ b/docarray/document/mixins/porting.py @@ -13,7 +13,7 @@ class PortingMixin: @classmethod def from_dict( - cls: Type['T'], obj: Dict, protocol: str = 'jsonschema', **kwargs + cls: Type['T'], obj: Dict, protocol: str = 'jsonschema', *args, **kwargs ) -> 'T': """Convert a dict object into a Document. @@ -31,14 +31,14 @@ def from_dict( from ...proto.docarray_pb2 import DocumentProto pb_msg = DocumentProto() - json_format.ParseDict(obj, pb_msg, **kwargs) + json_format.ParseDict(obj, pb_msg, *args, **kwargs) return cls.from_protobuf(pb_msg) else: raise ValueError(f'protocol=`{protocol}` is not supported') @classmethod def from_json( - cls: Type['T'], obj: str, protocol: str = 'jsonschema', **kwargs + cls: Type['T'], obj: str, protocol: str = 'jsonschema', *args, **kwargs ) -> 'T': """Convert a JSON string into a Document. @@ -85,8 +85,9 @@ def to_dict(self, protocol: str = 'jsonschema', **kwargs) -> Dict[str, Any]: return dataclasses.asdict(self._data) def to_bytes( - self, protocol: str = 'pickle', compress: Optional[str] = None + self, protocol: str = 'protobuf', compress: Optional[str] = None ) -> bytes: + if protocol == 'pickle': bstr = pickle.dumps(self) elif protocol == 'protobuf': @@ -101,8 +102,10 @@ def to_bytes( def from_bytes( cls: Type['T'], data: bytes, - protocol: str = 'pickle', + protocol: str = 'protobuf', compress: Optional[str] = None, + *args, + **kwargs, ) -> 'T': """Build Document object from binary bytes @@ -158,6 +161,8 @@ def from_base64( data: str, protocol: str = 'pickle', compress: Optional[str] = None, + *args, + **kwargs, ) -> 'T': """Build Document object from binary bytes @@ -166,4 +171,6 @@ def from_base64( :param compress: compress method to use :return: a Document object """ - return cls.from_bytes(base64.b64decode(data), protocol, compress) + return cls.from_bytes( + base64.b64decode(data), protocol, compress, *args, **kwargs + ) diff --git a/docarray/document/mixins/protobuf.py b/docarray/document/mixins/protobuf.py index d5154bdfac7..1f090e74867 100644 --- a/docarray/document/mixins/protobuf.py +++ b/docarray/document/mixins/protobuf.py @@ -7,10 +7,10 @@ class ProtobufMixin: @classmethod - def from_protobuf(cls: Type['T'], pb_msg: 'DocumentProto') -> 'T': + def from_protobuf(cls: Type['T'], pb_msg: 'DocumentProto', *args, **kwargs) -> 'T': from ...proto.io import parse_proto - return parse_proto(pb_msg) + return parse_proto(pb_msg, *args, **kwargs) def to_protobuf(self) -> 'DocumentProto': from ...proto.io import flush_proto diff --git a/docarray/document/mixins/pydantic.py b/docarray/document/mixins/pydantic.py index 72347eb2d4d..2d78f12a47b 100644 --- a/docarray/document/mixins/pydantic.py +++ b/docarray/document/mixins/pydantic.py @@ -14,7 +14,7 @@ class PydanticMixin: """Provide helper functions to convert to/from a Pydantic model""" @classmethod - def get_json_schema(cls, indent: int = 2) -> str: + def get_json_schema(cls, indent: int = 2, *args, **kwargs) -> str: """Return a JSON Schema of Document class.""" from ..pydantic_model import PydanticDocument as DP @@ -38,7 +38,7 @@ def to_pydantic_model(self) -> 'PydanticDocument': return DP(**_p_dict) @classmethod - def from_pydantic_model(cls: Type['T'], model: 'BaseModel') -> 'T': + def from_pydantic_model(cls: Type['T'], model: 'BaseModel', *args, **kwargs) -> 'T': """Build a Document object from a Pydantic model :param model: the pydantic data model object that represents a Document From 800221d8d8f70a2c7cda21e8545924890791c1ff Mon Sep 17 00:00:00 2001 From: David Buchaca Prats Date: Thu, 3 Feb 2022 15:22:35 +0100 Subject: [PATCH 3/7] refactor: remove uneeded args kwargs --- docarray/array/mixins/io/binary.py | 16 ++-------------- docarray/array/mixins/io/csv.py | 2 -- docarray/array/mixins/io/dataframe.py | 2 +- docarray/array/mixins/io/json.py | 16 ++++------------ docarray/array/mixins/io/pushpull.py | 4 +--- docarray/array/mixins/pydantic.py | 6 ++---- docarray/document/mixins/porting.py | 8 +------- docarray/document/mixins/pydantic.py | 4 ++-- 8 files changed, 13 insertions(+), 45 deletions(-) diff --git a/docarray/array/mixins/io/binary.py b/docarray/array/mixins/io/binary.py index 6269e26150a..80571a1f269 100644 --- a/docarray/array/mixins/io/binary.py +++ b/docarray/array/mixins/io/binary.py @@ -24,8 +24,6 @@ def load_binary( compress: Optional[str] = None, _show_progress: bool = False, streaming: bool = False, - *args, - **kwargs, ) -> Union['DocumentArray', Generator['Document', None, None]]: """Load array elements from a compressed binary file. @@ -55,9 +53,7 @@ def load_binary( **kwargs, ) else: - return cls._load_binary_all( - file_ctx, protocol, compress, _show_progress, *args, **kwargs - ) + return cls._load_binary_all(file_ctx, protocol, compress, _show_progress) @classmethod def _load_binary_stream( @@ -66,8 +62,6 @@ def _load_binary_stream( protocol=None, compress=None, _show_progress=False, - *args, - **kwargs, ) -> Generator['Document', None, None]: """Yield `Document` objects from a binary file @@ -105,9 +99,7 @@ def _load_binary_stream( ) @classmethod - def _load_binary_all( - cls, file_ctx, protocol, compress, show_progress, *args, **kwargs - ): + def _load_binary_all(cls, file_ctx, protocol, compress, show_progress): """Read a `DocumentArray` object from a binary file :param protocol: protocol to use @@ -176,8 +168,6 @@ def from_bytes( protocol: str = 'pickle-array', compress: Optional[str] = None, _show_progress: bool = False, - *args, - **kwargs, ) -> 'T': return cls.load_binary( data, @@ -312,8 +302,6 @@ def from_base64( protocol: str = 'pickle-array', compress: Optional[str] = None, _show_progress: bool = False, - *args, - **kwargs, ) -> 'T': return cls.load_binary( base64.b64decode(data), diff --git a/docarray/array/mixins/io/csv.py b/docarray/array/mixins/io/csv.py index 9c68ce3fefe..9edca8820d2 100644 --- a/docarray/array/mixins/io/csv.py +++ b/docarray/array/mixins/io/csv.py @@ -88,8 +88,6 @@ def load_csv( cls: Type['T'], file: Union[str, TextIO], field_resolver: Optional[Dict[str, str]] = None, - *args, - **kwargs, ) -> 'T': """Load array elements from a binary file. diff --git a/docarray/array/mixins/io/dataframe.py b/docarray/array/mixins/io/dataframe.py index 2ae729a0823..65d2fd7637e 100644 --- a/docarray/array/mixins/io/dataframe.py +++ b/docarray/array/mixins/io/dataframe.py @@ -24,7 +24,7 @@ def to_dataframe(self, **kwargs) -> 'DataFrame': return DataFrame.from_dict(self.to_list(), **kwargs) @classmethod - def from_dataframe(cls: Type['T'], df: 'DataFrame', *args, **kwargs) -> 'T': + def from_dataframe(cls: Type['T'], df: 'DataFrame') -> 'T': """Import a :class:`DocumentArray` from a :class:`pandas.DataFrame` object. :param df: a :class:`pandas.DataFrame` object. diff --git a/docarray/array/mixins/io/json.py b/docarray/array/mixins/io/json.py index 3147c0fe1da..2c14fdd3593 100644 --- a/docarray/array/mixins/io/json.py +++ b/docarray/array/mixins/io/json.py @@ -35,8 +35,6 @@ def load_json( cls: Type['T'], file: Union[str, TextIO], protocol: str = 'jsonschema', - *args, - **kwargs ) -> 'T': """Load array elements from a JSON file. @@ -57,27 +55,21 @@ def load_json( constructor = Document.from_dict with file_ctx as fp: - return cls(constructor(v, protocol=protocol, **kwargs) for v in fp) + return cls(constructor(v, protocol=protocol) for v in fp) @classmethod def from_json( cls: Type['T'], file: Union[str, TextIO], protocol: str = 'jsonschema', - *args, - **kwargs ) -> 'T': - return cls.load_json(file, protocol=protocol, *args, **kwargs) + return cls.load_json(file, protocol=protocol) @classmethod - def from_list( - cls: Type['T'], values: List, protocol: str = 'jsonschema', *args, **kwargs - ) -> 'T': + def from_list(cls: Type['T'], values: List, protocol: str = 'jsonschema') -> 'T': from .... import Document - return cls( - Document.from_dict(v, protocol=protocol, *args, **kwargs) for v in values - ) + return cls(Document.from_dict(v, protocol=protocol) for v in values) def to_list(self, protocol: str = 'jsonschema', **kwargs) -> List: """Convert the object into a Python list. diff --git a/docarray/array/mixins/io/pushpull.py b/docarray/array/mixins/io/pushpull.py index 6688863c785..e6c5df8aa70 100644 --- a/docarray/array/mixins/io/pushpull.py +++ b/docarray/array/mixins/io/pushpull.py @@ -90,9 +90,7 @@ def read(self, n=-1): ) @classmethod - def pull( - cls: Type['T'], token: str, show_progress: bool = False, *args, **kwargs - ) -> 'T': + def pull(cls: Type['T'], token: str, show_progress: bool = False) -> 'T': """Pulling a :class:`DocumentArray` from Jina Cloud Service to local. :param token: the upload token set during :meth:`.push` diff --git a/docarray/array/mixins/pydantic.py b/docarray/array/mixins/pydantic.py index 1aa6c22a698..d3fa3df478a 100644 --- a/docarray/array/mixins/pydantic.py +++ b/docarray/array/mixins/pydantic.py @@ -8,7 +8,7 @@ class PydanticMixin: @classmethod - def get_json_schema(cls, indent: int = 2, *args, **kwargs) -> str: + def get_json_schema(cls, indent: int = 2) -> str: """Return a JSON Schema of DocumentArray class.""" from pydantic import schema_json_of from ...document.pydantic_model import PydanticDocumentArray @@ -22,9 +22,7 @@ def to_pydantic_model(self) -> 'PydanticDocumentArray': return [d.to_pydantic_model() for d in self] @classmethod - def from_pydantic_model( - cls: Type['T'], model: List['BaseModel'], *args, **kwargs - ) -> 'T': + def from_pydantic_model(cls: Type['T'], model: List['BaseModel']) -> 'T': """Convert a list of PydanticDocument into :param model: the pydantic data model object that represents a DocumentArray diff --git a/docarray/document/mixins/porting.py b/docarray/document/mixins/porting.py index 13393c5b10b..7506a03754c 100644 --- a/docarray/document/mixins/porting.py +++ b/docarray/document/mixins/porting.py @@ -104,8 +104,6 @@ def from_bytes( data: bytes, protocol: str = 'protobuf', compress: Optional[str] = None, - *args, - **kwargs, ) -> 'T': """Build Document object from binary bytes @@ -161,8 +159,6 @@ def from_base64( data: str, protocol: str = 'pickle', compress: Optional[str] = None, - *args, - **kwargs, ) -> 'T': """Build Document object from binary bytes @@ -171,6 +167,4 @@ def from_base64( :param compress: compress method to use :return: a Document object """ - return cls.from_bytes( - base64.b64decode(data), protocol, compress, *args, **kwargs - ) + return cls.from_bytes(base64.b64decode(data), protocol, compress) diff --git a/docarray/document/mixins/pydantic.py b/docarray/document/mixins/pydantic.py index 2d78f12a47b..72347eb2d4d 100644 --- a/docarray/document/mixins/pydantic.py +++ b/docarray/document/mixins/pydantic.py @@ -14,7 +14,7 @@ class PydanticMixin: """Provide helper functions to convert to/from a Pydantic model""" @classmethod - def get_json_schema(cls, indent: int = 2, *args, **kwargs) -> str: + def get_json_schema(cls, indent: int = 2) -> str: """Return a JSON Schema of Document class.""" from ..pydantic_model import PydanticDocument as DP @@ -38,7 +38,7 @@ def to_pydantic_model(self) -> 'PydanticDocument': return DP(**_p_dict) @classmethod - def from_pydantic_model(cls: Type['T'], model: 'BaseModel', *args, **kwargs) -> 'T': + def from_pydantic_model(cls: Type['T'], model: 'BaseModel') -> 'T': """Build a Document object from a Pydantic model :param model: the pydantic data model object that represents a Document From 48d5993df7bb98dd38737459c3ebee42b9a6fbf7 Mon Sep 17 00:00:00 2001 From: David Buchaca Prats Date: Thu, 3 Feb 2022 15:35:10 +0100 Subject: [PATCH 4/7] refactor: remove unwanted args --- docarray/array/mixins/io/binary.py | 6 +----- docarray/array/mixins/io/from_gen.py | 8 -------- docarray/document/mixins/porting.py | 6 +++--- docarray/document/mixins/protobuf.py | 4 ++-- 4 files changed, 6 insertions(+), 18 deletions(-) diff --git a/docarray/array/mixins/io/binary.py b/docarray/array/mixins/io/binary.py index 80571a1f269..32bcc194f34 100644 --- a/docarray/array/mixins/io/binary.py +++ b/docarray/array/mixins/io/binary.py @@ -49,8 +49,6 @@ def load_binary( protocol=protocol, compress=compress, _show_progress=_show_progress, - *args, - **kwargs, ) else: return cls._load_binary_all(file_ctx, protocol, compress, _show_progress) @@ -285,9 +283,7 @@ def to_protobuf(self) -> 'DocumentArrayProto': return dap @classmethod - def from_protobuf( - cls: Type['T'], pb_msg: 'DocumentArrayProto', *args, **kwargs - ) -> 'T': + def from_protobuf(cls: Type['T'], pb_msg: 'DocumentArrayProto') -> 'T': from .... import Document return cls(Document.from_protobuf(od) for od in pb_msg.docs) diff --git a/docarray/array/mixins/io/from_gen.py b/docarray/array/mixins/io/from_gen.py index 45f9f7f3889..2428e2e103e 100644 --- a/docarray/array/mixins/io/from_gen.py +++ b/docarray/array/mixins/io/from_gen.py @@ -67,8 +67,6 @@ def from_files( sampling_rate: Optional[float] = None, read_mode: Optional[str] = None, to_dataturi: bool = False, - *args, - **kwargs, ) -> 'T': """Build from a list of file path or the content of the files. @@ -102,8 +100,6 @@ def from_csv( size: Optional[int] = None, sampling_rate: Optional[float] = None, dialect: Union[str, 'csv.Dialect'] = 'excel', - *args, - **kwargs, ) -> 'T': """Build from CSV. @@ -177,8 +173,6 @@ def from_ndjson( field_resolver: Optional[Dict[str, str]] = None, size: Optional[int] = None, sampling_rate: Optional[float] = None, - *args, - **kwargs, ) -> 'T': """Build from line separated JSON. Yields documents. @@ -211,8 +205,6 @@ def from_lines( field_resolver: Optional[Dict[str, str]] = None, size: Optional[int] = None, sampling_rate: Optional[float] = None, - *args, - **kwargs, ) -> 'T': """Build from lines, json and csv. Yields documents or strings. diff --git a/docarray/document/mixins/porting.py b/docarray/document/mixins/porting.py index 7506a03754c..f09481e5fdc 100644 --- a/docarray/document/mixins/porting.py +++ b/docarray/document/mixins/porting.py @@ -13,7 +13,7 @@ class PortingMixin: @classmethod def from_dict( - cls: Type['T'], obj: Dict, protocol: str = 'jsonschema', *args, **kwargs + cls: Type['T'], obj: Dict, protocol: str = 'jsonschema', **kwargs ) -> 'T': """Convert a dict object into a Document. @@ -31,14 +31,14 @@ def from_dict( from ...proto.docarray_pb2 import DocumentProto pb_msg = DocumentProto() - json_format.ParseDict(obj, pb_msg, *args, **kwargs) + json_format.ParseDict(obj, pb_msg, **kwargs) return cls.from_protobuf(pb_msg) else: raise ValueError(f'protocol=`{protocol}` is not supported') @classmethod def from_json( - cls: Type['T'], obj: str, protocol: str = 'jsonschema', *args, **kwargs + cls: Type['T'], obj: str, protocol: str = 'jsonschema', **kwargs ) -> 'T': """Convert a JSON string into a Document. diff --git a/docarray/document/mixins/protobuf.py b/docarray/document/mixins/protobuf.py index 1f090e74867..d5154bdfac7 100644 --- a/docarray/document/mixins/protobuf.py +++ b/docarray/document/mixins/protobuf.py @@ -7,10 +7,10 @@ class ProtobufMixin: @classmethod - def from_protobuf(cls: Type['T'], pb_msg: 'DocumentProto', *args, **kwargs) -> 'T': + def from_protobuf(cls: Type['T'], pb_msg: 'DocumentProto') -> 'T': from ...proto.io import parse_proto - return parse_proto(pb_msg, *args, **kwargs) + return parse_proto(pb_msg) def to_protobuf(self) -> 'DocumentProto': from ...proto.io import flush_proto From 3dd666f6058f24cb8d1d26338c25d4e561b54d30 Mon Sep 17 00:00:00 2001 From: David Buchaca Prats Date: Thu, 3 Feb 2022 15:39:27 +0100 Subject: [PATCH 5/7] refactor: revert document mixin changes --- docarray/document/mixins/porting.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docarray/document/mixins/porting.py b/docarray/document/mixins/porting.py index f09481e5fdc..fbfcd859c71 100644 --- a/docarray/document/mixins/porting.py +++ b/docarray/document/mixins/porting.py @@ -85,7 +85,7 @@ def to_dict(self, protocol: str = 'jsonschema', **kwargs) -> Dict[str, Any]: return dataclasses.asdict(self._data) def to_bytes( - self, protocol: str = 'protobuf', compress: Optional[str] = None + self, protocol: str = 'pickle', compress: Optional[str] = None ) -> bytes: if protocol == 'pickle': @@ -102,7 +102,7 @@ def to_bytes( def from_bytes( cls: Type['T'], data: bytes, - protocol: str = 'protobuf', + protocol: str = 'pickle', compress: Optional[str] = None, ) -> 'T': """Build Document object from binary bytes From e57c87588b494beaca33c6e79ef3b24ba5cb511b Mon Sep 17 00:00:00 2001 From: David Buchaca Prats Date: Thu, 3 Feb 2022 15:39:49 +0100 Subject: [PATCH 6/7] refactor: revert document mixin changes --- docarray/document/mixins/porting.py | 1 - 1 file changed, 1 deletion(-) diff --git a/docarray/document/mixins/porting.py b/docarray/document/mixins/porting.py index fbfcd859c71..9880e9c6055 100644 --- a/docarray/document/mixins/porting.py +++ b/docarray/document/mixins/porting.py @@ -87,7 +87,6 @@ def to_dict(self, protocol: str = 'jsonschema', **kwargs) -> Dict[str, Any]: def to_bytes( self, protocol: str = 'pickle', compress: Optional[str] = None ) -> bytes: - if protocol == 'pickle': bstr = pickle.dumps(self) elif protocol == 'protobuf': From cb254632486d4d1d68efb6fe2a4d28186a7a3330 Mon Sep 17 00:00:00 2001 From: David Buchaca Prats Date: Thu, 3 Feb 2022 16:27:57 +0100 Subject: [PATCH 7/7] refactor: restore kwargs from main --- docarray/array/mixins/io/json.py | 18 ++++++++---------- docarray/array/mixins/match.py | 1 + 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/docarray/array/mixins/io/json.py b/docarray/array/mixins/io/json.py index 2c14fdd3593..2740bfa7fa0 100644 --- a/docarray/array/mixins/io/json.py +++ b/docarray/array/mixins/io/json.py @@ -32,9 +32,7 @@ def save_json( @classmethod def load_json( - cls: Type['T'], - file: Union[str, TextIO], - protocol: str = 'jsonschema', + cls: Type['T'], file: Union[str, TextIO], protocol: str = 'jsonschema', **kwargs ) -> 'T': """Load array elements from a JSON file. @@ -55,21 +53,21 @@ def load_json( constructor = Document.from_dict with file_ctx as fp: - return cls(constructor(v, protocol=protocol) for v in fp) + return cls(constructor(v, protocol=protocol, **kwargs) for v in fp) @classmethod def from_json( - cls: Type['T'], - file: Union[str, TextIO], - protocol: str = 'jsonschema', + cls: Type['T'], file: Union[str, TextIO], protocol: str = 'jsonschema', **kwargs ) -> 'T': - return cls.load_json(file, protocol=protocol) + return cls.load_json(file, protocol=protocol, **kwargs) @classmethod - def from_list(cls: Type['T'], values: List, protocol: str = 'jsonschema') -> 'T': + def from_list( + cls: Type['T'], values: List, protocol: str = 'jsonschema', **kwargs + ) -> 'T': from .... import Document - return cls(Document.from_dict(v, protocol=protocol) for v in values) + return cls(Document.from_dict(v, protocol=protocol, **kwargs) for v in values) def to_list(self, protocol: str = 'jsonschema', **kwargs) -> List: """Convert the object into a Python list. diff --git a/docarray/array/mixins/match.py b/docarray/array/mixins/match.py index d880bd1a88c..6a8ae335df6 100644 --- a/docarray/array/mixins/match.py +++ b/docarray/array/mixins/match.py @@ -148,6 +148,7 @@ def _match(self, darray, cdist, limit, normalization, metric_name): :param metric_name: if provided, then match result will be marked with this string. :return: distances and indices """ + x_mat = self.embeddings y_mat = darray.embeddings