Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 10 additions & 15 deletions docarray/array/mixins/io/json.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,7 @@ def save_json(
file_ctx = open(file, 'w', encoding=encoding)

with file_ctx as fp:
for d in self:
json.dump(d.to_dict(protocol=protocol, **kwargs), fp)
fp.write('\n')
fp.write(self.to_json(protocol=protocol, **kwargs))

@classmethod
def load_json(
Expand All @@ -51,30 +49,27 @@ def load_json(

:return: a DocumentArrayLike object
"""

from .... import Document

constructor = Document.from_json
if hasattr(file, 'read'):
file_ctx = nullcontext(file)
elif os.path.exists(file):
file_ctx = open(file, 'r', encoding=encoding)
else:
file_ctx = nullcontext(json.loads(file))
constructor = Document.from_dict
file_ctx = open(file, 'r', encoding=encoding)

with file_ctx as fp:
return cls([constructor(v, protocol=protocol) for v in fp], **kwargs)
return cls.from_json(fp.read(), protocol=protocol, **kwargs)

@classmethod
def from_json(
    cls: Type['T'],
    file: Union[str, bytes, bytearray],
    protocol: str = 'jsonschema',
    encoding: str = 'utf-8',
    **kwargs
) -> 'T':
    """Construct the array from a JSON payload.

    :param file: the JSON content as ``str``, ``bytes`` or ``bytearray``;
        it must decode to a JSON array whose elements are per-Document dicts.
        (File paths and file objects are handled by :meth:`load_json`, not here.)
    :param protocol: per-Document deserialization protocol, forwarded to
        ``Document.from_dict``.
    :param encoding: kept for signature backward-compatibility.
        NOTE(review): unused by this body — ``json.loads`` decodes
        bytes/bytearray input itself; confirm whether it can be dropped.
    :param kwargs: extra keyword arguments forwarded to the class constructor.
    :return: a new instance with one Document per JSON array element.
    """
    # Local import avoids a circular dependency between the array and
    # Document modules at import time.
    from .... import Document

    json_docs = json.loads(file)
    return cls(
        [Document.from_dict(v, protocol=protocol) for v in json_docs], **kwargs
    )

@classmethod
def from_list(
Expand Down
11 changes: 4 additions & 7 deletions docarray/document/mixins/porting.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import base64
import dataclasses
import json
import pickle
import warnings
from typing import Optional, TYPE_CHECKING, Type, Dict, Any, Union
Expand Down Expand Up @@ -34,7 +35,7 @@ def from_dict(
json_format.ParseDict(obj, pb_msg, **kwargs)
return cls.from_protobuf(pb_msg)
else:
raise ValueError(f'protocol=`{protocol}` is not supported')
return cls(obj)

@classmethod
def from_json(
Expand Down Expand Up @@ -62,7 +63,7 @@ def from_json(
json_format.Parse(obj, pb_msg, **kwargs)
return cls.from_protobuf(pb_msg)
else:
raise ValueError(f'protocol=`{protocol}` is not supported')
return cls.from_dict(json.loads(obj), protocol=protocol)

def to_dict(self, protocol: str = 'jsonschema', **kwargs) -> Dict[str, Any]:
"""Convert itself into a Python dict object.
Expand All @@ -81,11 +82,7 @@ def to_dict(self, protocol: str = 'jsonschema', **kwargs) -> Dict[str, Any]:
**kwargs,
)
else:
warnings.warn(
f'protocol=`{protocol}` is not supported, '
f'the result dict is a Python dynamic typing dict without any promise on the schema.'
)
return dataclasses.asdict(self._data)
raise ValueError(f'protocol=`{protocol}` is not supported')

def to_bytes(
self, protocol: str = 'pickle', compress: Optional[str] = None
Expand Down
2 changes: 2 additions & 0 deletions docs/fundamentals/document/serialization.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ print(d_as_json, d)

By default, it uses {ref}`JSON Schema and pydantic model<schema-gen>` for serialization, i.e. `protocol='jsonschema'`. You can switch the method to `protocol='protobuf'`, which leverages Protobuf as the JSON serialization backend.

To load an arbitrary JSON file, set `protocol=None`. But as the content is arbitrary, there is no guarantee it can be successfully loaded. DocArray makes a best-effort attempt by first loading the JSON into a `dict` and then loading it via `Document(dict)`.

```python
from docarray import Document

Expand Down
26 changes: 26 additions & 0 deletions tests/unit/document/test_porting.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import json

import pytest

from docarray import Document, DocumentArray
Expand Down Expand Up @@ -25,6 +27,30 @@ def test_dict_json(target, protocol, to_fn):
assert d == d_r


@pytest.mark.parametrize('to_fn,preproc', [('dict', dict), ('json', json.dumps)])
def test_schemaless(to_fn, preproc):
    """Loading an arbitrary (schemaless) dict or JSON string with
    ``protocol=None`` should land every top-level key in ``doc.tags``
    unchanged, including nested lists and dicts."""
    # Named `payload` (not `input`) to avoid shadowing the builtin.
    payload = {
        'attr1': 123,
        'attr2': 'abc',
        'attr3': [1, 2, 3],
        'attr4': ['a', 'b', 'c'],
        'attr5': {
            'attr6': 'a',
            'attr7': 1,
        },
    }
    # Dispatch to Document.from_dict or Document.from_json depending on
    # the parametrized variant; `preproc` prepares the matching input form.
    doc = getattr(Document, f'from_{to_fn}')(preproc(payload), protocol=None)
    assert doc.tags['attr1'] == 123
    assert doc.tags['attr2'] == 'abc'
    assert doc.tags['attr3'] == [1, 2, 3]
    assert doc.tags['attr4'] == ['a', 'b', 'c']

    assert doc.tags['attr5'] == {
        'attr6': 'a',
        'attr7': 1,
    }


@pytest.mark.parametrize('protocol', ['protobuf', 'pickle'])
@pytest.mark.parametrize('compress', ['lz4', 'bz2', 'lzma', 'zlib', 'gzip', None])
def test_to_from_base64(protocol, compress):
Expand Down