Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions benchmark_serializing.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
import os
import sys
import time

from tests import random_docs

from docarray import DocumentArray, Document

from docarray.proto.dummy_pb2 import BytesWrapper, DocsWrapper

# Benchmark workload parameters.
DOC_COUNT = 10_000  # number of documents requested from random_docs()
DOC_SIZE = 100 * 1024  # bytes per document; only used by the disabled generator below

# Alternative workload, currently disabled: DOC_COUNT documents, each carrying
# DOC_SIZE random bytes in its buffer.
# da = DocumentArray(
#     [
#         Document(buffer=bytes(bytearray(os.urandom(DOC_SIZE))))
#         for _ in range(DOC_COUNT)
#     ]
# )

# Active workload: whatever the test helper builds for DOC_COUNT documents.
da = random_docs(DOC_COUNT)


def serialize_bytes_wrapper():
    """Serialize the module-level ``da`` as a single blob inside a ``BytesWrapper``.

    The collection is first dumped with ``da.to_bytes()``; the resulting blob
    is stored in the wrapper's ``docs`` field and the wrapper is serialized.

    Returns:
        The serialized ``BytesWrapper`` message.
    """
    packed_docs = da.to_bytes()
    wrapper = BytesWrapper(docs=packed_docs)
    return wrapper.SerializeToString()


def deserialize_bytes_wrapper(proto_byte_array):
    """Rebuild a ``DocumentArray`` from a serialized ``BytesWrapper``.

    Args:
        proto_byte_array: serialized ``BytesWrapper`` message, as produced by
            ``serialize_bytes_wrapper``.

    Returns:
        The result of ``DocumentArray.load_binary`` on the wrapper's ``docs``
        payload.
    """
    wrapper = BytesWrapper()
    wrapper.ParseFromString(proto_byte_array)
    packed_docs = wrapper.docs
    return DocumentArray.load_binary(packed_docs)


def serialize_doc_wrapper():
    """Serialize the module-level ``da`` as a ``DocsWrapper`` of per-document protos.

    Every document in ``da`` is converted with ``to_protobuf()`` and appended
    to the wrapper's repeated ``docs`` field before the wrapper is serialized.

    Returns:
        The serialized ``DocsWrapper`` message.
    """
    wrapper = DocsWrapper()
    for doc in da:
        doc_proto = doc.to_protobuf()
        wrapper.docs.append(doc_proto)
    return wrapper.SerializeToString()


def deserialize_doc_wrapper(proto_byte_array):
    """Parse a serialized ``DocsWrapper`` and hand back its document protos.

    Args:
        proto_byte_array: serialized ``DocsWrapper`` message, as produced by
            ``serialize_doc_wrapper``.

    Returns:
        The wrapper's repeated ``docs`` field. The entries are still protobuf
        messages; converting them to ``Document`` objects is left to the caller.
    """
    wrapper = DocsWrapper()
    wrapper.ParseFromString(proto_byte_array)
    doc_protos = wrapper.docs
    return doc_protos


# ---------------------------------------------------------------------------
# Benchmark driver.
#
# Timing uses time.perf_counter(): it is monotonic and has the highest
# available resolution, whereas time.time() follows the (adjustable) wall
# clock. Serialized size is reported with len(), i.e. the number of payload
# bytes; sys.getsizeof() would add the bytes-object header on top.
# ---------------------------------------------------------------------------

# Strategy 1: the whole collection as one opaque blob inside BytesWrapper.
start_bw_serializer = time.perf_counter()
proto_byte_array = serialize_bytes_wrapper()
end_bw_serializer = time.perf_counter()

start_bw_deserializer = time.perf_counter()
loaded_da = deserialize_bytes_wrapper(proto_byte_array)
end_bw_deserializer = time.perf_counter()

print(
    f'Byte array proto serialization took {end_bw_serializer-start_bw_serializer}, '
    f'deserialization took {end_bw_deserializer-start_bw_deserializer} '
    f'and serialized size is {len(proto_byte_array)} '
    f'- loaded da has {len(loaded_da)} docs'
)

# Strategy 2: one DocumentProto per document inside DocsWrapper.
start_dw_serializer = time.perf_counter()
proto_byte_array = serialize_doc_wrapper()
end_dw_serializer = time.perf_counter()

# Deserialization is timed up to a usable DocumentArray, so the per-document
# conversion from proto is part of the measured interval (as it is, implicitly,
# for strategy 1).
start_dw_deserializer = time.perf_counter()
new_da = DocumentArray()
loaded_da = deserialize_doc_wrapper(proto_byte_array)
for d in loaded_da:
    new_da.append(Document.from_protobuf(d))

end_dw_deserializer = time.perf_counter()

# Report the rebuilt DocumentArray's length (not the intermediate proto
# container) so both reports count the same kind of object.
print(
    f'Doc array proto serialization took {end_dw_serializer-start_dw_serializer}, '
    f'deserialization took {end_dw_deserializer-start_dw_deserializer} '
    f'and serialized size is {len(proto_byte_array)} '
    f'- loaded da has {len(new_da)} docs'
)
10 changes: 10 additions & 0 deletions docarray/proto/dummy.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
syntax = "proto3";
import "docarray.proto";

// Wrapper that carries a whole document collection as one opaque blob.
// benchmark_serializing.py fills `docs` with the output of `da.to_bytes()`.
message BytesWrapper {
// Pre-serialized documents; this message does not describe their layout.
bytes docs = 1;
}

// Wrapper that carries a document collection as individual protobuf messages.
// benchmark_serializing.py appends one `to_protobuf()` result per document.
message DocsWrapper {
// One DocumentProto entry per document, in the order they were appended.
repeated docarray.DocumentProto docs = 1;
}
113 changes: 113 additions & 0 deletions docarray/proto/dummy_pb2.py

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.