Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7af2e98
test: test_embed covers weaviate
alaeddine-13 Feb 1, 2022
ec1c644
fix: drop table only if exists
alaeddine-13 Feb 1, 2022
16fdd55
feat: make sqlite document array pickable
alaeddine-13 Feb 1, 2022
113a4c9
ci: increase ci timeout
alaeddine-13 Feb 1, 2022
a0bc613
feat: reset default serialization protocol for sqlite to pickle
alaeddine-13 Feb 1, 2022
e443cf4
test: test_content covers weaviate
alaeddine-13 Feb 1, 2022
34ad9a6
test: test_parallel covers weaviate
alaeddine-13 Feb 1, 2022
a8c4d07
test: test_empty covers weaviate
alaeddine-13 Feb 1, 2022
cbfc4e4
test: reduce the number of docs in test_embed
alaeddine-13 Feb 1, 2022
3dac063
fix: protobuf as default backend for weaviate
alaeddine-13 Feb 2, 2022
907d5f9
test: cover weaviate in getset
alaeddine-13 Feb 2, 2022
b1b1060
fix: add start_weaviate
alaeddine-13 Feb 2, 2022
e4dadfa
test: cover weaviate in test_magic
alaeddine-13 Feb 2, 2022
07b93e6
test: cover weaviate in test_construct
alaeddine-13 Feb 2, 2022
50090bf
ci: reduce ci timeout
alaeddine-13 Feb 2, 2022
6d00102
test: cover weaviate
alaeddine-13 Feb 2, 2022
6d1a114
test: cover weaviate in test_text
alaeddine-13 Feb 2, 2022
2088999
chore: add explaining comments to __setitem__
alaeddine-13 Feb 2, 2022
f767ff1
fix: accept extra kwargs
alaeddine-13 Feb 2, 2022
e803499
fix: fix change id in weaviate
alaeddine-13 Feb 2, 2022
c8f736a
feat: show storage information in summary
alaeddine-13 Feb 2, 2022
52a2526
test: fix test_from_to_base64
alaeddine-13 Feb 2, 2022
b700904
test: fix test_from_to_base64
alaeddine-13 Feb 2, 2022
5f2c0bd
fix: fix set many attributes by offset
alaeddine-13 Feb 2, 2022
b80d2f3
chore: handle todos
alaeddine-13 Feb 2, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions docarray/array/mixins/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def summary(self):
table.add_row('Common Attributes', str(list(attr_counter.items())[0][0]))
else:
for _a, _n in attr_counter.most_common():
if _n <= 1:
if _n == 1:
_doc_text = f'{_n} Document has'
else:
_doc_text = f'{_n} Documents have'
Expand Down Expand Up @@ -96,7 +96,11 @@ def summary(self):
str(len(_a)),
str(any(_aa is None for _aa in _a)),
)
console.print(table, attr_table)

storage_table = Table(box=box.SIMPLE, title='Storage Summary')
self._fill_storage_table(storage_table)

console.print(table, attr_table, storage_table)

def plot_embeddings(
self,
Expand Down
48 changes: 44 additions & 4 deletions docarray/array/mixins/setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,24 +64,41 @@ def __setitem__(
value: Union['Document', Sequence['Document']],
):

# set by offset
# allows da[1] = Document()
if isinstance(index, (int, np.generic)) and not isinstance(index, bool):
self._set_doc_by_offset(int(index), value)
elif isinstance(index, str):
# set by traversal paths
# allows da['@m,c'] = [m1, m2, ..., mn, c1, c2, ..., cp]
if index.startswith('@'):
self._set_doc_value_pairs_nested(self.traverse_flat(index[1:]), value)

# set by ID
# allows da['id_123'] = Document()
else:
self._set_doc_by_id(index, value)
# set by slice
# allows da[1:3] = [d1, d2]
elif isinstance(index, slice):
self._set_docs_by_slice(index, value)

# flatten and set
# allows da[...] = [d1, d2,..., dn]
elif index is Ellipsis:
self._set_doc_value_pairs(self.flatten(), value)

# index is sequence
elif isinstance(index, Sequence):
# allows da[idx1, idx2] = value
if isinstance(index, tuple) and len(index) == 2:
self._set_by_pair(index[0], index[1], value)

# allows da[True, False, True, True] = [d1, d2, d3]
elif isinstance(index[0], bool):
self._set_by_mask(index, value)

# allows da[id1, id2, id3] = [d1, d2, d3]
elif isinstance(index[0], (int, str)):
for si, _val in zip(index, value):
self[si] = _val # leverage existing setter
Expand All @@ -90,6 +107,7 @@ def __setitem__(
f'{index} should be either a sequence of bool, int or str'
)

# set by ndarray
elif isinstance(index, np.ndarray):
index = index.squeeze()
if index.ndim == 1:
Expand All @@ -104,12 +122,15 @@ def __setitem__(
def _set_by_pair(self, idx1, idx2, value):
if isinstance(idx1, str) and not idx1.startswith('@'):
# second is an ID
# allows da[id1, id2] = [d1, d2]
if isinstance(idx2, str) and idx2 in self:
self._set_doc_value_pairs((self[idx1], self[idx2]), value)
# second is an attribute
# allows da[id, attr] = attr_value
elif isinstance(idx2, str) and hasattr(self[idx1], idx2):
self._set_doc_attr_by_id(idx1, idx2, value)
# second is a list of attributes:
# allows da[id, [attr1, attr2, attr3]] = [v1, v2, v3]
elif (
isinstance(idx2, Sequence)
and all(isinstance(attr, str) for attr in idx2)
Expand All @@ -121,36 +142,40 @@ def _set_by_pair(self, idx1, idx2, value):
raise IndexError(f'`{idx2}` is neither a valid id nor attribute name')
elif isinstance(idx1, int):
# second is an offset
# allows da[offset1, offset2] = [d1, d2]
if isinstance(idx2, int):
self._set_doc_value_pairs((self[idx1], self[idx2]), value)
# second is an attribute
# allows da[offset, attr] = value
elif isinstance(idx2, str) and hasattr(self[idx1], idx2):
self._set_doc_attr_by_offset(idx1, idx2, value)
# second is a list of attributes:
# second is a list of attributes
# allows da[offset, [attr1, attr2, attr3]] = [v1, v2, v3]
elif (
isinstance(idx2, Sequence)
and all(isinstance(attr, str) for attr in idx2)
and all(hasattr(self[idx1], attr) for attr in idx2)
):
for attr, _v in zip(idx2, value):
self._set_doc_attr_by_id(idx1, attr, _v)
self._set_doc_attr_by_offset(idx1, attr, _v)
else:
raise IndexError(f'`{idx2}` must be an attribute or list of attributes')

# allows da[sequence/slice/ellipsis/traversal_path, attributes] = [v1, v2, ...]
elif (
isinstance(idx1, (slice, Sequence))
or idx1 is Ellipsis
or (isinstance(idx1, str) and idx1.startswith('@'))
):
self._set_docs_attributes(idx1, idx2, value)
# TODO: else raise error
else:
raise IndexError(f'Unsupported first index type {typename(idx1)}: {idx1}')

def _set_by_mask(self, mask: List[bool], value):
_selected = itertools.compress(self, mask)
self._set_doc_value_pairs(_selected, value)

def _set_docs_attributes(self, index, attributes, value):
# TODO: handle index is Ellipsis
if isinstance(attributes, str):
# a -> [a]
# [a, a] -> [a, a]
Expand All @@ -159,6 +184,21 @@ def _set_docs_attributes(self, index, attributes, value):

if isinstance(index, str) and index.startswith('@'):
self._set_docs_attributes_traversal_paths(index, attributes, value)
elif index is Ellipsis:
_docs = self[index]
for _a, _v in zip(attributes, value):
if _a == 'tensor':
_docs.tensors = _v
elif _a == 'embedding':
_docs.embeddings = _v
else:
if not isinstance(_v, (list, tuple)):
for _d in _docs:
setattr(_d, _a, _v)
else:
for _d, _vv in zip(_docs, _v):
setattr(_d, _a, _vv)
self._set_doc_value_pairs_nested(_docs, _docs)
else:
_docs = self[index]
if not _docs:
Expand Down
8 changes: 8 additions & 0 deletions docarray/array/storage/base/backend.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from abc import ABC, abstractmethod
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from rich.table import Table


class BaseBackendMixin(ABC):
    """Abstract mixin holding the hooks shared by storage backends."""

    @abstractmethod
    def _init_storage(self, *args, **kwargs):
        """Initialise the backend storage; concrete classes must override this."""

    def _fill_storage_table(self, table: 'Table'):
        """Write the backend-independent rows of the storage summary.

        Hides the header of ``table`` and adds a single ``Class`` row holding
        the name of the concrete class of ``self``.

        :param table: the table object to fill in place
        """
        table.show_header = False
        table.add_row('Class', type(self).__name__)
5 changes: 5 additions & 0 deletions docarray/array/storage/memory/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from ....types import (
DocumentArraySourceType,
)
from rich.table import Table


def needs_id2offset_rebuild(func) -> Callable:
Expand Down Expand Up @@ -86,3 +87,7 @@ def _init_storage(
self.append(Document(_docs, copy=True))
else:
self.append(_docs)

def _fill_storage_table(self, table: 'Table'):
    """Add the in-memory backend's row to the storage summary table.

    The parent implementation runs first, so its rows precede the
    ``Backend`` row added here.

    :param table: the table object to fill in place
    """
    super()._fill_storage_table(table)
    table.add_row('Backend', 'In Memory')
5 changes: 1 addition & 4 deletions docarray/array/storage/sqlite/__init__.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,11 @@
from abc import ABC

from .backend import BackendMixin, SqliteConfig
from .binary import SqliteBinaryIOMixin
from .getsetdel import GetSetDelMixin
from .seqlike import SequenceLikeMixin

__all__ = ['StorageMixins', 'SqliteConfig']


class StorageMixins(
SqliteBinaryIOMixin, BackendMixin, GetSetDelMixin, SequenceLikeMixin, ABC
):
class StorageMixins(BackendMixin, GetSetDelMixin, SequenceLikeMixin, ABC):
...
40 changes: 34 additions & 6 deletions docarray/array/storage/sqlite/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from ....types import (
DocumentArraySourceType,
)
from rich.table import Table


def _sanitize_table_name(table_name: str) -> str:
Expand Down Expand Up @@ -76,16 +77,18 @@ def _init_storage(
'Document', lambda x: Document.from_bytes(x, **config.serialize_config)
)

_conn_kwargs = dict(
detect_types=sqlite3.PARSE_DECLTYPES, check_same_thread=False
)
_conn_kwargs = dict()
_conn_kwargs.update(config.conn_config)
if config.connection is None:
config.connection = NamedTemporaryFile().name

if isinstance(config.connection, str):
self._connection = sqlite3.connect(
NamedTemporaryFile().name, **_conn_kwargs
config.connection,
detect_types=sqlite3.PARSE_DECLTYPES,
check_same_thread=False,
**_conn_kwargs,
)
elif isinstance(config.connection, str):
self._connection = sqlite3.connect(config.connection, **_conn_kwargs)
elif isinstance(config.connection, sqlite3.Connection):
self._connection = config.connection
else:
Expand Down Expand Up @@ -118,3 +121,28 @@ def _init_storage(
else:
if isinstance(_docs, Document):
self.append(_docs)

def __getstate__(self):
d = dict(self.__dict__)
del d['_connection']
return d

def __setstate__(self, state):
self.__dict__ = state
_conn_kwargs = dict()
_conn_kwargs.update(state['_config'].conn_config)
self._connection = sqlite3.connect(
state['_config'].connection,
detect_types=sqlite3.PARSE_DECLTYPES,
check_same_thread=False,
**_conn_kwargs,
)

def _fill_storage_table(self, table: 'Table'):
    """Add the SQLite backend's rows to the storage summary table.

    The parent implementation runs first; this method then appends the
    backend name, the connection, the table name and the serialization
    protocol taken from ``serialize_config``.

    :param table: the table object to fill in place
    """
    super()._fill_storage_table(table)
    table.add_row('Backend', 'SQLite (https://www.sqlite.org)')
    # ``_init_storage`` also accepts an existing ``sqlite3.Connection`` as
    # ``config.connection``. A table cell must be a string or another
    # renderable, so stringify the value instead of passing it raw.
    table.add_row('Connection', str(self._config.connection))
    table.add_row('Table Name', self._table_name)
    table.add_row(
        'Serialization Protocol', self._config.serialize_config.get('protocol')
    )
134 changes: 0 additions & 134 deletions docarray/array/storage/sqlite/binary.py

This file was deleted.

Loading