From 0072fc658ba04f866c19651dd7386c0ff004b616 Mon Sep 17 00:00:00 2001 From: jupyterjazz Date: Mon, 3 Apr 2023 10:07:46 +0200 Subject: [PATCH 1/3] refactor: dummy change Signed-off-by: jupyterjazz --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 8d4b45ae264..18f8b4113bb 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ DocArray is a library for **representing, sending and storing multi-modal data**, with a focus on applications in **ML** and **Neural Search**. -This means that DocArray lets you do the following things: +This means that `DocArray` lets you do the following things: ## Represent From 04ef58750ec3ac0a743c506cb97c23a5eec0d21e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E3=82=AB=E3=83=AC=E3=83=B3?= <99171855+RStar2022@users.noreply.github.com> Date: Mon, 3 Apr 2023 17:09:29 +0900 Subject: [PATCH 2/3] feat: implement == for document and document array (#1224) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * refactor: rename predefined documents (#1208) * refactor: rename Image to ImageDoc Signed-off-by: samsja * refactor: rename Text to TextDoc Signed-off-by: samsja * refactor: rename Audio to AudioDoc Signed-off-by: samsja * refactor: rename Video to VideoDOc Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * fix: fix test Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * feat: support other text formats (#1207) * feat: add validation function and filetypes feat: add validation function and filetypes fix: formatting Signed-off-by: rik61072@gmail.com * feat: added extension validation tests for TextURL Signed-off-by: rik61072@gmail.com * fix: fix for tests and PR codereview Signed-off-by: rik61072@gmail.com * fix: added internet mark to json dump test for texturl Signed-off-by: rik61072@gmail.com --------- Signed-off-by: rik61072@gmail.com Signed-off-by: RStar2022 * refactor: refactor query builder (#1213) * refactor: query builder Signed-off-by: Johannes Messner * docs: add guidance for query builder Signed-off-by: Johannes Messner --------- Signed-off-by: Johannes Messner Signed-off-by: RStar2022 * feat(index): automatically convert a dict to Document (#1215) * feat: add _convert_dict_to_doc Signed-off-by: AnneY * fix: str cut Signed-off-by: AnneY * feat: _get_items add return type Signed-off-by: AnneY * fix: mypy Signed-off-by: AnneY * test: add test for _convert_dict_to_doc Signed-off-by: AnneY * refactor: simplify _convert_to_doc_list Signed-off-by: AnneY * docs: add docstring for _convert_dict_to_doc Signed-off-by: AnneY * feat: filter find text add return type Signed-off-by: AnneY * fix: type judge Signed-off-by: AnneY * refactor: change search_field default value Signed-off-by: AnneY * fix: always check length Signed-off-by: AnneY * refactor: create private result class Signed-off-by: AnneY * refactor: seperate helper method Signed-off-by: AnneY * refactor: create private result class Signed-off-by: AnneY --------- Signed-off-by: AnneY Signed-off-by: Anne Yang Signed-off-by: RStar2022 * refactor: da stack full column wise (#1183) * refactor: wip add storage class Signed-off-by: samsja * fix: remove impl Signed-off-by: samsja * refactor: move some tests Signed-off-by: samsja * feat: add storage view Signed-off-by: samsja * feat: add storage view Signed-off-by: samsja * feat: add storage view Signed-off-by: samsja * feat: add document view Signed-off-by: samsja * feat: add tests Signed-off-by: samsja * refactor: rename storage to column storage Signed-off-by: samsja * refactor: order of function in da stacked Signed-off-by: samsja * refactor: add inner doc in test Signed-off-by: samsja * refactor: rename storage to column Signed-off-by: samsja * feat: add from storage Signed-off-by: samsja * refactor: rename file Signed-off-by: samsja * refactor: storage take columns as init Signed-off-by: samsja * feat: add back getitiem Signed-off-by: samsja * fix: fix getatr view problem Signed-off-by: samsja * feat: add get array attribute Signed-off-by: samsja * feat: add setitem back Signed-off-by: samsja * fix: fix some tests Signed-off-by: samsja * fix: fix some tests Signed-off-by: samsja * fix: fix some tests Signed-off-by: samsja * feat: add to Signed-off-by: samsja * feat: add ttests Signed-off-by: samsja * feat: add unstack Signed-off-by: samsja * feat: add validation da set atr Signed-off-by: samsja * feat: add da and any Signed-off-by: samsja * feat: remove deleitem overload Signed-off-by: samsja * fix: remove context manager Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * fix: move tests Signed-off-by: samsja * fix: add tidi Signed-off-by: samsja * fix: fix copy in ndarray mypy Signed-off-by: samsja * feat: add indexing sequence mixin Signed-off-by: samsja * feat: add list index class Signed-off-by: samsja * fix: add cast from tuple to list Signed-off-by: samsja * fix: fix indexing test Signed-off-by: samsja * feat: add back traversle flat Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * feat: add to protobuf Signed-off-by: samsja * feat: add to protobuf Signed-off-by: samsja * feat: add to protobuf Signed-off-by: samsja * feat: add to protobuf Signed-off-by: samsja * feat: remove unstack mode Signed-off-by: samsja * feat: add proto Signed-off-by: samsja * feat: add init mixin Signed-off-by: samsja * fix: add back staticmethod Signed-off-by: samsja * fix: add len to mixin Signed-off-by: samsja * fix: add del to index seq Signed-off-by: samsja * fix: fix mypy del Signed-off-by: samsja * fix: fix mypy del Signed-off-by: samsja * docs: change DocumentArrayStacked docstring Signed-off-by: samsja * docs: add docstring dor ColumnStorage Signed-off-by: samsja * refactor: move docs to column storage to da stack Signed-off-by: samsja * refactor: remove useless arg Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy 2 Signed-off-by: samsja * fix: fix mypy 2 Signed-off-by: samsja * fix: fix mypy 2 Signed-off-by: samsja * fix: fix nested array in da stack Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: remove type hint from setitiem Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix getitem Signed-off-by: samsja * merge: merge featrewrite v2 Signed-off-by: samsja * fix: black does nto format pb file Signed-off-by: samsja * fix: remove comment Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix proto3 Signed-off-by: samsja * fix: fix multimodal dataset tests The initialization of all the subclasses is no longer necessary Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: fix del Signed-off-by: samsja * fix: fix scalar value Signed-off-by: samsja * fix: remove useless test Signed-off-by: samsja * feat: raise error when docs are empty Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix type hint Signed-off-by: samsja * fix: fix settatr Signed-off-by: samsja * fix: apply suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Co-authored-by: Joan Fontanals Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: docstring Signed-off-by: samsja * fix: last fix hopefully Signed-off-by: samsja * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: add comments Signed-off-by: samsja * fix: remove classvar Signed-off-by: samsja * fix: rename Signed-off-by: samsja * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: rename Signed-off-by: samsja * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: rename Signed-off-by: samsja * fix: rename Signed-off-by: samsja * fix: remvoe proprety Signed-off-by: samsja * fix: bring back proprety Signed-off-by: samsja * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> --------- Signed-off-by: samsja Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> Co-authored-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Co-authored-by: Joan Fontanals Signed-off-by: RStar2022 * add equal function Signed-off-by: RStar2022 * Add equality function Signed-off-by: RStar2022 * Add equal to array and document Signed-off-by: RStar2022 * changes a bit Signed-off-by: RStar2022 * add __eq__ function in abstract_comp_backend and add test_array Signed-off-by: RStar2022 * feat: ellipsis in tensor shape definition (#1228) * feat: ellipsis in tensor shape definition Signed-off-by: jupyterjazz * fix: type hint Signed-off-by: jupyterjazz * refactor: add tests for other tensors Signed-off-by: jupyterjazz * test: fix tensorflow test Signed-off-by: jupyterjazz * docs: update docstrings with examples Signed-off-by: jupyterjazz * docs: clarify needed dimensions Signed-off-by: jupyterjazz --------- Signed-off-by: jupyterjazz Signed-off-by: RStar2022 * chore: update contributing guideline (#1240) * chore: update contrubing guideline Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: mark dl test slow Signed-off-by: samsja --------- Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> Signed-off-by: samsja Signed-off-by: RStar2022 * fix equality funtion Signed-off-by: RStar2022 * refactor: da stack full column wise (#1183) * refactor: wip add storage class Signed-off-by: samsja * fix: remove impl Signed-off-by: samsja * refactor: move some tests Signed-off-by: samsja * feat: add storage view Signed-off-by: samsja * feat: add storage view Signed-off-by: samsja * feat: add storage view Signed-off-by: samsja * feat: add document view Signed-off-by: samsja * feat: add tests Signed-off-by: samsja * refactor: rename storage to column storage Signed-off-by: samsja * refactor: order of function in da stacked Signed-off-by: samsja * refactor: add inner doc in test Signed-off-by: samsja * refactor: rename storage to column Signed-off-by: samsja * feat: add from storage Signed-off-by: samsja * refactor: rename file Signed-off-by: samsja * refactor: storage take columns as init Signed-off-by: samsja * feat: add back getitiem Signed-off-by: samsja * fix: fix getatr view problem Signed-off-by: samsja * feat: add get array attribute Signed-off-by: samsja * feat: add setitem back Signed-off-by: samsja * fix: fix some tests Signed-off-by: samsja * fix: fix some tests Signed-off-by: samsja * fix: fix some tests Signed-off-by: samsja * feat: add to Signed-off-by: samsja * feat: add ttests Signed-off-by: samsja * feat: add unstack Signed-off-by: samsja * feat: add validation da set atr Signed-off-by: samsja * feat: add da and any Signed-off-by: samsja * feat: remove deleitem overload Signed-off-by: samsja * fix: remove context manager Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * fix: move tests Signed-off-by: samsja * fix: add tidi Signed-off-by: samsja * fix: fix copy in ndarray mypy Signed-off-by: samsja * feat: add indexing sequence mixin Signed-off-by: samsja * feat: add list index class Signed-off-by: samsja * fix: add cast from tuple to list Signed-off-by: samsja * fix: fix indexing test Signed-off-by: samsja * feat: add back traversle flat Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * feat: add to protobuf Signed-off-by: samsja * feat: add to protobuf Signed-off-by: samsja * feat: add to protobuf Signed-off-by: samsja * feat: add to protobuf Signed-off-by: samsja * feat: remove unstack mode Signed-off-by: samsja * feat: add proto Signed-off-by: samsja * feat: add init mixin Signed-off-by: samsja * fix: add back staticmethod Signed-off-by: samsja * fix: add len to mixin Signed-off-by: samsja * fix: add del to index seq Signed-off-by: samsja * fix: fix mypy del Signed-off-by: samsja * fix: fix mypy del Signed-off-by: samsja * docs: change DocumentArrayStacked docstring Signed-off-by: samsja * docs: add docstring dor ColumnStorage Signed-off-by: samsja * refactor: move docs to column storage to da stack Signed-off-by: samsja * refactor: remove useless arg Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy 2 Signed-off-by: samsja * fix: fix mypy 2 Signed-off-by: samsja * fix: fix mypy 2 Signed-off-by: samsja * fix: fix nested array in da stack Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: remove type hint from setitiem Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix getitem Signed-off-by: samsja * merge: merge featrewrite v2 Signed-off-by: samsja * fix: black does nto format pb file Signed-off-by: samsja * fix: remove comment Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix proto3 Signed-off-by: samsja * fix: fix multimodal dataset tests The initialization of all the subclasses is no longer necessary Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: fix del Signed-off-by: samsja * fix: fix scalar value Signed-off-by: samsja * fix: remove useless test Signed-off-by: samsja * feat: raise error when docs are empty Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix tests Signed-off-by: samsja * fix: fix type hint Signed-off-by: samsja * fix: fix settatr Signed-off-by: samsja * fix: apply suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Co-authored-by: Joan Fontanals Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: docstring Signed-off-by: samsja * fix: last fix hopefully Signed-off-by: samsja * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: add comments Signed-off-by: samsja * fix: remove classvar Signed-off-by: samsja * fix: rename Signed-off-by: samsja * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: rename Signed-off-by: samsja * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * fix: rename Signed-off-by: samsja * fix: rename Signed-off-by: samsja * fix: remvoe proprety Signed-off-by: samsja * fix: bring back proprety Signed-off-by: samsja * feat: apply johannes suggestion Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> --------- Signed-off-by: samsja Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> Co-authored-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Co-authored-by: Joan Fontanals * chore: update contribution guideline (#1247) * chore: update contribution guideline Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * feat: apply alex suggestion Co-authored-by: Alex Cureton-Griffiths Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * feat: add poetry add Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * feat: apply alex suggestion Co-authored-by: Alex Cureton-Griffiths Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * feat: apply alex suggestion Co-authored-by: Alex Cureton-Griffiths Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> --------- Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> Co-authored-by: Alex Cureton-Griffiths Signed-off-by: RStar2022 * make default search field empty string (#1249) Signed-off-by: azayz Signed-off-by: RStar2022 * delete import numpy Signed-off-by: RStar2022 * fix key name Signed-off-by: RStar2022 * feat:changes-equality-operation Signed-off-by: RStar2022 * feat:add if Signed-off-by: RStar2022 * docs: add explanation about id field (#1242) * docs: add explanation about id field Signed-off-by: Johannes Messner * docs: update docs/tutorials/add_doc_index.md Co-authored-by: Charlotte Gerhaher Signed-off-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> --------- Signed-off-by: Johannes Messner Signed-off-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Co-authored-by: Charlotte Gerhaher Signed-off-by: RStar2022 * feat: shift to mkdocs (#1244) * chore: remove sphinx and mkdocs Signed-off-by: samsja * fix: add DocumentArrayStacked to init py Signed-off-by: samsja * feat: add mkdocs Signed-off-by: samsja * feat: apply alex setting Signed-off-by: samsja * fix: fix ci Signed-off-by: samsja * fix: fix import Signed-off-by: samsja * fix: fix import Signed-off-by: samsja * fix: fix ci Signed-off-by: samsja * fix: fix ci Signed-off-by: samsja * fix: fix ci Signed-off-by: samsja * feat: uses sphinx style Signed-off-by: samsja * feat: udpate docs Signed-off-by: samsja * feat: add inherited menbers Signed-off-by: samsja * feat: add more Signed-off-by: samsja * fix: code block syntax highlight in docstring Signed-off-by: Alex C-G * feat: add typing Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: Alex C-G Co-authored-by: Alex C-G Signed-off-by: RStar2022 * fix:equal func Signed-off-by: RStar2022 * refactor: rename filter to filter_docs to avoid shadowing of filtern (#1257) Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * feat: ad user defined mapping for python type to db type (#1252) * feat: user defined mapping for python type to db type Signed-off-by: anna-charlotte * feat: check if col_type available Signed-off-by: anna-charlotte * test: add test for base classes Signed-off-by: anna-charlotte * fix: clean up Signed-off-by: anna-charlotte * fix: test Signed-off-by: anna-charlotte * docs: add documentation for db type and python type Signed-off-by: anna-charlotte * docs: add doumentation for runtime config Signed-off-by: anna-charlotte * fix: add and test illegal col types Signed-off-by: anna-charlotte --------- Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * fix:tensor type func Signed-off-by: RStar2022 * fix: comp_backend code delete Signed-off-by: RStar2022 * feat(index): index data with union types (#1220) * refactor: split flattening into separate method Signed-off-by: Johannes Messner * refactor: don't build column info during schema check Signed-off-by: Johannes Messner * feat: allos unions and optional in indexed data Signed-off-by: Johannes Messner * fix: mypy Signed-off-by: Johannes Messner * fix: mypy Signed-off-by: Johannes Messner * fix: import from typing inspect instead of typing Signed-off-by: Johannes Messner * fix: equality and hash for parametrized tensors Signed-off-by: Johannes Messner * test: add test for flatten docs Signed-off-by: Johannes Messner * refactor: apply suggestions Signed-off-by: Johannes Messner * docs: better docstrings Signed-off-by: Johannes Messner * refactor: use construct to create docarray Signed-off-by: Johannes Messner * fix: check for nonetype Signed-off-by: Johannes Messner * fix: none in equals check Signed-off-by: Johannes Messner --------- Signed-off-by: Johannes Messner Signed-off-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: RStar2022 * fix eq Signed-off-by: RStar2022 * fix: move test to integration test (#1260) * fix: move test to integration test Signed-off-by: samsja * fix: use a different runner for doc index Signed-off-by: samsja * fix: use a different runner for doc index Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * fix: disable pycharm da property detection (#1262) * fix: disable unresolved attr detection for da in pycharm Signed-off-by: anna-charlotte * fix: add docstring Signed-off-by: anna-charlotte * fix: call super instead of pass Signed-off-by: anna-charlotte * fix: getattr Signed-off-by: anna-charlotte * fix: getattribute Signed-off-by: anna-charlotte --------- Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * feat: add minimal logger (#1254) * feat: add minimal logger Signed-off-by: jupyterjazz * docs: add an example in contributing Signed-off-by: jupyterjazz * docs: try different format Signed-off-by: jupyterjazz * refactor: set one logger name Signed-off-by: jupyterjazz * docs: add quotes Signed-off-by: jupyterjazz * refactor: put logger as a class attr Signed-off-by: jupyterjazz * docs: small change Signed-off-by: jupyterjazz * fix: typo Signed-off-by: jupyterjazz * docs: rephrase text Signed-off-by: jupyterjazz * refactor: requested changes Signed-off-by: jupyterjazz --------- Signed-off-by: jupyterjazz Signed-off-by: Saba Sturua <45267439+jupyterjazz@users.noreply.github.com> Signed-off-by: RStar2022 * fix: bytes type in `TextDoc` and `VideoDoc` (#1270) * fix: bytes type is None in predefined documents Signed-off-by: anna-charlotte * fix: remove defaults to none Signed-off-by: anna-charlotte --------- Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * refactor: doc index structure (#1266) * refactor: doc index structure Signed-off-by: jupyterjazz * fix: import unused error Signed-off-by: jupyterjazz * fix: run black Signed-off-by: jupyterjazz * refactor: remove noqa Signed-off-by: jupyterjazz * refactor: doc_index to index Signed-off-by: jupyterjazz * refactor: small changes Signed-off-by: jupyterjazz * refactor: readme Signed-off-by: jupyterjazz * refactor: add init file Signed-off-by: jupyterjazz * refactor: move docindex outside integration tests Signed-off-by: jupyterjazz * refactor: adjust config paths Signed-off-by: jupyterjazz --------- Signed-off-by: jupyterjazz Signed-off-by: RStar2022 * refactor(da): remove tensor type from `DocumentArray` init (#1268) * fix: remove tensor type from DocumentArray Signed-off-by: samsja * fix: fix test Signed-off-by: samsja * fix: fix tensorflow test Signed-off-by: samsja * fix: docstrng Signed-off-by: samsja * feat: apply charllote suggestion Co-authored-by: Charlotte Gerhaher Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> * feat: apply saba suggestion Co-authored-by: Saba Sturua <45267439+jupyterjazz@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> --------- Signed-off-by: samsja Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> Co-authored-by: Charlotte Gerhaher Co-authored-by: Saba Sturua <45267439+jupyterjazz@users.noreply.github.com> * refactor: bytes to bytes_ in predefined documents (#1273) * refactor: bytes to bytes_ Signed-off-by: anna-charlotte * refactor: missed fields Signed-off-by: anna-charlotte --------- Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * fix: doc summary for dict and set attributes (#1279) Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * feat: add `get_paths()` instead of v1 `from_files()` (#1267) * feat: add from_files() Signed-off-by: anna-charlotte * feat: add da classmethod from_files() Signed-off-by: anna-charlotte * docs: update docstring Signed-off-by: anna-charlotte * docs: add example usage Signed-off-by: anna-charlotte * fix: add get_paths, rm from_files Signed-off-by: anna-charlotte * fix: add print to debug ci Signed-off-by: anna-charlotte * fix: test Signed-off-by: anna-charlotte * fix: apply suggestions from code review Signed-off-by: anna-charlotte --------- Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * fix: proto ser and deser for nested tuple/dict/list (#1278) * feat: add failing test Signed-off-by: samsja * refactor: shorten if else statememt Signed-off-by: samsja * refactor: shorten if else statememt Signed-off-by: samsja * fix: fix proto and list Signed-off-by: samsja * fix: fix proto and dict Signed-off-by: samsja * feat: add very complex test Signed-off-by: samsja * feat: fix pure tensor stuff Signed-off-by: samsja * feat: fix pure tensor stuff Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: fix from protobuf Signed-off-by: samsja * fix: fix from protobuf tensorflow Signed-off-by: samsja * fix: add more test Signed-off-by: samsja * fix: fix mypy Signed-off-by: samsja * fix: add more test Signed-off-by: samsja * fix: import ndarray Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * feat: create documents from dict (#1283) * feat: create documents from dict Signed-off-by: jupyterjazz * fix: ignore type Signed-off-by: jupyterjazz * refactor: change fn names Signed-off-by: jupyterjazz --------- Signed-off-by: jupyterjazz Signed-off-by: RStar2022 * docs: fix up english (#1285) Signed-off-by: Alex C-G Signed-off-by: RStar2022 * fix: add int, float and others to doc summary (#1287) Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * fix: hnswlib doc index (#1277) * fix: support for torch and tf Signed-off-by: Johannes Messner * fix: allow arbitrary payloads, including tensors Signed-off-by: Johannes Messner * test: mark tf tests Signed-off-by: Johannes Messner * test: another attempt at fixing tf tests Signed-off-by: Johannes Messner * test: remove parametrization of test Signed-off-by: Johannes Messner * test: fix test Signed-off-by: Johannes Messner * fix: add suggestion Signed-off-by: Johannes Messner * ci: exlude tf tests from index tests Signed-off-by: Johannes Messner --------- Signed-off-by: Johannes Messner Signed-off-by: RStar2022 * fix Signed-off-by: RStar2022 * fix: tensorflow Signed-off-by: RStar2022 * feat(test): DocumentArray method tests similar to list methods like reverse, sort, remove, pop (#1291) * feat: isort format fix Signed-off-by: agaraman0 * refactor: comment fixes Signed-off-by: agaraman0 * refactor: comment fixes Signed-off-by: agaraman0 --------- Signed-off-by: agaraman0 Signed-off-by: RStar2022 * fix:doc Signed-off-by: RStar2022 * feat: implement push/pull interface from JAC, file and s3 (#1182) * refactor: move streaming serialization into separate method Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: add binary io like protocol definition Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: ported push pull to JAC Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: protocol is not in 3.7 typing Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: make mypy happy Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: patch missing waterfall Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: jit import backends Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: implement cache in jinaai pull Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: add hubble dependency to jina group Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: better division of concerns Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: add concept of namespace Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: ignore missing hubble stubs Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: streaming protocol stubs Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: make more general buffered caching reader Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: add tests for hubble pushpull Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: add tests for file backend Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: remove hubble dependency from jina group This reverts commit b3044213d58517becb9d71194af34f3833560ebc. Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: implement push pull for local filesystem Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: test concurrent pushes and pulls in file protocol Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: resolve concurrent pushes and pulls correctly Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: rename text to textdoc Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: added some logging Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: s3 tests Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: s3 pushpull Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: add smart open dependency Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: add smart opens silly python bound Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: update hubble tests (failing) Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: fix delete return in hubble pushpull Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * Revert "fix: add smart open dependency" This reverts commit cf78c6cc6d2b367501d2358c18773a456426a448. This reverts commit eb0e52b4c521f2b638bf5de850701546a4996bc3. Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: add hubble and smart open dependencies Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: mypy fixes Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * ci: allow tests to see jina auth token Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: add progress bars for streaming Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * style: blacken Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: buffer writes to s3 Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: mypy no like sequence Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: make progress bar quieter when disabled Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: skip failing tests Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: add tables when listing Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: add jina auth token to uncaped test Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: mock s3 tests with minio container Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: silly error that cost me 2 hours of life Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: use tolerance ratio in file tests Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: add caching to s3 pull Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: add log messages for unused parameters Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: take out unneeded buffering smart open already buffers Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: pick fastest protocol compression configuration for s3 Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: bump tolerance ratio for s3 test Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: reduce code duplication Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: put reader chunk size constant at top of file Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: reduce reader chunk size for memory tests Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: multipart uploads get stuck frequently lets just do big uploads for now... Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * docs: add docstrings to mixin and file backend Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * docs: add docstring for s3 and hubble backends Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * test: remove unused test Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: use literal in protocol Co-authored-by: samsja <55492238+samsja@users.noreply.github.com> Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: protocols dont need to be inherited Co-authored-by: samsja <55492238+samsja@users.noreply.github.com> Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: add make mypy happy with the literals Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: literals not in 3.7 Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: move mixin out of init file Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: move cache path resolution to utils Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * feat: cache path is only evaluated once Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: loading backends makes more sense as debug log Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * tests: add slow and internet marks Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: pin image tag Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: use abc instead of protocol for typing backends Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: revert - add hubble and smart open dependencies This reverts commit 1d1d2eeaf2b51be6ef00e6ab6ee5b9fd1bcf1d92. Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: add hubble and aws dependencies Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: change all push pull mixin methods to class methods Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: misstyped class method self reference Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: rename pushpull to docstore and use more classmethods Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: separate remote backend implementations from mixin Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * fix: missed import refactor Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: change submodule name to store Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: remove list and delete from mixin Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * tests: clear all the garbage in ci account Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * tests: skip test that is broken on ci Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> * refactor: standardize naming to jac Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> --------- Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Co-authored-by: samsja <55492238+samsja@users.noreply.github.com> Signed-off-by: RStar2022 * refactor: rename `Document` to `Doc` (#1293) * refactor: rename document to doc Signed-off-by: samsja * refactor: rename document to doc in da Signed-off-by: samsja * refactor: rename base doc in md files Signed-off-by: samsja * refactor: rename base base document ot base doc Signed-off-by: samsja * fix(docs): fix docs building Signed-off-by: samsja * fix: ingore hubble test Signed-off-by: samsja * fix: ingore hubble test Signed-off-by: samsja --------- Signed-off-by: samsja * chore(docs): add ci and fix docs ui (#1295) * refactor: rename document to doc Signed-off-by: samsja * refactor: rename document to doc in da Signed-off-by: samsja * refactor: rename base doc in md files Signed-off-by: samsja * refactor: rename base base document ot base doc Signed-off-by: samsja * fix(docs): fix docs building Signed-off-by: samsja * fix: ingore hubble test Signed-off-by: samsja * fix: ingore hubble test Signed-off-by: samsja * docs: add userguide install Signed-off-by: samsja * docs: add awesome-pages Signed-off-by: samsja * docs: add install Signed-off-by: samsja * docs: rename tutorials to how to Signed-off-by: samsja * chore: add pre commit blacken docs Signed-off-by: samsja * chore: add blacken docs Signed-off-by: samsja * docs: arr warning docarray version Signed-off-by: samsja * docs: repo url Signed-off-by: samsja * docs: add social Signed-off-by: samsja * docs: add logo Signed-off-by: samsja * docs: add first step emtpy page Signed-off-by: samsja * docs: add document docs Signed-off-by: samsja * feat: add markdown documentation test Signed-off-by: samsja * docs: remove content Signed-off-by: samsja * docs: fix ci Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * chore: add docstring test (#1298) * wip Signed-off-by: samsja * fix: cleanup namespace utils Signed-off-by: samsja * feat: add docstring test Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * fix: rename DocArrayProto to DocumentArrayProto (#1297) Signed-off-by: samsja Signed-off-by: RStar2022 * fix: docstring polish typing (#1299) * wip Signed-off-by: samsja * fix: cleanup namespace utils Signed-off-by: samsja * feat: add docstring test Signed-off-by: samsja * fix: fix video url docstring Signed-off-by: samsja * fix: fix text url Signed-off-by: samsja * fix: fix image url Signed-off-by: samsja * fix: fic audio url Signed-off-by: samsja * fix: mesh 3d url Signed-off-by: samsja * fix: mesh 3d url Signed-off-by: samsja * fix: remove useless data Signed-off-by: samsja * fix: fix docstring ndarray and torch tensor Signed-off-by: samsja * fix: fix docstring ndarray and torch tensor Signed-off-by: samsja * fix: fix fix audio url and audio ndarray Signed-off-by: samsja * fix: fix fix audio url and audio ndarray Signed-off-by: samsja * fix: fix video tensor Signed-off-by: samsja * fix: fix video tensor Signed-off-by: samsja * fix: fix audio bytes Signed-off-by: samsja * fix: video and image bytes Signed-off-by: samsja * docs: move typing section Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * fix: remove files (#1305) Signed-off-by: samsja Signed-off-by: RStar2022 * fix: flatten schema of abstract index (#1294) * fix: flatten schema of abstract index Signed-off-by: AnneY * fix: _convert_dict_to_doc Signed-off-by: AnneY * fix: catch exception when flatten schema Signed-off-by: AnneY * refactor: remove useless assignemnt Signed-off-by: AnneY * fix: use Abstractensor as tensor doc_type Signed-off-by: AnneY * fix: add AbstractTensor to hnswlib Signed-off-by: AnneY * docs: AbstractTensor as doc_type Signed-off-by: AnneY * docs: complete description about AbstracTensor Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: Anne Yang --------- Signed-off-by: AnneY Signed-off-by: Anne Yang Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: RStar2022 * docs: add utils section (#1307) * feat: add utils for map to docs and fix docstring Signed-off-by: samsja * feat: add utils for map to docs and fix docstring Signed-off-by: samsja * feat: add utils for find and fix docstring Signed-off-by: samsja * fix: fix video ndaray docstrng Signed-off-by: samsja * fix: fix video find docstrng Signed-off-by: samsja * fix: fix map docstring Signed-off-by: samsja * fix: fix fileter docstring Signed-off-by: samsja * fix: fix add reduce Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * docs: fix docstring example of find_batched (#1308) Signed-off-by: Johannes Messner Signed-off-by: RStar2022 * docs: fix map docstring (#1311) * fix: fix utils Signed-off-by: samsja * fix: fix map Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * feat: elasticsearch document index (#1196) * feat: __init__ of ElasticDocumentIndex Signed-off-by: AnneY * feat: add index func Signed-off-by: AnneY * feat: get and del funcs Signed-off-by: AnneY * fix: init and index creation Signed-off-by: AnneY * fix: __init__ and _index Signed-off-by: AnneY * fix: _get_items Signed-off-by: AnneY * feat: add _find Signed-off-by: AnneY * feat: add filter text and their batch version Signed-off-by: AnneY * feat: store id and get nested doc Signed-off-by: AnneY * fix: vector cannot be all zero Signed-off-by: AnneY * fix: __getitem__ raise error Signed-off-by: AnneY * feat: support more python types Signed-off-by: AnneY * fix: mypy Signed-off-by: AnneY * test: elastic index tests Signed-off-by: AnneY * test: comment scripts before ci setup Signed-off-by: AnneY * chore: add elasticsearch dependency to poetry Signed-off-by: AnneY * test: elastic index ci setup Signed-off-by: AnneY * feat: add num_candidates to rumtime config Signed-off-by: AnneY * fix: let user pass index_settings Signed-off-by: AnneY * feat: degrade to v7 and add query builder Signed-off-by: AnneY * fix: remove elastic_transport Signed-off-by: AnneY * feat: minor features Signed-off-by: AnneY * refactor: style fix Signed-off-by: AnneY * fix: fix mypy Signed-off-by: AnneY * feat: add chunk size to runtime config Signed-off-by: AnneY * fix: chunk size Signed-off-by: AnneY * feat: add chunk_size to funcs Signed-off-by: AnneY * feat: rewrite elastic v7 query builder Signed-off-by: AnneY * fix: poetry Signed-off-by: AnneY * fix: db_type should be elastic types Signed-off-by: AnneY * fix: minor adjustment Signed-off-by: AnneY * refactor: rename elastic index files Signed-off-by: AnneY * refactor: remove comments Signed-off-by: AnneY * feat: rename, batch operations, etc Signed-off-by: AnneY * test: add test for persistency and col config Signed-off-by: AnneY * feat: support more field types and subclass Signed-off-by: AnneY * feat: support more python types Signed-off-by: AnneY * test: tf, tensor and more elastic field types Signed-off-by: AnneY * fix: elastic should be optional in toml Co-authored-by: Charlotte Gerhaher Signed-off-by: Anne Yang * refactor: rename class Signed-off-by: AnneY * fix: change Dict to Mapping Signed-off-by: AnneY * fix: add AbstractTensor Signed-off-by: AnneY * test: rename class and add tests Signed-off-by: AnneY * fix: poetry Signed-off-by: AnneY --------- Signed-off-by: AnneY Signed-off-by: Anne Yang Co-authored-by: Charlotte Gerhaher Signed-off-by: RStar2022 * refactor: map_docs_batch to map_docs_batched (#1312) Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * refactor: map_docs_batch to map_docs_batched (#1312) Signed-off-by: anna-charlotte Signed-off-by: RStar2022 * feat: torch backend basic operation tests (#1306) Signed-off-by: agaraman0 Signed-off-by: RStar2022 * chore: add instructions to pip installs and group extras (#1281) * chore: group extras and add instructions for pip installs Signed-off-by: anna-charlotte * fix: throw runtime error with install instructions for hnswlib Signed-off-by: anna-charlotte * feat: add instructions for video imports Signed-off-by: anna-charlotte * feat: add instructions for audio imports Signed-off-by: anna-charlotte * feat: add instructions for 3d imports Signed-off-by: anna-charlotte * feat: add instructions for image imports Signed-off-by: anna-charlotte * fix: import only audiosegment from pydub Signed-off-by: anna-charlotte * fix: generalize audio and image imports Signed-off-by: anna-charlotte * fix: add instructions for web imports Signed-off-by: anna-charlotte * fix: add instructions for web imports Signed-off-by: anna-charlotte * fix: add instructions for protobuf imports Signed-off-by: anna-charlotte * fix: add instructions for lz4 imports Signed-off-by: anna-charlotte * fix: fastapi import Signed-off-by: anna-charlotte * fix: revert changes in protobuf import Signed-off-by: anna-charlotte * fix: add instructions for torch, without raising error Signed-off-by: anna-charlotte * fix: add instructions for torch, with raising error Signed-off-by: anna-charlotte * fix: add instructions for tensorflow Signed-off-by: anna-charlotte * fix: base doc io imports Signed-off-by: anna-charlotte * fix: tf in doc index abstract Signed-off-by: anna-charlotte * fix: tf in doc index abstract Signed-off-by: anna-charlotte * fix: clean up imports Signed-off-by: anna-charlotte * fix: tf import in doc index Signed-off-by: anna-charlotte * fix: add getattr on module level Signed-off-by: anna-charlotte * fix: import torch for type checking Signed-off-by: anna-charlotte * fix: add type checking Signed-off-by: anna-charlotte * fix: test cross backend Signed-off-by: anna-charlotte * fix: add missing return statement Signed-off-by: anna-charlotte * fix: clean up Signed-off-by: anna-charlotte * fix: update error message Signed-off-by: anna-charlotte * fix: remove base document init Signed-off-by: anna-charlotte * fix: clean up Signed-off-by: anna-charlotte * fix: add trimesh easy extra Signed-off-by: anna-charlotte * fix: pil immage importfix: clean up Signed-off-by: anna-charlotte * chore: add lz4 to mypy missing type hint section Signed-off-by: anna-charlotte * docs: add instructions to doc index tutorial Signed-off-by: anna-charlotte * chore: extra pandas and condense module where missing imports ignore Signed-off-by: anna-charlotte * fix: update poetry lock Signed-off-by: anna-charlotte * fix: missed imports Signed-off-by: anna-charlotte * fix: clean up Signed-off-by: anna-charlotte * fix: revert last commit This reverts commit 9aca06f60ba5bc32b91271a7743d3869ac882434. Signed-off-by: anna-charlotte * revert "fix: missed imports" This reverts commit 353f029207ca63a9c4e41b8391539b75557375b0. Signed-off-by: anna-charlotte * fix: missed imports Signed-off-by: anna-charlotte * wip Signed-off-by: anna-charlotte * fix: rename DocArrayProto to DocumentArrayProto (#1297) Signed-off-by: samsja Signed-off-by: anna-charlotte * fix: docstring polish typing (#1299) * wip Signed-off-by: samsja * fix: cleanup namespace utils Signed-off-by: samsja * feat: add docstring test Signed-off-by: samsja * fix: fix video url docstring Signed-off-by: samsja * fix: fix text url Signed-off-by: samsja * fix: fix image url Signed-off-by: samsja * fix: fic audio url Signed-off-by: samsja * fix: mesh 3d url Signed-off-by: samsja * fix: mesh 3d url Signed-off-by: samsja * fix: remove useless data Signed-off-by: samsja * fix: fix docstring ndarray and torch tensor Signed-off-by: samsja * fix: fix docstring ndarray and torch tensor Signed-off-by: samsja * fix: fix fix audio url and audio ndarray Signed-off-by: samsja * fix: fix fix audio url and audio ndarray Signed-off-by: samsja * fix: fix video tensor Signed-off-by: samsja * fix: fix video tensor Signed-off-by: samsja * fix: fix audio bytes Signed-off-by: samsja * fix: video and image bytes Signed-off-by: samsja * docs: move typing section Signed-off-by: samsja --------- Signed-off-by: samsja * fix: fix for doc_string test Signed-off-by: anna-charlotte * fix: try short version in typing init getattr Signed-off-by: anna-charlotte * fix: shorter version in getattr Signed-off-by: anna-charlotte * fix: remove files (#1305) Signed-off-by: samsja Signed-off-by: anna-charlotte * fix: flatten schema of abstract index (#1294) * fix: flatten schema of abstract index Signed-off-by: AnneY * fix: _convert_dict_to_doc Signed-off-by: AnneY * fix: catch exception when flatten schema Signed-off-by: AnneY * refactor: remove useless assignemnt Signed-off-by: AnneY * fix: use Abstractensor as tensor doc_type Signed-off-by: AnneY * fix: add AbstractTensor to hnswlib Signed-off-by: AnneY * docs: AbstractTensor as doc_type Signed-off-by: AnneY * docs: complete description about AbstracTensor Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: Anne Yang --------- Signed-off-by: AnneY Signed-off-by: Anne Yang Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> * fix: add type hint for lib Signed-off-by: anna-charlotte * fix: add import error to inits getattrs Signed-off-by: anna-charlotte * docs: add utils section (#1307) * feat: add utils for map to docs and fix docstring Signed-off-by: samsja * feat: add utils for map to docs and fix docstring Signed-off-by: samsja * feat: add utils for find and fix docstring Signed-off-by: samsja * fix: fix video ndaray docstrng Signed-off-by: samsja * fix: fix video find docstrng Signed-off-by: samsja * fix: fix map docstring Signed-off-by: samsja * fix: fix fileter docstring Signed-off-by: samsja * fix: fix add reduce Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: anna-charlotte * docs: fix docstring example of find_batched (#1308) Signed-off-by: Johannes Messner Signed-off-by: anna-charlotte * docs: fix map docstring (#1311) * fix: fix utils Signed-off-by: samsja * fix: fix map Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: anna-charlotte * feat: elasticsearch document index (#1196) * feat: __init__ of ElasticDocumentIndex Signed-off-by: AnneY * feat: add index func Signed-off-by: AnneY * feat: get and del funcs Signed-off-by: AnneY * fix: init and index creation Signed-off-by: AnneY * fix: __init__ and _index Signed-off-by: AnneY * fix: _get_items Signed-off-by: AnneY * feat: add _find Signed-off-by: AnneY * feat: add filter text and their batch version Signed-off-by: AnneY * feat: store id and get nested doc Signed-off-by: AnneY * fix: vector cannot be all zero Signed-off-by: AnneY * fix: __getitem__ raise error Signed-off-by: AnneY * feat: support more python types Signed-off-by: AnneY * fix: mypy Signed-off-by: AnneY * test: elastic index tests Signed-off-by: AnneY * test: comment scripts before ci setup Signed-off-by: AnneY * chore: add elasticsearch dependency to poetry Signed-off-by: AnneY * test: elastic index ci setup Signed-off-by: AnneY * feat: add num_candidates to rumtime config Signed-off-by: AnneY * fix: let user pass index_settings Signed-off-by: AnneY * feat: degrade to v7 and add query builder Signed-off-by: AnneY * fix: remove elastic_transport Signed-off-by: AnneY * feat: minor features Signed-off-by: AnneY * refactor: style fix Signed-off-by: AnneY * fix: fix mypy Signed-off-by: AnneY * feat: add chunk size to runtime config Signed-off-by: AnneY * fix: chunk size Signed-off-by: AnneY * feat: add chunk_size to funcs Signed-off-by: AnneY * feat: rewrite elastic v7 query builder Signed-off-by: AnneY * fix: poetry Signed-off-by: AnneY * fix: db_type should be elastic types Signed-off-by: AnneY * fix: minor adjustment Signed-off-by: AnneY * refactor: rename elastic index files Signed-off-by: AnneY * refactor: remove comments Signed-off-by: AnneY * feat: rename, batch operations, etc Signed-off-by: AnneY * test: add test for persistency and col config Signed-off-by: AnneY * feat: support more field types and subclass Signed-off-by: AnneY * feat: support more python types Signed-off-by: AnneY * test: tf, tensor and more elastic field types Signed-off-by: AnneY * fix: elastic should be optional in toml Co-authored-by: Charlotte Gerhaher Signed-off-by: Anne Yang * refactor: rename class Signed-off-by: AnneY * fix: change Dict to Mapping Signed-off-by: AnneY * fix: add AbstractTensor Signed-off-by: AnneY * test: rename class and add tests Signed-off-by: AnneY * fix: poetry Signed-off-by: AnneY --------- Signed-off-by: AnneY Signed-off-by: Anne Yang Co-authored-by: Charlotte Gerhaher * fix: add case for elastic search Signed-off-by: anna-charlotte * refactor: map_docs_batch to map_docs_batched (#1312) Signed-off-by: anna-charlotte * refactor: map_docs_batch to map_docs_batched (#1312) Signed-off-by: anna-charlotte * fix: clean up Signed-off-by: anna-charlotte * feat: torch backend basic operation tests (#1306) Signed-off-by: agaraman0 Signed-off-by: anna-charlotte * fix: ci add --fix-missing to apt-get Signed-off-by: anna-charlotte * fix: revert "fix: ci add --fix-missing to apt-get" Signed-off-by: anna-charlotte * fix: ci apt-get update Signed-off-by: anna-charlotte * fix: apply samis suggestions from code review Signed-off-by: anna-charlotte * fix: apply samis suggestions from code review Signed-off-by: anna-charlotte --------- Signed-off-by: anna-charlotte Signed-off-by: samsja Signed-off-by: AnneY Signed-off-by: Anne Yang Signed-off-by: Johannes Messner Signed-off-by: agaraman0 Co-authored-by: samsja <55492238+samsja@users.noreply.github.com> Co-authored-by: Anne Yang Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Co-authored-by: Aman Agarwal Signed-off-by: RStar2022 * fix: mark es test as index (#1314) * fix: mark es test as index Signed-off-by: samsja * fix: mark es test as index Signed-off-by: samsja * fix: ci Signed-off-by: samsja * fix: ci Signed-off-by: samsja --------- Signed-off-by: samsja Signed-off-by: RStar2022 * feat: docarray fastapi simple integration (#1320) * refactor: docarray fastapi simple integration Signed-off-by: jupyterjazz * refactor: custom orjson response class Signed-off-by: jupyterjazz * refactor: docarray response name Signed-off-by: jupyterjazz * refactor: simplify from json Signed-off-by: jupyterjazz * test: refactor tests Signed-off-by: jupyterjazz * refactor: adjust type hint Signed-off-by: jupyterjazz * refactor: use orjson Signed-off-by: jupyterjazz * style: mypy errors Signed-off-by: jupyterjazz * refactor: abstract method Signed-off-by: jupyterjazz --------- Signed-off-by: jupyterjazz * fix doc Signed-off-by: RStar2022 --------- Signed-off-by: samsja Signed-off-by: RStar2022 Signed-off-by: rik61072@gmail.com Signed-off-by: Johannes Messner Signed-off-by: AnneY Signed-off-by: Anne Yang Signed-off-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Signed-off-by: samsja <55492238+samsja@users.noreply.github.com> Signed-off-by: jupyterjazz Signed-off-by: azayz Signed-off-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Signed-off-by: Alex C-G Signed-off-by: anna-charlotte Signed-off-by: Saba Sturua <45267439+jupyterjazz@users.noreply.github.com> Signed-off-by: agaraman0 Signed-off-by: カレン <99171855+RStar2022@users.noreply.github.com> Co-authored-by: samsja <55492238+samsja@users.noreply.github.com> Co-authored-by: Shatabarto "Rik" Bhattacharya Co-authored-by: Johannes Messner <44071807+JohannesMessner@users.noreply.github.com> Co-authored-by: Anne Yang Co-authored-by: Jackmin801 <56836461+Jackmin801@users.noreply.github.com> Co-authored-by: Joan Fontanals Co-authored-by: Saba Sturua <45267439+jupyterjazz@users.noreply.github.com> Co-authored-by: Alex Cureton-Griffiths Co-authored-by: Aziz Belaweid <40893766+azayz@users.noreply.github.com> Co-authored-by: Charlotte Gerhaher Co-authored-by: Alex C-G Co-authored-by: Aman Agarwal --- docarray/array/array/array.py | 10 +++ docarray/base_doc/doc.py | 36 +++++++++++ tests/units/array/test_array.py | 71 ++++++++++++++++++++- tests/units/document/test_docs_operators.py | 3 +- 4 files changed, 118 insertions(+), 2 deletions(-) diff --git a/docarray/array/array/array.py b/docarray/array/array/array.py index 73f1a25a17b..b09cfd0cf54 100644 --- a/docarray/array/array/array.py +++ b/docarray/array/array/array.py @@ -144,6 +144,16 @@ def construct( da._data = docs if isinstance(docs, list) else list(docs) return da + + def __eq__(self, other: Any) -> bool: + if self.__len__() != other.__len__(): + return False + for doc_self, doc_other in zip(self, other): + if doc_self != doc_other: + return False + return True + + def _validate_docs(self, docs: Iterable[T_doc]) -> Iterable[T_doc]: """ Validate if an Iterable of Document are compatible with this DocArray diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index 50abc6722a7..62af6769a76 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -102,9 +102,45 @@ def __setattr__(self, field, value) -> None: dict_ref[key] = val object.__setattr__(self, '__dict__', dict_ref) + + def __eq__(self, other) -> bool: + if self.dict().keys() != other.dict().keys(): + return False + + for field_name in self.__fields__: + value1 = getattr(self, field_name) + value2 = getattr(other, field_name) + + if field_name == 'id': + continue + + if isinstance(value1, AbstractTensor) and isinstance( + value2, AbstractTensor + ): + + comp_be1 = value1.get_comp_backend() + comp_be2 = value2.get_comp_backend() + + if comp_be1.shape(value1) != comp_be2.shape(value2): + return False + if ( + not (comp_be1.to_numpy(value1) == comp_be2.to_numpy(value2)) + .all() + .item() + ): + return False + else: + if value1 != value2: + return False + return True + + def __ne__(self, other) -> bool: + return not (self == other) + def _docarray_to_json_compatible(self) -> Dict: """ Convert itself into a json compatible object :return: A dictionary of the BaseDoc object """ return self.dict() + diff --git a/tests/units/array/test_array.py b/tests/units/array/test_array.py index be4fa6fa505..d47089176bb 100644 --- a/tests/units/array/test_array.py +++ b/tests/units/array/test_array.py @@ -1,5 +1,4 @@ from typing import Optional, TypeVar, Union - import numpy as np import pytest import torch @@ -10,6 +9,8 @@ tf_available = is_tf_available() if tf_available: + import tensorflow as tf + from docarray.typing import TensorFlowTensor @@ -80,6 +81,74 @@ class Text(BaseDoc): assert len(da) == 10 +def test_ndarray_equality(): + class Text(BaseDoc): + tensor: NdArray + + arr1 = Text(tensor=np.zeros(5)) + arr2 = Text(tensor=np.zeros(5)) + arr3 = Text(tensor=np.ones(5)) + arr4 = Text(tensor=np.zeros(4)) + + assert arr1 == arr2 + assert arr1 != arr3 + assert arr1 != arr4 + + +def test_tensor_equality(): + class Text(BaseDoc): + tensor: TorchTensor + + torch1 = Text(tensor=torch.zeros(128)) + torch2 = Text(tensor=torch.zeros(128)) + torch3 = Text(tensor=torch.zeros(126)) + torch4 = Text(tensor=torch.ones(128)) + + assert torch1 == torch2 + assert torch1 != torch3 + assert torch1 != torch4 + + +def test_documentarray(): + class Text(BaseDoc): + text: str + + da1 = DocArray([Text(text='hello')]) + da2 = DocArray([Text(text='hello')]) + + assert da1 == da2 + assert da1 == [Text(text='hello') for _ in range(len(da1))] + assert da2 == [Text(text='hello') for _ in range(len(da2))] + + +@pytest.mark.tensorflow +def test_tensorflowtensor_equality(): + class Text(BaseDoc): + tensor: TensorFlowTensor + + tensor1 = Text(tensor=tf.constant([1, 2, 3, 4, 5, 6])) + tensor2 = Text(tensor=tf.constant([1, 2, 3, 4, 5, 6])) + tensor3 = Text(tensor=tf.constant([[1.0, 2.0], [3.0, 5.0]])) + tensor4 = Text(tensor=tf.constant([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0]])) + + assert tensor1 == tensor2 + assert tensor1 != tensor3 + assert tensor1 != tensor4 + + +def test_text_tensor(): + class Text1(BaseDoc): + tensor: NdArray + + class Text2(BaseDoc): + tensor: TorchTensor + + arr_tensor1 = Text1(tensor=np.zeros(2)) + arr_tensor2 = Text2(tensor=torch.zeros(2)) + + assert arr_tensor1 == arr_tensor2 + + def test_get_bulk_attributes_function(): class Mmdoc(BaseDoc): text: str diff --git a/tests/units/document/test_docs_operators.py b/tests/units/document/test_docs_operators.py index 53e7bb71e56..7e3b39e6071 100644 --- a/tests/units/document/test_docs_operators.py +++ b/tests/units/document/test_docs_operators.py @@ -11,7 +11,8 @@ def test_text_document_operators(): assert doc == doc2 doc3 = TextDoc(id='other-id', text='text', url='http://url.com') - assert doc != doc3 + assert doc == doc3 + assert 't' in doc assert 'a' not in doc From 8356e78a1a1892f2625aba164db8fe9bec4c4c40 Mon Sep 17 00:00:00 2001 From: jupyterjazz Date: Mon, 3 Apr 2023 10:11:29 +0200 Subject: [PATCH 3/3] style: run black Signed-off-by: jupyterjazz --- README.md | 2 +- docarray/array/array/array.py | 2 -- docarray/base_doc/doc.py | 3 --- tests/units/document/test_docs_operators.py | 1 - 4 files changed, 1 insertion(+), 7 deletions(-) diff --git a/README.md b/README.md index 18f8b4113bb..8d4b45ae264 100644 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ DocArray is a library for **representing, sending and storing multi-modal data**, with a focus on applications in **ML** and **Neural Search**. -This means that `DocArray` lets you do the following things: +This means that DocArray lets you do the following things: ## Represent diff --git a/docarray/array/array/array.py b/docarray/array/array/array.py index b09cfd0cf54..e3f56e74fda 100644 --- a/docarray/array/array/array.py +++ b/docarray/array/array/array.py @@ -144,7 +144,6 @@ def construct( da._data = docs if isinstance(docs, list) else list(docs) return da - def __eq__(self, other: Any) -> bool: if self.__len__() != other.__len__(): return False @@ -153,7 +152,6 @@ def __eq__(self, other: Any) -> bool: return False return True - def _validate_docs(self, docs: Iterable[T_doc]) -> Iterable[T_doc]: """ Validate if an Iterable of Document are compatible with this DocArray diff --git a/docarray/base_doc/doc.py b/docarray/base_doc/doc.py index 62af6769a76..48afbe6eddd 100644 --- a/docarray/base_doc/doc.py +++ b/docarray/base_doc/doc.py @@ -102,7 +102,6 @@ def __setattr__(self, field, value) -> None: dict_ref[key] = val object.__setattr__(self, '__dict__', dict_ref) - def __eq__(self, other) -> bool: if self.dict().keys() != other.dict().keys(): return False @@ -117,7 +116,6 @@ def __eq__(self, other) -> bool: if isinstance(value1, AbstractTensor) and isinstance( value2, AbstractTensor ): - comp_be1 = value1.get_comp_backend() comp_be2 = value2.get_comp_backend() @@ -143,4 +141,3 @@ def _docarray_to_json_compatible(self) -> Dict: :return: A dictionary of the BaseDoc object """ return self.dict() - diff --git a/tests/units/document/test_docs_operators.py b/tests/units/document/test_docs_operators.py index 7e3b39e6071..3e0e48f1a05 100644 --- a/tests/units/document/test_docs_operators.py +++ b/tests/units/document/test_docs_operators.py @@ -13,7 +13,6 @@ def test_text_document_operators(): doc3 = TextDoc(id='other-id', text='text', url='http://url.com') assert doc == doc3 - assert 't' in doc assert 'a' not in doc