From f4f17569c268153f5a6e6fbac9e436ba7c37d0f9 Mon Sep 17 00:00:00 2001 From: AnneY Date: Mon, 19 Dec 2022 19:27:28 +0800 Subject: [PATCH 1/5] feat: add language to RedisConfig Signed-off-by: AnneY --- docarray/array/storage/redis/backend.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/docarray/array/storage/redis/backend.py b/docarray/array/storage/redis/backend.py index bb72fdab405..b6f9f889596 100644 --- a/docarray/array/storage/redis/backend.py +++ b/docarray/array/storage/redis/backend.py @@ -27,6 +27,7 @@ class RedisConfig: redis_config: Dict[str, Any] = field(default_factory=dict) index_text: bool = field(default=False) tag_indices: List[str] = field(default_factory=list) + language: Optional[str] = None batch_size: int = field(default=64) method: str = field(default='HNSW') ef_construction: Optional[int] = None @@ -113,7 +114,9 @@ def _build_index(self, rebuild: bool = False): self._client.ft(index_name=self._config.index_name).dropindex() schema = self._build_schema_from_redis_config() - idef = IndexDefinition(prefix=[self._doc_prefix]) + idef = IndexDefinition( + prefix=[self._doc_prefix], language=self._config.language + ) self._client.ft(index_name=self._config.index_name).create_index( schema, definition=idef ) From cead4a755a3b7e9bbc2b118379da342aafa820af Mon Sep 17 00:00:00 2001 From: AnneY Date: Mon, 19 Dec 2022 19:39:33 +0800 Subject: [PATCH 2/5] test: add test for redis chinese support Signed-off-by: AnneY --- tests/unit/array/mixins/test_find.py | 29 ++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/tests/unit/array/mixins/test_find.py b/tests/unit/array/mixins/test_find.py index dae4518d0f4..72fbb9a6716 100644 --- a/tests/unit/array/mixins/test_find.py +++ b/tests/unit/array/mixins/test_find.py @@ -891,6 +891,35 @@ def test_redis_geo_filter(start_storage): assert distance[0][1] < 800 +def test_redis_language(start_storage): + n_dim = 128 + da = DocumentArray( + storage='redis', + config={ + 'n_dim': n_dim, + 'index_text': True, + 'language': 'chinese', + }, + ) + + with da: + da.extend( + [ + Document(id='1', text='意大利和西班牙 token1 token2 token3'), + Document(id='2', text='法国和中国 token1 token2'), + Document(id='3', text='意大利和法国 token2 token3 token4'), + ] + ) + + results = da.find('token1') + assert len(results) == 2 + assert set(results[:, 'id']) == {'1', '2'} + + results = da.find('意大利') + assert len(results) == 2 + assert set(results[:, 'id']) == {'1', '3'} + + @pytest.mark.parametrize('storage', ['memory']) @pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) def test_unsupported_pre_filtering(storage, start_storage, columns): From a53edbed62d7dcd32d3cd5d0b67fc9079b6ac219 Mon Sep 17 00:00:00 2001 From: AnneY Date: Mon, 19 Dec 2022 19:40:03 +0800 Subject: [PATCH 3/5] docs: add language to RedisConfig Signed-off-by: AnneY --- docs/advanced/document-store/redis.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/advanced/document-store/redis.md b/docs/advanced/document-store/redis.md index 94d23413b01..8f8b1676ef5 100644 --- a/docs/advanced/document-store/redis.md +++ b/docs/advanced/document-store/redis.md @@ -130,6 +130,7 @@ The following configs can be set: | `method` | Vector similarity index algorithm in Redis, either `FLAT` or `HNSW` | `'HNSW'` | | `index_text` | Boolean flag indicating whether to index `.text`. `True` will enable full text search on `.text` | `None` | | `tag_indices` | List of tags to index as text field | `[]` | +| `language` | Optional parameter for Redis text search. Refer to the [list of supported languages](https://redis.io/docs/stack/search/reference/stemming/) | `None` | | `ef_construction` | Optional parameter for Redis HNSW algorithm | `200` | | `m` | Optional parameter for Redis HNSW algorithm | `16` | | `ef_runtime` | Optional parameter for Redis HNSW algorithm | `10` | From 26a1575ab19d40fef07043aae0f641d412756a7a Mon Sep 17 00:00:00 2001 From: AnneY Date: Mon, 19 Dec 2022 20:56:01 +0800 Subject: [PATCH 4/5] fix: numpy is updated to 1.24.0 Signed-off-by: AnneY --- docarray/array/storage/opensearch/find.py | 2 +- tests/unit/array/test_advance_indexing.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docarray/array/storage/opensearch/find.py b/docarray/array/storage/opensearch/find.py index 736df9678d5..040b464bb94 100644 --- a/docarray/array/storage/opensearch/find.py +++ b/docarray/array/storage/opensearch/find.py @@ -166,7 +166,7 @@ def _find( :return: DocumentArray containing the closest documents to the query if it is a single query, otherwise a list of DocumentArrays containing the closest Document objects for each of the queries in `query`. """ - query = np.array(query).astype(np.float) + query = np.array(query).astype(np.float32) num_rows, n_dim = ndarray.get_array_rows(query) if n_dim != 2: query = query.reshape((num_rows, -1)) diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index 712157be4c1..90ee196eba1 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -43,7 +43,7 @@ def test_getter_int_str(docs, storage, config, start_storage): docs = DocumentArray(docs, storage=storage) # getter assert docs[99].text == "99" - assert docs[np.int(99)].text == "99" + assert docs[np.int32(99)].text == "99" assert docs[-1].text == "99" assert docs[0].text == "0" # string index From 9e0d0663ab31944070101922197f5a8052103750 Mon Sep 17 00:00:00 2001 From: AnneY Date: Tue, 20 Dec 2022 10:27:33 +0800 Subject: [PATCH 5/5] fix: increase timeout-minutes for oldproto Signed-off-by: AnneY --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ddbe6ff1d2..63d6facd16a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -241,7 +241,7 @@ jobs: pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \ -v -s -m "not gpu" ${{ matrix.test-path }} echo "::set-output name=codecov_flag::docarray" - timeout-minutes: 60 + timeout-minutes: 70 env: JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" - name: Check codecov file