diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 3ddbe6ff1d2..63d6facd16a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -241,7 +241,7 @@ jobs: pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \ -v -s -m "not gpu" ${{ matrix.test-path }} echo "::set-output name=codecov_flag::docarray" - timeout-minutes: 60 + timeout-minutes: 70 env: JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}" - name: Check codecov file diff --git a/docarray/array/storage/opensearch/find.py b/docarray/array/storage/opensearch/find.py index 736df9678d5..040b464bb94 100644 --- a/docarray/array/storage/opensearch/find.py +++ b/docarray/array/storage/opensearch/find.py @@ -166,7 +166,7 @@ def _find( :return: DocumentArray containing the closest documents to the query if it is a single query, otherwise a list of DocumentArrays containing the closest Document objects for each of the queries in `query`. """ - query = np.array(query).astype(np.float) + query = np.array(query).astype(np.float32) num_rows, n_dim = ndarray.get_array_rows(query) if n_dim != 2: query = query.reshape((num_rows, -1)) diff --git a/docarray/array/storage/redis/backend.py b/docarray/array/storage/redis/backend.py index bb72fdab405..b6f9f889596 100644 --- a/docarray/array/storage/redis/backend.py +++ b/docarray/array/storage/redis/backend.py @@ -27,6 +27,7 @@ class RedisConfig: redis_config: Dict[str, Any] = field(default_factory=dict) index_text: bool = field(default=False) tag_indices: List[str] = field(default_factory=list) + language: Optional[str] = None batch_size: int = field(default=64) method: str = field(default='HNSW') ef_construction: Optional[int] = None @@ -113,7 +114,9 @@ def _build_index(self, rebuild: bool = False): self._client.ft(index_name=self._config.index_name).dropindex() schema = self._build_schema_from_redis_config() - idef = IndexDefinition(prefix=[self._doc_prefix]) + idef = IndexDefinition( + prefix=[self._doc_prefix], language=self._config.language + ) self._client.ft(index_name=self._config.index_name).create_index( schema, definition=idef ) diff --git a/docs/advanced/document-store/redis.md b/docs/advanced/document-store/redis.md index 5d5e4d20dbd..1f644d79ee9 100644 --- a/docs/advanced/document-store/redis.md +++ b/docs/advanced/document-store/redis.md @@ -130,6 +130,7 @@ The following configs can be set: | `method` | Vector similarity index algorithm in Redis, either `FLAT` or `HNSW` | `'HNSW'` | | `index_text` | Boolean flag indicating whether to index `.text`. `True` will enable full text search on `.text` | `None` | | `tag_indices` | List of tags to index as text field | `[]` | +| `language` | Optional parameter for Redis text search. Refer to the [list of supported languages](https://redis.io/docs/stack/search/reference/stemming/) | `None` | | `ef_construction` | Optional parameter for Redis HNSW algorithm | `200` | | `m` | Optional parameter for Redis HNSW algorithm | `16` | | `ef_runtime` | Optional parameter for Redis HNSW algorithm | `10` | diff --git a/tests/unit/array/mixins/test_find.py b/tests/unit/array/mixins/test_find.py index 7add054678e..810917ffbb4 100644 --- a/tests/unit/array/mixins/test_find.py +++ b/tests/unit/array/mixins/test_find.py @@ -986,6 +986,35 @@ def test_redis_geo_filter(start_storage): assert distance[0][1] < 800 +def test_redis_language(start_storage): + n_dim = 128 + da = DocumentArray( + storage='redis', + config={ + 'n_dim': n_dim, + 'index_text': True, + 'language': 'chinese', + }, + ) + + with da: + da.extend( + [ + Document(id='1', text='意大利和西班牙 token1 token2 token3'), + Document(id='2', text='法国和中国 token1 token2'), + Document(id='3', text='意大利和法国 token2 token3 token4'), + ] + ) + + results = da.find('token1') + assert len(results) == 2 + assert set(results[:, 'id']) == {'1', '2'} + + results = da.find('意大利') + assert len(results) == 2 + assert set(results[:, 'id']) == {'1', '3'} + + @pytest.mark.parametrize('storage', ['memory']) @pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}]) def test_unsupported_pre_filtering(storage, start_storage, columns): diff --git a/tests/unit/array/test_advance_indexing.py b/tests/unit/array/test_advance_indexing.py index 712157be4c1..90ee196eba1 100644 --- a/tests/unit/array/test_advance_indexing.py +++ b/tests/unit/array/test_advance_indexing.py @@ -43,7 +43,7 @@ def test_getter_int_str(docs, storage, config, start_storage): docs = DocumentArray(docs, storage=storage) # getter assert docs[99].text == "99" - assert docs[np.int(99)].text == "99" + assert docs[np.int32(99)].text == "99" assert docs[-1].text == "99" assert docs[0].text == "0" # string index