Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ jobs:
pytest --suppress-no-test-exit-code --cov=docarray --cov-report=xml \
-v -s -m "not gpu" ${{ matrix.test-path }}
echo "::set-output name=codecov_flag::docarray"
timeout-minutes: 60
timeout-minutes: 70
env:
JINA_AUTH_TOKEN: "${{ secrets.JINA_AUTH_TOKEN }}"
- name: Check codecov file
Expand Down
2 changes: 1 addition & 1 deletion docarray/array/storage/opensearch/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def _find(
:return: DocumentArray containing the closest documents to the query if it is a single query, otherwise a list of DocumentArrays containing
the closest Document objects for each of the queries in `query`.
"""
query = np.array(query).astype(np.float)
query = np.array(query).astype(np.float32)
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why this?

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why touching opensearch at all?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

NumPy published a new release three days ago, so CI now installs version 1.24.0, in which the `np.float` and `np.int` aliases are no longer available.

num_rows, n_dim = ndarray.get_array_rows(query)
if n_dim != 2:
query = query.reshape((num_rows, -1))
Expand Down
5 changes: 4 additions & 1 deletion docarray/array/storage/redis/backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ class RedisConfig:
redis_config: Dict[str, Any] = field(default_factory=dict)
index_text: bool = field(default=False)
tag_indices: List[str] = field(default_factory=list)
language: Optional[str] = None
batch_size: int = field(default=64)
method: str = field(default='HNSW')
ef_construction: Optional[int] = None
Expand Down Expand Up @@ -113,7 +114,9 @@ def _build_index(self, rebuild: bool = False):
self._client.ft(index_name=self._config.index_name).dropindex()

schema = self._build_schema_from_redis_config()
idef = IndexDefinition(prefix=[self._doc_prefix])
idef = IndexDefinition(
prefix=[self._doc_prefix], language=self._config.language
)
self._client.ft(index_name=self._config.index_name).create_index(
schema, definition=idef
)
Expand Down
1 change: 1 addition & 0 deletions docs/advanced/document-store/redis.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ The following configs can be set:
| `method` | Vector similarity index algorithm in Redis, either `FLAT` or `HNSW` | `'HNSW'` |
| `index_text` | Boolean flag indicating whether to index `.text`. `True` will enable full text search on `.text` | `False` |
| `tag_indices` | List of tags to index as text field | `[]` |
| `language` | Optional parameter for Redis text search. Refer to the [list of supported languages](https://redis.io/docs/stack/search/reference/stemming/) | `None` |
| `ef_construction` | Optional parameter for Redis HNSW algorithm | `200` |
| `m` | Optional parameter for Redis HNSW algorithm | `16` |
| `ef_runtime` | Optional parameter for Redis HNSW algorithm | `10` |
Expand Down
29 changes: 29 additions & 0 deletions tests/unit/array/mixins/test_find.py
Original file line number Diff line number Diff line change
Expand Up @@ -986,6 +986,35 @@ def test_redis_geo_filter(start_storage):
assert distance[0][1] < 800


def test_redis_language(start_storage):
    """Check text search on a Redis-backed array created with ``language='chinese'``.

    Three documents mixing Chinese words and ASCII tokens are stored in a
    ``DocumentArray`` whose config enables ``index_text`` and sets
    ``language`` to ``'chinese'``. Each text query must then return exactly
    the two documents whose text contains the queried term.
    """
    redis_config = {
        'n_dim': 128,
        'index_text': True,
        'language': 'chinese',
    }
    da = DocumentArray(storage='redis', config=redis_config)

    # Document id -> indexed text.
    texts_by_id = {
        '1': '意大利和西班牙 token1 token2 token3',
        '2': '法国和中国 token1 token2',
        '3': '意大利和法国 token2 token3 token4',
    }

    with da:
        da.extend(
            [Document(id=doc_id, text=text) for doc_id, text in texts_by_id.items()]
        )

    # One ASCII token query and one Chinese word query, each with the ids
    # of the documents expected to match.
    expected_hits = (
        ('token1', {'1', '2'}),
        ('意大利', {'1', '3'}),
    )
    for query, expected_ids in expected_hits:
        matches = da.find(query)
        assert len(matches) == 2
        assert set(matches[:, 'id']) == expected_ids


@pytest.mark.parametrize('storage', ['memory'])
@pytest.mark.parametrize('columns', [[('price', 'int')], {'price': 'int'}])
def test_unsupported_pre_filtering(storage, start_storage, columns):
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/array/test_advance_indexing.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_getter_int_str(docs, storage, config, start_storage):
docs = DocumentArray(docs, storage=storage)
# getter
assert docs[99].text == "99"
assert docs[np.int(99)].text == "99"
assert docs[np.int32(99)].text == "99"
assert docs[-1].text == "99"
assert docs[0].text == "0"
# string index
Expand Down