Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 74 additions & 25 deletions scripts/benchmarking.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
from rich.console import Console
from rich.table import Table

np.random.seed(123)

n_index_values = [1_000_000]
n_query = 1
D = 128
Expand All @@ -26,6 +28,12 @@
help='Whether to use default HNSW configurations',
action='store_true',
)

# Optional flag naming the backends to leave out of the benchmark run, given
# as one comma separated string, e.g. `--exclude-backends sqlite,redis`.
# No default is set, so the parsed value is None when the flag is omitted.
parser.add_argument(
'--exclude-backends',
help='list of comma separated backends to exclude from the benchmarks',
type=str,
)
args = parser.parse_args()

times = {}
Expand Down Expand Up @@ -98,12 +106,12 @@ def recall(predicted, relevant, eval_at):

if args.default_hnsw:
storage_backends = [
# ('memory', None),
# ('sqlite', None),
# (
# 'annlite',
# {'n_dim': D},
# ),
('memory', None),
('sqlite', None),
(
'annlite',
{'n_dim': D, 'columns': [('i', 'int')]},
),
(
'qdrant',
{
Expand All @@ -117,36 +125,40 @@ def recall(predicted, relevant, eval_at):
{
    'n_dim': D,
    'port': '41234',
    # Declare the filterable column as a list of (name, type) pairs, the same
    # shape used by every other backend config in this script; a bare
    # ('i', 'int') tuple is not a list of column definitions.
    'columns': [('i', 'int')],
},
),
(
'elasticsearch',
{
'n_dim': D,
'port': '41235',
'hosts': 'http://localhost:41235',
'columns': [('i', 'int')],
},
),
(
'redis',
{
'n_dim': D,
'port': '41236',
'columns': [('i', 'int')],
},
),
]
else:
storage_backends = [
# ('memory', None),
# ('sqlite', None),
# (
# 'annlite',
# {
# 'n_dim': D,
# 'ef_construction': 100,
# 'ef_search': 100,
# 'max_connection': 16,
# },
# ),
('memory', None),
('sqlite', None),
(
'annlite',
{
'n_dim': D,
'ef_construction': 100,
'ef_search': 100,
'max_connection': 16,
'columns': [('i', 'int')],
},
),
(
'qdrant',
{
Expand All @@ -165,15 +177,47 @@ def recall(predicted, relevant, eval_at):
'ef_construction': 100,
'max_connections': 16,
'port': '41234',
'columns': [('i', 'int')],
},
),
(
'elasticsearch',
{'n_dim': D, 'ef_construction': 100, 'm': 16, 'port': '41235'},
{
'n_dim': D,
'ef_construction': 100,
'm': 16,
'hosts': 'http://localhost:41235',
'columns': [('i', 'int')],
},
),
(
    'redis',
    {
        'n_dim': D,
        'ef_construction': 100,
        'm': 16,
        'ef_runtime': 100,
        'port': '41236',
        # The redis filter used by the find-by-condition step queries the
        # field `i` directly, so declare it here exactly as the default-HNSW
        # redis config does.
        'columns': [('i', 'int')],
    },
),
('redis', {'n_dim': D, 'ef_construction': 100, 'm': 16, 'port': '41236'}),
]

# Parse --exclude-backends once instead of re-splitting it for every backend.
# Names are trimmed and empty entries dropped, so "sqlite, redis" or a
# trailing comma still excludes the intended backends. The flag is optional,
# hence the fallback to an empty string when it is None.
excluded_backends = {
    name.strip()
    for name in (args.exclude_backends or '').split(',')
    if name.strip()
}

# Keep only the (backend, config) pairs that were not excluded.
storage_backends = [
    (backend, config)
    for backend, config in storage_backends
    if backend not in excluded_backends
]

# Filter handed to find_by_condition for each backend, keyed by backend name.
# Every entry selects on the field `i` with the value 0; the dict shapes
# differ per backend (presumably each store's own filter syntax -- confirm
# against the document store docs before adding a new backend).
storage_backend_filters = {
'memory': {'tags__i': {'$eq': 0}},
'sqlite': {'tags__i': {'$eq': 0}},
'annlite': {'i': {'$eq': 0}},
'qdrant': {'tags__i': {'$eq': 0}},
'weaviate': {'path': 'i', 'operator': 'Equal', 'valueInt': 0},
'elasticsearch': {'match': {'i': 0}},
'redis': {'i': {'$eq': 0}},
}

table = Table(
title=f'DocArray Benchmarking n_index={n_index_values[-1]} n_query={n_query} D={D} K={K}'
)
Expand Down Expand Up @@ -236,13 +280,15 @@ def recall(predicted, relevant, eval_at):
f'finding {n_query} docs by vector averaged {n_vector_queries} times ...'
)
if backend == 'sqlite':
find_by_vector_time, result = find_by_vector(da, vector_queries[0])
find_by_vector_time, result = find_by_vector(
da, vector_queries[0].squeeze()
)
recall_at_k = recall(result, ground_truth[0], K)
else:
recall_at_k_values = []
find_by_vector_times = []
for i, query in enumerate(vector_queries):
find_by_vector_time, results = find_by_vector(da, query)
find_by_vector_time, results = find_by_vector(da, query.squeeze())
find_by_vector_times.append(find_by_vector_time)
if backend == 'memory':
ground_truth.append(results)
Expand All @@ -256,7 +302,9 @@ def recall(predicted, relevant, eval_at):
)

console.print(f'finding {n_query} docs by condition ...')
find_by_condition_time, _ = find_by_condition(da, {'tags__i': {'$eq': 0}})
find_by_condition_time, _ = find_by_condition(
da, storage_backend_filters[backend]
)

if idx == len(n_index_values) - 1:
table.add_row(
Expand Down Expand Up @@ -290,11 +338,12 @@ def recall(predicted, relevant, eval_at):
find_by_vector_values[str(n_index)].append(find_by_vector_time)
create_values[str(n_index)].append(create_time)
except Exception as e:
console.print(f'Storage Backend {backend} failed: {e}')
console.print(f'Storage Backend {backend} failed')
raise e

find_df = pd.DataFrame(find_by_vector_values)
find_df.index = [backend for backend, _ in storage_backends]
find_df = find_df.drop(['sqlite'])
find_df = find_df.drop(['sqlite'], errors='ignore')
print(find_df)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(17, 5))

Expand Down
12 changes: 8 additions & 4 deletions scripts/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ services:
weaviate:
image: semitechnologies/weaviate:1.13.2
ports:
- "41234:41234"
- "41234:8080"
environment:
CONTEXTIONARY_URL: contextionary:9999
QUERY_DEFAULTS_LIMIT: 25
Expand All @@ -12,7 +12,7 @@ services:
qdrant:
image: qdrant/qdrant:v0.7.0
ports:
- "41233:41233"
- "41233:6333"
ulimits: # Only required for tests, as there are a lot of collections created
nofile:
soft: 65535
Expand All @@ -23,8 +23,12 @@ services:
- xpack.security.enabled=false
- discovery.type=single-node
ports:
- "41235:41235"
- "41235:9200"
redis:
image: redislabs/redisearch:2.6.0
ports:
- "41236:41236"
- "41236:6379"

networks:
elastic:
name: elastic