-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathphase3_benchmark.py
More file actions
93 lines (72 loc) · 3.33 KB
/
phase3_benchmark.py
File metadata and controls
93 lines (72 loc) · 3.33 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import numpy as np
from feather_db import DB, Metadata, ContextType
import time
import os
import random
def run_benchmark():
    """Run the Phase 3 Feather DB benchmark and print the timings to stdout.

    Stages, in order:

    1. Ingestion: add ``N_ITEMS`` random float32 vectors, alternating the
       ``"visual"`` and ``"text"`` modalities.
    2. Graph construction: call ``db.link`` for ``N_LINKS`` random id pairs.
    3. Search + graph walk: run ``N_QUERIES`` visual searches (``k=5``) and
       fetch the metadata of every id listed in each result's
       ``metadata.links``.
    4. Print a note about the adaptive-decay scoring overhead.

    Any existing file at ``benchmark_phase3.feather`` is deleted before the
    database is opened, so every run starts from an empty store.

    Intervals are measured with ``time.perf_counter()``: unlike
    ``time.time()`` it is monotonic and uses the highest-resolution clock
    available, which is what short benchmark intervals need.

    Returns:
        None. All results are printed.
    """
    db_path = "benchmark_phase3.feather"
    if os.path.exists(db_path):
        os.remove(db_path)

    DIM = 128  # Standard-ish dimension for performance test
    N_ITEMS = 10_000
    N_LINKS = 20_000
    N_QUERIES = 100

    print("=== ⚡️ Feather DB: Phase 3 Performance Benchmark ===")
    print(f"Config: {N_ITEMS} Items (Multimodal), {N_LINKS} Graph Links, Dim={DIM}")

    # Initialize
    db = DB.open(db_path, dim=DIM)

    # --- 1. Ingestion Benchmark ---
    print("\n[1] Benchmarking Ingestion...")
    # Vectors are generated up front so that random-number generation is not
    # part of the timed region.
    vectors = np.random.rand(N_ITEMS, DIM).astype(np.float32)

    start_time = time.perf_counter()
    for i in range(N_ITEMS):
        # Alternate modalities to test "Feather Pockets"
        modality = "visual" if i % 2 == 0 else "text"
        meta = Metadata()
        meta.content = f"Item {i} content..."
        meta.type = ContextType.FACT
        db.add(i, vectors[i], meta, modality=modality)
    ingest_time = time.perf_counter() - start_time

    print(f" -> Ingested {N_ITEMS} multimodal records in {ingest_time:.4f}s")
    print(f" -> Rate: {_per_second(N_ITEMS, ingest_time):.0f} vectors/sec")

    # --- 2. Graph Linking Benchmark ---
    print("\n[2] Benchmarking Graph Construction...")
    start_time = time.perf_counter()
    # Create random links (simulating a dense-ish knowledge graph).
    # Endpoints are drawn independently, so self-links and duplicate links
    # are possible.
    for _ in range(N_LINKS):
        from_id = random.randint(0, N_ITEMS - 1)
        to_id = random.randint(0, N_ITEMS - 1)
        db.link(from_id, to_id)
    link_time = time.perf_counter() - start_time

    print(f" -> Created {N_LINKS} links in {link_time:.4f}s")
    print(f" -> Rate: {_per_second(N_LINKS, link_time):.0f} links/sec")

    # --- 3. Retrieval + Traversal Benchmark ---
    print("\n[3] Benchmarking 'Search + Graph Walk'...")
    # This simulates: "Find relevant node (Vector Search) AND fetch its
    # linked neighbors (Graph Walk)".
    # NOTE: the same query vector is reused for every iteration.
    query = np.random.rand(DIM).astype(np.float32)
    total_neighbors_fetched = 0

    start_time = time.perf_counter()
    for _ in range(N_QUERIES):
        # 1. Vector Search (Visual)
        results = db.search(query, k=5, modality="visual")

        # 2. Graph Walk (Simulated "Expansion")
        for res in results:
            neighbors = res.metadata.links
            total_neighbors_fetched += len(neighbors)
            # Fetch each neighbor's metadata as a real application would;
            # the original author notes this is an O(1) hash map lookup.
            for n_id in neighbors:
                _ = db.get_metadata(n_id)
    query_time = time.perf_counter() - start_time

    avg_latency = (query_time / N_QUERIES) * 1000  # ms
    print(f" -> Ran {N_QUERIES} 'Search+Traverse' queries in {query_time:.4f}s")
    print(f" -> Avg Latency: {avg_latency:.2f} ms")
    print(f" -> Total Graph Nodes Visited: {total_neighbors_fetched}")

    # --- 4. Adaptive Decay Overhead ---
    print("\n[4] Benchmarking Scorer Overhead (Adaptive Decay)...")
    # Author's note: search WITHOUT decay enabled is just raw HNSW, but
    # search() now ALWAYS runs the Scorer logic in C++, so the latency
    # measured above already INCLUDES the decay calculation.
    print(f" -> (Note: The {avg_latency:.2f} ms latency INCLUDES the real-time Adaptive Decay calculation)")


def _per_second(count, elapsed):
    """Return ``count / elapsed``, or ``inf`` when no time was measured.

    Guards the rate printouts against ``ZeroDivisionError`` when a timed
    section finishes faster than the clock can resolve.
    """
    return count / elapsed if elapsed > 0 else float("inf")
# Script entry point: run the benchmark only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    run_benchmark()