Skip to content

Commit 782c378

Browse files
committed
feat: add directory indexing example and enhance IndexContext
- Add new example script examples/index_directory/main.py that demonstrates recursive directory indexing functionality - The example supports command-line arguments for directory path and recursive/non-recursive scanning options - Add environment variable support for LLM configuration - Implement document listing, querying, and metrics reporting - Add __len__ and is_empty methods to PyIndexContext for better Python integration - Update IndexContext repr to show number of sources
1 parent d763949 commit 782c378

File tree

2 files changed

+111
-1
lines changed

2 files changed

+111
-1
lines changed

examples/index_directory/main.py

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
"""
2+
Directory indexing example — recursively index all documents in a directory.
3+
4+
Usage:
5+
python examples/index_directory/main.py /path/to/docs
6+
python examples/index_directory/main.py /path/to/docs --no-recursive
7+
8+
Environment variables:
9+
LLM_API_KEY — Your LLM API key (required)
10+
LLM_MODEL — Model name (default: google/gemini-3-flash-preview)
11+
LLM_ENDPOINT — API endpoint (default: http://localhost:4000/api/v1)
12+
"""
13+
14+
import argparse
15+
import asyncio
16+
import os
17+
18+
from vectorless import Engine, IndexContext, QueryContext
19+
20+
21+
async def main():
    """Index a directory, run one sample query, print metrics, then clean up.

    Command-line arguments:
        directory: Path of the directory to index.
        --no-recursive: Only scan top-level files instead of recursing.

    The engine is configured from the LLM_API_KEY, LLM_MODEL and
    LLM_ENDPOINT environment variables. Returns early, without indexing,
    when the directory yields no sources.
    """
    import sys  # local import: only needed for the stderr warning below

    parser = argparse.ArgumentParser(description="Index a directory of documents")
    parser.add_argument("directory", help="Directory path to index")
    parser.add_argument(
        "--no-recursive",
        action="store_true",
        help="Only scan top-level files (default: recursive)",
    )
    args = parser.parse_args()

    # Build engine
    api_key = os.environ.get("LLM_API_KEY")
    if api_key is None:
        # Keep the original placeholder fallback so the script still starts,
        # but make the misconfiguration visible instead of failing later
        # with an opaque authentication error.
        print(
            "warning: LLM_API_KEY is not set; using a placeholder key",
            file=sys.stderr,
        )
        api_key = "sk-or-v1-..."
    model = os.environ.get("LLM_MODEL", "google/gemini-3-flash-preview")
    endpoint = os.environ.get("LLM_ENDPOINT", "http://localhost:4000/api/v1")

    engine = Engine(
        workspace="./workspace_directory_example",
        api_key=api_key,
        model=model,
        endpoint=endpoint,
    )

    recursive = not args.no_recursive

    # Index directory
    ctx = IndexContext.from_dir(args.directory, recursive=recursive)

    if ctx.is_empty():
        print(f"No supported files found in: {args.directory}")
        return

    print(f"{'Recursively scanning' if recursive else 'Scanning top-level files in'}: {args.directory}")
    # IndexContext implements __len__, so report the actual number of sources.
    print(f"Found {len(ctx)} file(s) to index")

    result = await engine.index(ctx)

    print(f"\nIndexed {len(result.items)} document(s):")
    for item in result.items:
        print(f"  {item.name} ({item.doc_id})")
        if item.metrics:
            print(f"    nodes: {item.metrics.nodes_processed}, time: {item.metrics.total_time_ms}ms")

    if result.has_failures():
        print("\nFailed:")
        for f in result.failed:
            # Separate the source from its error so the line is readable.
            print(f"  {f.source}: {f.error}")

    # Query across all indexed documents
    query = "What is this about?"
    print(f'\nQuerying: "{query}"')

    answer = await engine.query(QueryContext(query))
    for item in answer.items:
        print(f"  [{item.doc_id} score={item.score:.2f}]")
        # Show at most the first 200 characters of each hit.
        preview = item.content[:200]
        print(f"  {preview}")
        if len(item.content) > 200:
            print("  ...")

    # Metrics report
    report = engine.metrics_report()
    print("\nMetrics:")
    print(
        f"  LLM: {report.llm.total_calls} calls, "
        f"{report.llm.total_tokens} tokens, "
        f"${report.llm.estimated_cost_usd:.4f}"
    )
    print(
        f"  Retrieval: {report.retrieval.total_queries} queries, "
        f"avg score {report.retrieval.avg_path_score:.2f}"
    )

    # Cleanup: remove every document the engine currently lists.
    docs = await engine.list()
    for doc in docs:
        await engine.remove(doc.id)


if __name__ == "__main__":
    asyncio.run(main())

python/src/lib.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,8 +283,18 @@ impl PyIndexContext {
283283
Ok(Self { inner: ctx })
284284
}
285285

286+
/// Number of document sources.
287+
fn __len__(&self) -> usize {
288+
self.inner.len()
289+
}
290+
291+
/// Whether no sources are present.
292+
fn is_empty(&self) -> bool {
293+
self.inner.is_empty()
294+
}
295+
286296
fn __repr__(&self) -> String {
287-
"IndexContext(...)".to_string()
297+
format!("IndexContext(sources={})", self.inner.len())
288298
}
289299
}
290300

0 commit comments

Comments
 (0)