Skip to content

Commit d763949

Browse files
committed
feat(python): add recursive option to from_dir method
Add a `recursive` parameter to PyIndexContext.from_dir, defaulting to false. When recursive=True, it calls IndexContext::from_dir_recursive instead of IndexContext::from_dir.

feat(rust): implement recursive directory scanning for IndexContext

Add the IndexContext::from_dir_recursive method, which scans a directory and all of its subdirectories. Refactor from_dir to use the internal scan_dir helper function. Restrict the supported extensions to .md and .pdf files only (.markdown and .txt are no longer picked up).

docs: add directory indexing example with CLI interface

Add a new example showing how to index the documents in a directory, with command-line arguments to choose between recursive and non-recursive modes.
1 parent ddaf01a commit d763949

File tree

3 files changed

+209
-15
lines changed

3 files changed

+209
-15
lines changed

python/src/lib.rs

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -219,11 +219,19 @@ impl PyIndexContext {
219219
}
220220

221221
/// Create an IndexContext from all supported files in a directory.
222+
///
223+
/// Args:
224+
/// path: Directory path to scan.
225+
/// recursive: If True, scan subdirectories recursively. Default: False.
222226
#[staticmethod]
223-
fn from_dir(path: String) -> Self {
224-
Self {
225-
inner: IndexContext::from_dir(&path),
226-
}
227+
#[pyo3(signature = (path, recursive=false))]
228+
fn from_dir(path: String, recursive: bool) -> Self {
229+
let inner = if recursive {
230+
IndexContext::from_dir_recursive(&path)
231+
} else {
232+
IndexContext::from_dir(&path)
233+
};
234+
Self { inner }
227235
}
228236

229237
/// Create an IndexContext from text content.

rust/examples/index_directory.rs

Lines changed: 123 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,123 @@
1+
// Copyright (c) 2026 vectorless developers
2+
// SPDX-License-Identifier: Apache-2.0
3+
4+
//! Directory indexing example — recursively index all documents in a directory.
5+
//!
6+
//! ```bash
7+
//! # Using environment variables for LLM config:
8+
//! LLM_API_KEY=sk-xxx LLM_MODEL=google/gemini-3-flash-preview \
9+
//! LLM_ENDPOINT=http://localhost:4000/api/v1 \
10+
//! cargo run --example index_directory -- /path/to/docs
11+
//!
12+
//! # Recursive scanning is the default, so passing --recursive is optional:
13+
//! cargo run --example index_directory -- /path/to/docs --recursive
14+
//!
15+
//! # Non-recursive (top-level only):
16+
//! cargo run --example index_directory -- /path/to/docs --no-recursive
17+
//! ```
18+
19+
use vectorless::{EngineBuilder, IndexContext};
20+
21+
#[tokio::main]
22+
async fn main() -> vectorless::Result<()> {
23+
tracing_subscriber::fmt::init();
24+
25+
// Parse CLI arguments
26+
let args: Vec<String> = std::env::args().collect();
27+
let dir = args
28+
.get(1)
29+
.map(|s| s.as_str())
30+
.unwrap_or("./samples");
31+
let recursive = !args.iter().any(|a| a == "--no-recursive");
32+
33+
// Build engine
34+
let api_key = std::env::var("LLM_API_KEY").unwrap_or_else(|_| "sk-or-v1-...".to_string());
35+
let model =
36+
std::env::var("LLM_MODEL").unwrap_or_else(|_| "google/gemini-3-flash-preview".to_string());
37+
let endpoint = std::env::var("LLM_ENDPOINT")
38+
.unwrap_or_else(|_| "http://localhost:4000/api/v1".to_string());
39+
40+
let engine = EngineBuilder::new()
41+
.with_workspace("./workspace_directory_example")
42+
.with_key(&api_key)
43+
.with_model(&model)
44+
.with_endpoint(&endpoint)
45+
.build()
46+
.await
47+
.map_err(|e| vectorless::Error::Config(e.to_string()))?;
48+
49+
// Index directory
50+
let ctx = if recursive {
51+
println!("Recursively indexing: {}", dir);
52+
IndexContext::from_dir_recursive(dir)
53+
} else {
54+
println!("Indexing top-level files in: {}", dir);
55+
IndexContext::from_dir(dir)
56+
};
57+
58+
if ctx.is_empty() {
59+
println!("No supported files found in: {}", dir);
60+
return Ok(());
61+
}
62+
63+
println!("Found {} file(s) to index", ctx.len());
64+
65+
let result = engine.index(ctx).await?;
66+
67+
println!("\nIndexed {} document(s):", result.items.len());
68+
for item in &result.items {
69+
println!(" {} ({})", item.name, item.doc_id);
70+
if let Some(metrics) = &item.metrics {
71+
println!(
72+
" nodes: {}, time: {}ms",
73+
metrics.nodes_processed,
74+
metrics.total_time_ms()
75+
);
76+
}
77+
}
78+
79+
if result.has_failures() {
80+
println!("\nFailed:");
81+
for f in &result.failed {
82+
println!(" {} — {}", f.source, f.error);
83+
}
84+
}
85+
86+
// Query across all indexed documents
87+
let query = "What is this about?";
88+
println!("\nQuerying: \"{query}\"");
89+
90+
let answer = engine
91+
.query(vectorless::QueryContext::new(query))
92+
.await?;
93+
94+
for item in &answer.items {
95+
println!(" [{} score={:.2}]", item.doc_id, item.score);
96+
let preview: String = item.content.chars().take(200).collect();
97+
println!(" {preview}");
98+
if item.content.len() > 200 {
99+
println!(" ...");
100+
}
101+
}
102+
103+
// Metrics report
104+
let report = engine.metrics_report();
105+
println!("\nMetrics:");
106+
println!(
107+
" LLM: {} calls, {} tokens, ${:.4}",
108+
report.llm.total_calls,
109+
report.llm.total_tokens,
110+
report.llm.estimated_cost_usd,
111+
);
112+
println!(
113+
" Retrieval: {} queries, avg score {:.2}",
114+
report.retrieval.total_queries, report.retrieval.avg_path_score,
115+
);
116+
117+
// Cleanup
118+
for doc in engine.list().await? {
119+
engine.remove(&doc.id).await?;
120+
}
121+
122+
Ok(())
123+
}

rust/src/client/index_context.rs

Lines changed: 74 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,11 @@
2929
//! ```rust,no_run
3030
//! use vectorless::client::IndexContext;
3131
//!
32+
//! // Non-recursive (top-level only)
3233
//! let ctx = IndexContext::from_dir("./documents");
34+
//!
35+
//! // Recursive (includes subdirectories)
36+
//! let ctx = IndexContext::from_dir_recursive("./documents");
3337
//! ```
3438
3539
use std::path::PathBuf;
@@ -149,27 +153,58 @@ impl IndexContext {
149153
/// Create from a directory path.
150154
///
151155
/// Indexes all supported files in the directory (non-recursive).
152-
/// Supported extensions: `.md`, `.pdf`, `.txt`.
156+
/// Supported extensions: `.md`, `.pdf`.
153157
pub fn from_dir(dir: impl Into<PathBuf>) -> Self {
158+
Self::scan_dir(dir, false)
159+
}
160+
161+
/// Create from a directory path with recursive scanning.
162+
///
163+
/// Recursively indexes all supported files in the directory and its
164+
/// subdirectories. Supported extensions: `.md`, `.pdf`.
165+
pub fn from_dir_recursive(dir: impl Into<PathBuf>) -> Self {
166+
Self::scan_dir(dir, true)
167+
}
168+
169+
/// Internal: scan a directory for supported document files.
170+
fn scan_dir(dir: impl Into<PathBuf>, recursive: bool) -> Self {
154171
let dir = dir.into();
155-
let supported_extensions = ["md", "markdown", "pdf", "txt"];
172+
let supported_extensions = ["md", "pdf"];
156173

157174
let mut sources = Vec::new();
158-
if let Ok(entries) = std::fs::read_dir(&dir) {
175+
Self::collect_files(&dir, &supported_extensions, recursive, &mut sources);
176+
177+
Self {
178+
sources,
179+
name: None,
180+
options: IndexOptions::default(),
181+
}
182+
}
183+
184+
/// Recursively or non-recursively collect supported files.
185+
fn collect_files(
186+
dir: &std::path::Path,
187+
extensions: &[&str],
188+
recursive: bool,
189+
sources: &mut Vec<IndexSource>,
190+
) {
191+
if let Ok(entries) = std::fs::read_dir(dir) {
192+
let mut subdirs = Vec::new();
159193
for entry in entries.flatten() {
160194
let path = entry.path();
161-
if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
162-
if supported_extensions.contains(&ext.to_lowercase().as_str()) {
195+
if path.is_dir() {
196+
if recursive {
197+
subdirs.push(path);
198+
}
199+
} else if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
200+
if extensions.contains(&ext.to_lowercase().as_str()) {
163201
sources.push(IndexSource::Path(path));
164202
}
165203
}
166204
}
167-
}
168-
169-
Self {
170-
sources,
171-
name: None,
172-
options: IndexOptions::default(),
205+
for subdir in subdirs {
206+
Self::collect_files(&subdir, extensions, recursive, sources);
207+
}
173208
}
174209
}
175210

@@ -316,4 +351,32 @@ mod tests {
316351
let ctx = IndexContext::from(PathBuf::from("./test.md"));
317352
assert_eq!(ctx.len(), 1);
318353
}
354+
355+
#[test]
fn test_from_dir_recursive() {
    // Create a temp directory structure:
    //   tmp/
    //     a.md
    //     sub/
    //       b.md
    //       deep/
    //         c.pdf
    //         ignore.dat
    //
    // The process id is part of the directory name so that two test
    // processes running at the same time do not delete each other's files.
    let tmp = std::env::temp_dir().join(format!(
        "vectorless_test_dir_recursive_{}",
        std::process::id()
    ));
    let _ = std::fs::remove_dir_all(&tmp);
    std::fs::create_dir_all(tmp.join("sub/deep")).unwrap();
    std::fs::write(tmp.join("a.md"), "# A").unwrap();
    std::fs::write(tmp.join("sub/b.md"), "# B").unwrap();
    std::fs::write(tmp.join("sub/deep/c.pdf"), b"%PDF").unwrap();
    std::fs::write(tmp.join("sub/deep/ignore.dat"), b"xxx").unwrap();

    // Non-recursive: only top-level
    let ctx = IndexContext::from_dir(&tmp);
    assert_eq!(ctx.len(), 1); // only a.md

    // Recursive: all levels, unsupported extensions skipped
    let ctx = IndexContext::from_dir_recursive(&tmp);
    assert_eq!(ctx.len(), 3); // a.md, b.md, c.pdf

    let _ = std::fs::remove_dir_all(&tmp);
}
319382
}

0 commit comments

Comments
 (0)