Skip to content

Commit 2757f4e

Browse files
committed
v1 - Meta Data with Data Time Decay
1 parent fb06856 commit 2757f4e

28 files changed

Lines changed: 1245 additions & 116 deletions

README.md

Lines changed: 40 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,15 @@
55
## Features
66

77
- 🚀 **High Performance**: Built with C++ and optimized HNSW algorithm
8-
- 🐍 **Python Integration**: Native Python bindings with NumPy support
9-
- 🦀 **Rust CLI**: Command-line interface for easy database operations
10-
- 💾 **Persistent Storage**: Custom binary format with automatic save/load
11-
- 🔍 **Fast Search**: Approximate nearest neighbor search with configurable parameters
12-
- 📦 **Multi-Language**: C++, Python, and Rust APIs
8+
- 🧠 **Context Engine**: Structured metadata storage (Facts, Preferences, Events, Conversations)
9+
- ⏳ **Temporal Retrieval**: Time-weighted scoring with exponential decay
10+
- 🔍 **Filtered Search**: Domain-logic filtering (by type, source, tags) during HNSW search
11+
- 🐍 **Python Integration**: Native Python bindings with `FilterBuilder` support
12+
- 🦀 **Rust CLI**: Enhanced CLI for metadata and filtered operations
13+
- 💾 **Persistent Storage**: Version 2 binary format with automatic metadata persistence
14+
15+
[![PyPI](https://img.shields.io/pypi/v/feather-db)](https://pypi.org/project/feather-db/)
16+
[![Crates.io](https://img.shields.io/crates/v/feather-db-cli)](https://crates.io/crates/feather-db-cli)
1317

1418
## Quick Start
1519

@@ -36,6 +40,25 @@ for i, (id, dist) in enumerate(zip(ids, distances)):
3640

3741
# Save the database
3842
db.save()
43+
44+
### Context Engine (Phase 2)
45+
46+
```python
47+
from feather_db import DB, Metadata, ContextType, ScoringConfig, FilterBuilder
48+
49+
# Add with metadata
50+
meta = Metadata()
51+
meta.content = "User prefers dark mode"
52+
meta.type = ContextType.PREFERENCE
53+
meta.importance = 0.9
54+
db.add(id=1, vec=embedding, meta=meta)
55+
56+
# Search with filters and temporal decay
57+
fb = FilterBuilder()
58+
filter = fb.types(ContextType.PREFERENCE).min_importance(0.5).build()
59+
60+
results = db.search(query, k=5, filter=filter, scoring=ScoringConfig(half_life=30))
61+
```
3962
```
4063
4164
### C++ Usage
@@ -78,6 +101,18 @@ feather add my_db.feather 2 --npy vector2.npy
78101
feather search my_db.feather --npy query.npy --k 10
79102
```
80103

104+
### Rust CLI
105+
106+
The CLI is available as a native binary for fast database management.
107+
108+
```bash
109+
# Add with metadata
110+
feather add --npy vector.npy --content "Hello world" --source "cli" my_db 123
111+
112+
# Search with filters
113+
feather search --npy query.npy --type-filter 0 --source-filter "cli" my_db
114+
```
115+
81116
## Installation
82117

83118
### Python Package (Recommended)

bindings/feather.cpp

Lines changed: 48 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -8,35 +8,65 @@ namespace py = pybind11;
88
PYBIND11_MODULE(core, m) {
    m.doc() = "Feather: SQLite for Vectors";

    // Context categories a metadata record can be tagged with.
    py::enum_<feather::ContextType>(m, "ContextType")
        .value("FACT", feather::ContextType::FACT)
        .value("PREFERENCE", feather::ContextType::PREFERENCE)
        .value("EVENT", feather::ContextType::EVENT)
        .value("CONVERSATION", feather::ContextType::CONVERSATION)
        .export_values();

    // Per-record metadata; every field is readable and writable from Python.
    py::class_<feather::Metadata>(m, "Metadata")
        .def(py::init<>())
        .def_readwrite("timestamp", &feather::Metadata::timestamp)
        .def_readwrite("importance", &feather::Metadata::importance)
        .def_readwrite("type", &feather::Metadata::type)
        .def_readwrite("source", &feather::Metadata::source)
        .def_readwrite("content", &feather::Metadata::content)
        .def_readwrite("tags_json", &feather::Metadata::tags_json);

    // Temporal-decay scoring knobs: half-life (days), blend weight, floor.
    py::class_<feather::ScoringConfig>(m, "ScoringConfig")
        .def(py::init<float, float, float>(),
             py::arg("half_life") = 30.0f,
             py::arg("weight") = 0.3f,
             py::arg("min") = 0.0f)
        .def_readwrite("decay_half_life_days", &feather::ScoringConfig::decay_half_life_days)
        .def_readwrite("time_weight", &feather::ScoringConfig::time_weight)
        .def_readwrite("min_weight", &feather::ScoringConfig::min_weight);

    // Metadata predicate applied while searching.
    py::class_<feather::SearchFilter>(m, "SearchFilter")
        .def(py::init<>())
        .def_readwrite("types", &feather::SearchFilter::types)
        .def_readwrite("source", &feather::SearchFilter::source)
        .def_readwrite("source_prefix", &feather::SearchFilter::source_prefix)
        .def_readwrite("timestamp_after", &feather::SearchFilter::timestamp_after)
        .def_readwrite("timestamp_before", &feather::SearchFilter::timestamp_before)
        .def_readwrite("importance_gte", &feather::SearchFilter::importance_gte)
        .def_readwrite("tags_contains", &feather::SearchFilter::tags_contains);

    // Read-only view of a single search hit.
    py::class_<feather::DB::SearchResult>(m, "SearchResult")
        .def_readonly("id", &feather::DB::SearchResult::id)
        .def_readonly("score", &feather::DB::SearchResult::score)
        .def_readonly("metadata", &feather::DB::SearchResult::metadata);

    // py::nodelete: Python never destroys the DB handle; the C++ side
    // owns its lifetime.
    py::class_<feather::DB, std::unique_ptr<feather::DB, py::nodelete>>(m, "DB")
        .def_static("open", &feather::DB::open, py::arg("path"), py::arg("dim") = 768)

        // Insert one vector under `id`, with optional metadata (a
        // default-constructed Metadata is stored when none is given).
        .def("add",
             [](feather::DB& db, uint64_t id, py::array_t<float> vec,
                const std::optional<feather::Metadata>& meta) {
                 const auto info = vec.request();
                 if (info.size != db.dim()) throw std::runtime_error("Dimension mismatch");
                 const float* data = static_cast<const float*>(info.ptr);
                 std::vector<float> values(data, data + info.size);
                 db.add(id, values, meta ? *meta : feather::Metadata());
             },
             py::arg("id"), py::arg("vec"), py::arg("meta") = std::nullopt)

        // k-NN search; filter/scoring are optional (nullptr = disabled).
        .def("search",
             [](const feather::DB& db, py::array_t<float> q, size_t k = 5,
                const feather::SearchFilter* filter = nullptr,
                const feather::ScoringConfig* scoring = nullptr) {
                 const auto info = q.request();
                 if (info.size != db.dim()) throw std::runtime_error("Query dimension mismatch");
                 const float* data = static_cast<const float*>(info.ptr);
                 std::vector<float> query_vec(data, data + info.size);
                 return db.search(query_vec, k, filter, scoring);
             },
             py::arg("q"), py::arg("k") = 5, py::arg("filter") = nullptr, py::arg("scoring") = nullptr)

        .def("get_metadata", &feather::DB::get_metadata, py::arg("id"))
        .def("save", &feather::DB::save)
        .def("dim", &feather::DB::dim);
}

examples/advanced_context_test.py

Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
import os
2+
import time
3+
import json
4+
import numpy as np
5+
from feather_db import DB, Metadata, ContextType, ScoringConfig, FilterBuilder
6+
7+
# -----------------------------------------------------------------------------
8+
# 1. Deterministic Mock Embedding
9+
# -----------------------------------------------------------------------------
10+
# We'll map specific keywords to vector dimensions to simulate "meaning".
11+
# Dim 0: "work/business"
12+
# Dim 1: "personal/social"
13+
# Dim 2: "tech/coding"
14+
# Dim 3: "urgent/important"
15+
# Dim 4: "finance/money"
16+
DIM = 5


def mock_embed(text):
    """Embed `text` into a DIM-dimensional unit vector, deterministically.

    Each dimension is bumped by 1.0 when any keyword of its topic appears:
    dim 0 work/business, dim 1 personal/social, dim 2 tech/coding,
    dim 3 urgent/important, dim 4 finance/money. The result is L2-normalized.

    Texts with no keyword hits fall back to small pseudo-random noise (to
    avoid degenerate zero vectors); the noise generator is seeded from the
    text itself so the embedding is reproducible across runs — the original
    used unseeded np.random.rand, which broke the promised determinism.
    """
    text = text.lower()
    vec = np.zeros(DIM, dtype=np.float32)

    if any(w in text for w in ["meeting", "work", "campaign", "project", "quarter", "deadline"]):
        vec[0] += 1.0
    if any(w in text for w in ["party", "dinner", "friend", "birthday", "movie"]):
        vec[1] += 1.0
    if any(w in text for w in ["code", "python", "cpp", "db", "api", "bug", "feature"]):
        vec[2] += 1.0
    if any(w in text for w in ["urgent", "asap", "critical", "blocking"]):
        vec[3] += 1.0
    if any(w in text for w in ["budget", "cost", "price", "invoice", "salary"]):
        vec[4] += 1.0

    # Normalize, or fall back to deterministic noise for keyword-free text.
    norm = np.linalg.norm(vec)
    if norm > 0:
        vec = vec / norm
    else:
        import zlib  # local import: only needed on the no-keyword path
        rng = np.random.default_rng(zlib.crc32(text.encode("utf-8")))
        vec = rng.random(DIM).astype(np.float32) * 0.1

    return vec
42+
43+
# -----------------------------------------------------------------------------
# 2. Dataset Generation
# -----------------------------------------------------------------------------
NOW = int(time.time())
DAY = 86400  # seconds per day

# Synthetic corpus: each row carries everything needed to build one Metadata
# record at ingest time (age_days becomes an absolute timestamp later).
DATASET = [
    # Recent highly important work
    {"id": 1,
     "text": "Urgent bug fix needed for API auth in Phase 2",
     "type": ContextType.FACT, "source": "slack:dev-team",
     "importance": 1.0, "age_days": 0.1,  # Just now
     "tags": ["bug", "api", "v2"]},
    # Older work meeting
    {"id": 2,
     "text": "Q1 Planning meeting notes: focus on stability",
     "type": ContextType.EVENT, "source": "gcal",
     "importance": 0.8, "age_days": 30,  # 1 month ago
     "tags": ["planning", "meeting"]},
    # Personal info
    {"id": 3,
     "text": "User prefers dark mode and high contrast",
     "type": ContextType.PREFERENCE, "source": "settings_ui",
     "importance": 1.0, "age_days": 100,  # Old but permanent preference
     "tags": ["ui", "a11y"]},
    # Code snippet
    {"id": 4,
     "text": "Python script to migrate database schema",
     "type": ContextType.FACT, "source": "github",
     "importance": 0.5, "age_days": 2,
     "tags": ["python", "migration"]},
    # Irrelevant noise
    {"id": 5,
     "text": "Dinner receipt for pizza party",
     "type": ContextType.EVENT, "source": "email",
     "importance": 0.1, "age_days": 5,
     "tags": ["finance", "food"]},
    {"id": 6,
     "text": "Invoice for cloud hosting services",
     "type": ContextType.FACT, "source": "email",
     "importance": 0.9, "age_days": 1,
     "tags": ["finance", "cloud"]},
]
110+
111+
# -----------------------------------------------------------------------------
# 3. Setup and Population
# -----------------------------------------------------------------------------
DB_PATH = "test_db_advanced.feather"

# Start from a clean slate so repeated runs are reproducible.
if os.path.exists(DB_PATH):
    os.remove(DB_PATH)

print(f"🚀 Initializing Feather DB at {DB_PATH} with dim={DIM}")
db = DB.open(DB_PATH, DIM)

print(f"📥 Ingesting {len(DATASET)} diverse records...")
for record in DATASET:
    # Translate the dataset row into a Metadata object; relative ages
    # become absolute epoch timestamps.
    meta = Metadata()
    meta.content = record["text"]
    meta.type = record["type"]
    meta.source = record["source"]
    meta.importance = record["importance"]
    meta.timestamp = int(NOW - (record["age_days"] * DAY))
    meta.tags_json = json.dumps(record["tags"])

    db.add(record["id"], mock_embed(record["text"]), meta)
133+
134+
# -----------------------------------------------------------------------------
# 4. Scenario Testing
# -----------------------------------------------------------------------------

def run_query(test_name, query_text, filter_obj=None, scoring_obj=None):
    """Embed `query_text`, run a k=3 search and pretty-print the hits.

    filter_obj and scoring_obj are forwarded verbatim to db.search()
    (None disables filtering / temporal scoring respectively).
    Reads the module-level `db`, `NOW` and `DAY`.
    """
    print(f"\n🧪 Test: {test_name}")
    print(f" Query: '{query_text}'")
    q_vec = mock_embed(query_text)

    # The original routed scoring_obj through a pointless local alias
    # (`real_scoring`); pass it straight through instead.
    results = db.search(q_vec, k=3, filter=filter_obj, scoring=scoring_obj)

    for i, r in enumerate(results):
        # Reconstruct the record age from its stored timestamp for display.
        age_days = (NOW - r.metadata.timestamp) / DAY
        print(f" {i+1}. [Score: {r.score:.4f}] [Age: {age_days:.1f}d] [{r.metadata.type}] {r.metadata.content}")
152+
# Scenario A: Baseline Similarity (Is "bug" related to "code"?)
# Expect: Bug fix (ID 1) and Migration script (ID 4) to appear.
run_query("Baseline Context Retrieval (Similarity Only)", "coding bug issue",
          scoring_obj=ScoringConfig(half_life=365, weight=0.0))

# Scenario B: Finding the most URGENT recent item
# Expect: The "Urgent bug fix" (ID 1) should be #1 significantly because of recency + importance.
run_query("Urgent Recent Items (High Temporal Weight)", "urgent issue",
          scoring_obj=ScoringConfig(half_life=1, weight=0.5))

# Scenario C: "Recall" context from a month ago
# Expect: Q1 Planning meeting (ID 2).
run_query("Memory Recall (Older Events)", "planning meeting",
          scoring_obj=ScoringConfig(half_life=60, weight=0.1))

# Scenario D: Filtering for specific source (e.g. only 'email')
# Expect: Invoice (ID 6) and Pizza receipt (ID 5).
email_filter = FilterBuilder().types([ContextType.FACT, ContextType.EVENT]).source("email").build()
run_query("Source Filtering (Emails only)", "money cost", filter_obj=email_filter)

# Scenario E: Filtering for Preferences (User personalization)
# Expect: Only ID 3.
# NOTE(review): use a FRESH FilterBuilder here. The original reused Scenario
# D's builder instance, which presumably still carried the source("email")
# constraint and would wrongly exclude ID 3 (source "settings_ui") — a new
# builder is correct regardless of whether builders retain state.
pref_filter = FilterBuilder().types(ContextType.PREFERENCE).build()
run_query("Personalization (Preferences only)", "ui setting", filter_obj=pref_filter)

print("\n✅ Verification Complete.")

0 commit comments

Comments
 (0)