-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathcontext_engine_demo.py
More file actions
162 lines (127 loc) · 5.81 KB
/
context_engine_demo.py
File metadata and controls
162 lines (127 loc) · 5.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
"""
Feather DB — Self-Aligned Context Engine Demo (Phase 1)
========================================================
Shows engine.ingest() with four backends:
- Claude (ANTHROPIC_API_KEY)
- OpenAI (OPENAI_API_KEY)
- Gemini (GOOGLE_API_KEY)
- Ollama (local, no key — run: ollama pull llama3.1:8b)
Without any API key: falls back to the built-in heuristic classifier.
All backends produce identical output schema — the engine is provider-agnostic.
Run:
python3 examples/context_engine_demo.py
ANTHROPIC_API_KEY=sk-ant-... python3 examples/context_engine_demo.py
OLLAMA_MODEL=mistral:7b python3 examples/context_engine_demo.py
"""
import os
import sys
import json
import tempfile
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
import feather_db
from feather_db.engine import ContextEngine
from feather_db.providers import (
ClaudeProvider, OpenAIProvider, GeminiProvider, OllamaProvider,
)
# ── Simple offline embedder (no API key needed) ───────────────────────────────
import hashlib
def embed(text: str, dim: int = 768) -> np.ndarray:
    """Deterministic offline embedder: map *text* to an L2-normalised vector.

    Each whitespace-delimited token is MD5-hashed, and the hash is scattered
    into 8 buckets with decaying weights (1, 1/2, ..., 1/8). Empty input
    yields the all-zero vector (norm 0, so no division is attempted).
    """
    out = np.zeros(dim, dtype=np.float32)
    for token in text.lower().split():
        digest = int(hashlib.md5(token.encode()).hexdigest(), 16)
        for shift in range(8):
            out[(digest >> (shift * 5)) % dim] += 1.0 / (shift + 1)
    norm = np.linalg.norm(out)
    if norm > 0:
        return out / norm
    return out
# ── Pick a provider based on available env vars ───────────────────────────────
def pick_provider():
    """Select an LLM backend from the environment.

    Priority: Anthropic, then OpenAI, then Google (each via its API-key
    env var), then a locally running Ollama server. Returns None when
    nothing is available, which makes ContextEngine use its built-in
    heuristic classifier.
    """
    candidates = (
        ("ANTHROPIC_API_KEY", " Using Claude (claude-haiku-4-5)",
         lambda: ClaudeProvider(model="claude-haiku-4-5-20251001")),
        ("OPENAI_API_KEY", " Using OpenAI (gpt-4o-mini)",
         lambda: OpenAIProvider(model="gpt-4o-mini")),
        ("GOOGLE_API_KEY", " Using Gemini (gemini-2.0-flash)",
         lambda: GeminiProvider(model="gemini-2.0-flash")),
    )
    for env_var, banner, build in candidates:
        if os.environ.get(env_var):
            print(banner)
            return build()
    # No hosted key — probe for a local Ollama server before giving up.
    try:
        import httpx
        resp = httpx.get("http://localhost:11434/api/tags", timeout=2)
        if resp.status_code == 200:
            model = os.environ.get("OLLAMA_MODEL", "llama3.1:8b")
            print(f" Using Ollama ({model})")
            return OllamaProvider(model=model)
    except Exception:
        pass  # httpx missing or server unreachable — fall through
    print(" No API key found — using built-in heuristic classifier (offline mode)")
    return None  # ContextEngine falls back to heuristic
# ── Texts to ingest ───────────────────────────────────────────────────────────
# Sample notes to ingest — each string becomes one node in the demo DB.
TEXTS = [
    "Competitor Y just launched a developer SDK with native streaming support and MIT license. "
    "10k GitHub stars in 24 hours. Directly targets our core open-source audience.",
    "User always prefers responses in bullet points rather than long paragraphs. "
    "Keep answers concise and scannable.",
    "VS Code extension weekly active users: 42,000. Up 18% month-over-month. "
    "Primary driver is autocomplete adoption in the 25-35 developer cohort.",
    "Strategy: prioritise time-to-first-value under 90 seconds for all onboarding flows. "
    "Frictionless OAuth is the single highest-leverage improvement.",
    "Community Discord request: offline mode (no internet required). "
    "47 upvotes. Three enterprise pilots are blocked specifically by this gap. "
    "Security and data-residency requirements.",
    "The retention analysis shows power users (5+ sessions/week) have 8.4x 90-day "
    "retention vs casual users. Habit formation in week 1 is the key lever.",
]
def main():
    """Run the demo: ingest sample notes, then show a semantic search and a
    two-hop context chain over the resulting graph.
    """
    # tempfile.mktemp() is deprecated and race-prone (CWE-377): another
    # process can claim the name before we create the file. mkdtemp()
    # securely creates a private directory; the DB file inside it does not
    # yet exist, preserving the original "fresh path" intent.
    db_path = os.path.join(tempfile.mkdtemp(prefix="feather_demo_"), "demo.feather")
    provider = pick_provider()
    print(f"\n{'='*60}")
    print("Feather DB — Self-Aligned Context Engine (Phase 1)")
    print(f"{'='*60}\n")
    engine = ContextEngine(
        db_path=db_path,
        dim=768,
        provider=provider,
        embedder=embed,
        namespace="devtools",
    )
    # Ingest each text; the engine classifies it (provider or heuristic)
    # and attaches metadata we print back out.
    print(f"Ingesting {len(TEXTS)} nodes...\n")
    for i, text in enumerate(TEXTS, 1):
        nid = engine.ingest(text)
        m = engine.db.get_metadata(nid)
        print(f" [{i}] id={nid}")
        print(f" entity_type = {m.get_attribute('entity_type')}")
        print(f" importance = {m.importance:.2f}")
        print(f" confidence = {m.confidence:.2f}")
        print(f" classified_by= {m.get_attribute('classified_by')}")
        print()
    # Plain k-NN search over the stored embeddings.
    print(f"{'─'*60}")
    print("Semantic search: 'what competitor moves should I watch?'\n")
    q = embed("what competitor moves should I watch?")
    res = engine.db.search(q, k=3)
    for r in res:
        m = r.metadata
        print(f" score={r.score:.4f} [{m.get_attribute('entity_type')}]")
        print(f" {m.content[:120]}...")
    print()
    # Graph traversal: seed with a query vector, then expand 2 hops.
    print(f"{'─'*60}")
    print("Context chain from 'slow onboarding drop-off' (2 hops)\n")
    q2 = embed("slow onboarding drop-off user friction")
    chain = engine.db.context_chain(q2, k=3, hops=2, modality="text")
    for node in sorted(chain.nodes, key=lambda n: (n.hop, -n.score)):
        prefix = " " * node.hop + "└─ " if node.hop > 0 else " "
        m = node.metadata
        print(f" hop={node.hop} {prefix}[{m.get_attribute('entity_type')}] {m.content[:90]}…")
    if chain.edges:
        # No placeholders in this string, so an f-string was unnecessary.
        print("\n Graph edges traversed:")
        for e in chain.edges:
            print(f" {e.source} ──{e.rel_type}──▶ {e.target} (w={e.weight:.2f})")
    print(f"\n{'='*60}")
    print(f"Done. DB: {db_path}")
    print(f"Nodes stored: {len(engine.db.get_all_ids('text'))}")
    print(f"{'='*60}\n")
if __name__ == "__main__":
    main()