
Commit f50799c

fix: Update cache docs
1 parent 58e1b38 commit f50799c

3 files changed: +29 additions, -7 deletions


.beads/issues.jsonl

Lines changed: 1 addition & 0 deletions
@@ -9,6 +9,7 @@
{"id":"el-5ef","title":"Add Cross-Connection Security Tests","description":"Add comprehensive security tests to verify connections cannot access each other's resources.\n\n**Context**: ecto_libsql implements ownership tracking (TransactionEntry.conn_id, cursor ownership, statement ownership) but needs comprehensive tests to verify security boundaries.\n\n**Security Boundaries to Test**:\n\n**1. Transaction Isolation**:\n```elixir\ntest \"connection A cannot access connection B's transaction\" do\n {:ok, conn_a} = connect(database: \"a.db\")\n {:ok, conn_b} = connect(database: \"b.db\")\n \n {:ok, trx_id} = begin_transaction(conn_a)\n \n # Should fail - transaction belongs to conn_a\n assert {:error, msg} = execute_with_transaction(conn_b, trx_id, \"SELECT 1\")\n assert msg =~ \"does not belong to this connection\"\nend\n```\n\n**2. Statement Isolation**:\n```elixir\ntest \"connection A cannot access connection B's prepared statement\" do\n {:ok, conn_a} = connect(database: \"a.db\")\n {:ok, conn_b} = connect(database: \"b.db\")\n \n {:ok, stmt_id} = prepare_statement(conn_a, \"SELECT 1\")\n \n # Should fail - statement belongs to conn_a\n assert {:error, msg} = execute_prepared(conn_b, stmt_id, [])\n assert msg =~ \"Statement not found\" or msg =~ \"does not belong\"\nend\n```\n\n**3. Cursor Isolation**:\n```elixir\ntest \"connection A cannot access connection B's cursor\" do\n {:ok, conn_a} = connect(database: \"a.db\")\n {:ok, conn_b} = connect(database: \"b.db\")\n \n {:ok, cursor_id} = declare_cursor(conn_a, \"SELECT 1\")\n \n # Should fail - cursor belongs to conn_a\n assert {:error, msg} = fetch_cursor(conn_b, cursor_id, 10)\n assert msg =~ \"Cursor not found\" or msg =~ \"does not belong\"\nend\n```\n\n**4. 
Savepoint Isolation**:\n```elixir\ntest \"connection A cannot access connection B's savepoint\" do\n {:ok, conn_a} = connect(database: \"a.db\")\n {:ok, conn_b} = connect(database: \"b.db\")\n \n {:ok, trx_id} = begin_transaction(conn_a)\n {:ok, _} = savepoint(conn_a, trx_id, \"sp1\")\n \n # Should fail - savepoint belongs to conn_a's transaction\n assert {:error, msg} = rollback_to_savepoint(conn_b, trx_id, \"sp1\")\n assert msg =~ \"does not belong to this connection\"\nend\n```\n\n**5. Concurrent Access Races**:\n```elixir\ntest \"concurrent cursor fetches are safe\" do\n {:ok, conn} = connect()\n {:ok, cursor_id} = declare_cursor(conn, \"SELECT * FROM large_table\")\n \n # Multiple processes try to fetch concurrently\n tasks = for _ \u003c- 1..10 do\n Task.async(fn -\u003e fetch_cursor(conn, cursor_id, 10) end)\n end\n \n results = Task.await_many(tasks)\n \n # Should not crash, should handle gracefully\n assert Enum.all?(results, fn r -\u003e match?({:ok, _}, r) or match?({:error, _}, r) end)\nend\n```\n\n**6. Process Crash Cleanup**:\n```elixir\ntest \"resources cleaned up when connection process crashes\" do\n # Start connection in separate process\n pid = spawn(fn -\u003e\n {:ok, conn} = connect()\n {:ok, trx_id} = begin_transaction(conn)\n {:ok, cursor_id} = declare_cursor(conn, \"SELECT 1\")\n \n # Store IDs for verification\n send(self(), {:ids, conn.conn_id, trx_id, cursor_id})\n \n # Wait to be killed\n Process.sleep(:infinity)\n end)\n \n receive do\n {:ids, conn_id, trx_id, cursor_id} -\u003e\n # Kill the process\n Process.exit(pid, :kill)\n Process.sleep(100)\n \n # Resources should be cleaned up (or marked orphaned)\n # Verify they can't be accessed\n end\nend\n```\n\n**7. 
Connection Pool Isolation**:\n```elixir\ntest \"pooled connections are isolated\" do\n # Get two connections from pool\n conn1 = get_pooled_connection()\n conn2 = get_pooled_connection()\n \n # Each should have independent resources\n {:ok, trx1} = begin_transaction(conn1)\n {:ok, trx2} = begin_transaction(conn2)\n \n # Should not interfere\n assert trx1 != trx2\n \n # Commit conn1, should not affect conn2\n :ok = commit_transaction(conn1, trx1)\n assert is_in_transaction?(conn2, trx2)\nend\n```\n\n**Implementation**:\n\n1. **Create test file** (test/security_test.exs):\n - Transaction isolation tests\n - Statement isolation tests\n - Cursor isolation tests\n - Savepoint isolation tests\n - Concurrent access tests\n - Cleanup tests\n - Pool isolation tests\n\n2. **Add stress tests** for concurrent access patterns\n\n3. **Add fuzzing** for edge cases\n\n**Files**:\n- NEW: test/security_test.exs\n- Reference: FEATURE_CHECKLIST.md line 290-310\n- Reference: LIBSQL_FEATURE_COMPARISON.md section 4\n\n**Acceptance Criteria**:\n- [ ] Transaction isolation verified\n- [ ] Statement isolation verified\n- [ ] Cursor isolation verified\n- [ ] Savepoint isolation verified\n- [ ] Concurrent access safe\n- [ ] Resource cleanup verified\n- [ ] Pool isolation verified\n- [ ] All tests pass consistently\n- [ ] No race conditions detected\n\n**Security Guarantees**:\nAfter these tests pass, we can guarantee:\n- Connections cannot access each other's transactions\n- Connections cannot access each other's prepared statements\n- Connections cannot access each other's cursors\n- Savepoints are properly scoped to owning transaction\n- Concurrent access is thread-safe\n- Resources are cleaned up on connection close\n\n**References**:\n- LIBSQL_FEATURE_COMPARISON.md section \"Error Handling for Edge Cases\" line 290-310\n- Current implementation: TransactionEntry.conn_id ownership tracking\n\n**Priority**: P2 - Important for security guarantees\n**Effort**: 2 
days","status":"closed","priority":2,"issue_type":"task","created_at":"2025-12-30T17:46:44.853925+11:00","created_by":"drew","updated_at":"2026-01-01T10:10:45.289402+11:00","closed_at":"2026-01-01T10:10:45.289404+11:00"}
{"id":"el-6zu","title":"ALTER TABLE Column Modifications (libSQL Extension)","description":"LibSQL-specific extension for modifying columns. Syntax: ALTER TABLE table_name ALTER COLUMN column_name TO column_name TYPE constraints. Can modify column types, constraints, DEFAULT values. Can add/remove foreign key constraints.\n\nThis would enable better migration support for column alterations that standard SQLite doesn't support.\n\nDesired API:\n alter table(:users) do\n modify :email, :string, null: false # Actually works in libSQL!\n end\n\nEffort: 3-4 days.","status":"closed","priority":2,"issue_type":"feature","created_at":"2025-12-30T17:43:58.072377+11:00","created_by":"drew","updated_at":"2026-01-01T10:07:18.008176+11:00","closed_at":"2026-01-01T10:07:18.008178+11:00"}
{"id":"el-7t8","title":"Full-Text Search (FTS5) Schema Integration","description":"Partial - Extension loading works, but no schema helpers. libSQL 3.45.1 has comprehensive FTS5 extension with advanced features: phrase queries, term expansion, ranking, tokenisation, custom tokenisers.\n\nDesired API:\n create table(:posts, fts5: true) do\n add :title, :text, fts_weight: 10\n add :body, :text\n add :author, :string, fts_indexed: false\n end\n\n from p in Post, where: fragment(\"posts MATCH ?\", \"search terms\"), order_by: [desc: fragment(\"rank\")]\n\nPRIORITY: Recommended as #7 in implementation order - major feature.\n\nEffort: 5-7 days.","status":"open","priority":2,"issue_type":"feature","created_at":"2025-12-30T17:35:51.738732+11:00","created_by":"drew","updated_at":"2025-12-30T17:43:18.522669+11:00"}
+{"id":"el-9j1","title":"Optimise LRU cache eviction for large caches","status":"open","priority":4,"issue_type":"task","created_at":"2026-01-01T22:55:00.72463+11:00","created_by":"drew","updated_at":"2026-01-01T22:55:00.72463+11:00"}
{"id":"el-a17","title":"JSONB Binary Format Support","description":"New in libSQL 3.45. Binary encoding of JSON for faster processing. 5-10% smaller than text JSON. Backwards compatible with text JSON - automatically converted between formats. All JSON functions work with both text and JSONB.\n\nCould provide performance benefits for JSON-heavy applications. May require new Ecto type or option.\n\nEffort: 2-3 days.","status":"open","priority":3,"issue_type":"feature","created_at":"2025-12-30T17:43:58.200973+11:00","created_by":"drew","updated_at":"2025-12-30T17:43:58.200973+11:00"}
{"id":"el-aob","title":"Implement True Streaming Cursors","description":"Refactor cursor implementation to use true streaming instead of loading all rows into memory.\n\n**Problem**: Current cursor implementation loads ALL rows into memory upfront (lib.rs:1074-1100), then paginates through the buffer. This causes high memory usage for large datasets.\n\n**Current (Memory Issue)**:\n```rust\n// MEMORY ISSUE (lib.rs:1074-1100):\nlet rows = query_result.into_iter().collect::\u003cVec\u003c_\u003e\u003e(); // ← Loads everything!\n```\n\n**Impact**:\n- ✅ Works fine for small/medium datasets (\u003c 100K rows)\n- ⚠️ High memory usage for large datasets (\u003e 1M rows)\n- ❌ Cannot stream truly large datasets (\u003e 10M rows)\n\n**Example**:\n```elixir\n# Current: Loads 1 million rows into RAM\ncursor = Repo.stream(large_query)\nEnum.take(cursor, 100) # Only want 100, but loaded 1M!\n\n# Desired: True streaming, loads on-demand\ncursor = Repo.stream(large_query)\nEnum.take(cursor, 100) # Only loads 100 rows\n```\n\n**Fix Required**:\n1. Refactor to use libsql Rows async iterator\n2. Stream batches on-demand instead of loading all upfront\n3. Store iterator state in cursor registry\n4. Fetch next batch when cursor is fetched\n5. 
Update CursorData structure to support streaming\n\n**Files**:\n- native/ecto_libsql/src/cursor.rs (major refactor)\n- native/ecto_libsql/src/models.rs (update CursorData struct)\n- test/ecto_integration_test.exs (add streaming tests)\n- NEW: test/performance_test.exs (memory usage benchmarks)\n\n**Acceptance Criteria**:\n- [ ] Cursors stream batches on-demand\n- [ ] Memory usage stays constant regardless of result size\n- [ ] Can stream 10M+ rows without OOM\n- [ ] Performance: Streaming vs loading all benchmarked\n- [ ] All existing cursor tests pass\n- [ ] New tests verify streaming behaviour\n\n**Test Requirements**:\n```elixir\ntest \"cursor streams 1M rows without loading all into memory\" do\n # Insert 1M rows\n # Declare cursor\n # Verify memory usage \u003c 100MB while streaming\n # Verify all rows eventually fetched\nend\n```\n\n**References**:\n- LIBSQL_FEATURE_MATRIX_FINAL.md section 9\n- FEATURE_CHECKLIST.md Cursor Methods\n\n**Priority**: P1 - Critical for large dataset processing\n**Effort**: 4-5 days (major refactor)","status":"open","priority":1,"issue_type":"feature","created_at":"2025-12-30T17:43:30.692425+11:00","created_by":"drew","updated_at":"2025-12-30T17:43:30.692425+11:00"}
{"id":"el-djv","title":"Implement max_write_replication_index() NIF","description":"Add max_write_replication_index() NIF to track maximum write frame for replication monitoring.\n\n**Context**: The libsql API provides max_write_replication_index() for tracking the highest frame number that has been written. This is useful for monitoring replication lag and coordinating replica sync.\n\n**Current Status**: \n- ⚠️ LibSQL 0.9.29 provides the API\n- ⚠️ Not yet wrapped in ecto_libsql\n- Identified in LIBSQL_FEATURE_MATRIX_FINAL.md section 5\n\n**Use Case**:\n```elixir\n# Primary writes data\n{:ok, _} = Repo.query(\"INSERT INTO users (name) VALUES ('Alice')\")\n\n# Track max write frame on primary\n{:ok, max_write_frame} = EctoLibSql.Native.max_write_replication_index(primary_state)\n\n# Sync replica to that frame\n:ok = EctoLibSql.Native.sync_until(replica_state, max_write_frame)\n\n# Now replica is caught up to primary's writes\n```\n\n**Benefits**:\n- Monitor replication lag accurately\n- Coordinate multi-replica sync\n- Ensure read-after-write consistency\n- Track write progress for analytics\n\n**Implementation Required**:\n\n1. 
**Add NIF** (native/ecto_libsql/src/replication.rs):\n ```rust\n /// Get the maximum replication index that has been written.\n ///\n /// # Returns\n /// - {:ok, frame_number} - Success\n /// - {:error, reason} - Failure\n #[rustler::nif(schedule = \"DirtyIo\")]\n pub fn max_write_replication_index(conn_id: \u0026str) -\u003e NifResult\u003cu64\u003e {\n let conn_map = safe_lock(\u0026CONNECTION_REGISTRY, \"max_write_replication_index\")?;\n let conn_arc = conn_map\n .get(conn_id)\n .ok_or_else(|| rustler::Error::Term(Box::new(\"Connection not found\")))?\n .clone();\n drop(conn_map);\n\n let result = TOKIO_RUNTIME.block_on(async {\n let conn_guard = safe_lock_arc(\u0026conn_arc, \"max_write_replication_index conn\")\n .map_err(|e| format!(\"{:?}\", e))?;\n \n conn_guard\n .db\n .max_write_replication_index()\n .await\n .map_err(|e| format!(\"Failed to get max write replication index: {:?}\", e))\n })?;\n\n Ok(result)\n }\n ```\n\n2. **Add Elixir wrapper** (lib/ecto_libsql/native.ex):\n ```elixir\n @doc \"\"\"\n Get the maximum replication index that has been written.\n \n Returns the highest frame number that has been written to the database.\n Useful for tracking write progress and coordinating replica sync.\n \n ## Examples\n \n {:ok, max_frame} = EctoLibSql.Native.max_write_replication_index(state)\n :ok = EctoLibSql.Native.sync_until(replica_state, max_frame)\n \"\"\"\n def max_write_replication_index(_conn_id), do: :erlang.nif_error(:nif_not_loaded)\n \n def max_write_replication_index_safe(%EctoLibSql.State{conn_id: conn_id}) do\n case max_write_replication_index(conn_id) do\n {:ok, frame} -\u003e {:ok, frame}\n {:error, reason} -\u003e {:error, reason}\n end\n end\n ```\n\n3. 
**Add tests** (test/replication_integration_test.exs):\n ```elixir\n test \"max_write_replication_index tracks writes\" do\n {:ok, state} = connect()\n \n # Initial max write frame\n {:ok, initial_frame} = EctoLibSql.Native.max_write_replication_index(state)\n \n # Perform write\n {:ok, _, _, state} = EctoLibSql.handle_execute(\n \"INSERT INTO test (data) VALUES (?)\",\n [\"test\"], [], state\n )\n \n # Max write frame should increase\n {:ok, new_frame} = EctoLibSql.Native.max_write_replication_index(state)\n assert new_frame \u003e initial_frame\n end\n ```\n\n**Files**:\n- native/ecto_libsql/src/replication.rs (add NIF)\n- lib/ecto_libsql/native.ex (add wrapper)\n- test/replication_integration_test.exs (add tests)\n- AGENTS.md (update API docs)\n\n**Acceptance Criteria**:\n- [ ] max_write_replication_index() NIF implemented\n- [ ] Safe wrapper in Native module\n- [ ] Tests verify frame number increases on writes\n- [ ] Tests verify frame number coordination\n- [ ] Documentation updated\n- [ ] API added to AGENTS.md\n\n**Dependencies**:\n- Related to el-g5l (Replication Integration Tests)\n- Should be tested together\n\n**References**:\n- LIBSQL_FEATURE_MATRIX_FINAL.md section 5 (line 167)\n- libsql API: db.max_write_replication_index()\n\n**Priority**: P1 - Important for replication monitoring\n**Effort**: 0.5-1 day (straightforward NIF addition)","status":"closed","priority":1,"issue_type":"task","created_at":"2025-12-30T17:45:41.941413+11:00","created_by":"drew","updated_at":"2025-12-31T10:36:43.881304+11:00","closed_at":"2025-12-31T10:36:43.881304+11:00","close_reason":"max_write_replication_index NIF already implemented in native/ecto_libsql/src/replication.rs and wrapped in lib/ecto_libsql/native.ex"}

.claude/settings.local.json

Lines changed: 2 additions & 1 deletion
@@ -47,7 +47,8 @@
       "Bash(git checkout:*)",
       "Bash(git add:*)",
       "Bash(git commit:*)",
-      "Bash(git push)"
+      "Bash(git push)",
+      "Bash(git --no-pager status)"
     ],
     "deny": [],
     "ask": []

lib/ecto_libsql/native.ex

Lines changed: 26 additions & 6 deletions
@@ -372,15 +372,32 @@ defmodule EctoLibSql.Native do
   # ETS-based LRU cache for parameter metadata.
   # Unlike persistent_term, this cache has a maximum size and evicts old entries.
   # This prevents unbounded memory growth from dynamic SQL workloads.
+  #
+  # Memory considerations:
+  # - Maximum 1000 entries, evicts 500 oldest when full
+  # - Each entry stores: SQL statement string, list of parameter names, access timestamp
+  # - For applications with many unique dynamic queries (e.g., dynamic filters, search),
+  #   the cache may consume several MB depending on query complexity
+  # - Use clear_param_cache/0 to reclaim memory if needed
+  # - Use param_cache_size/0 to monitor cache utilisation
   @param_cache_table :ecto_libsql_param_cache
   @param_cache_max_size 1000
   @param_cache_evict_count 500

   @doc """
   Clear the parameter name cache.

-  This is primarily useful for testing or when you need to reclaim memory.
+  The cache stores SQL statements and their parameter name mappings to avoid
+  repeated introspection overhead. Each entry contains the full SQL string,
+  parameter names list, and access timestamp.
+
+  Use this function to:
+  - Reclaim memory in applications with many dynamic queries
+  - Reset cache state during testing
+  - Force re-introspection after schema changes
+
   The cache will be automatically rebuilt as queries are executed.
+  Use `param_cache_size/0` to monitor cache utilisation before clearing.
   """
   @spec clear_param_cache() :: :ok
   def clear_param_cache do
@@ -396,6 +413,11 @@ defmodule EctoLibSql.Native do
   Get the current size of the parameter name cache.

   Returns the number of cached SQL statement parameter mappings.
+  The cache has a maximum size of #{@param_cache_max_size} entries.
+
+  Useful for monitoring cache utilisation in applications with dynamic queries.
+  If the cache frequently hits the maximum, consider whether query patterns
+  could be optimised to reduce unique SQL variations.
   """
   @spec param_cache_size() :: non_neg_integer()
   def param_cache_size do
@@ -437,11 +459,9 @@ defmodule EctoLibSql.Native do

     case :ets.lookup(@param_cache_table, statement) do
       [{^statement, param_names, _access_time}] ->
-        # Update access time for LRU tracking (fire and forget).
-        spawn(fn ->
-          :ets.update_element(@param_cache_table, statement, {3, System.monotonic_time()})
-        end)
-
+        # Update access time synchronously for correct LRU tracking.
+        # ETS updates are fast (microseconds), so no need for async.
+        :ets.update_element(@param_cache_table, statement, {3, System.monotonic_time()})
         param_names

       [] ->
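The diff above replaces a spawned, fire-and-forget timestamp update with a synchronous one, so the access order seen by eviction always matches the order of lookups. A minimal sketch of this touch-on-read LRU pattern, using hypothetical module and table names rather than the library's actual internals:

```elixir
defmodule LruSketch do
  # Hypothetical table name for illustration only.
  @table :lru_sketch_cache

  def init do
    :ets.new(@table, [:named_table, :public, :set])
  end

  # Insert or overwrite an entry, stamping it with the current monotonic time.
  def put(key, value) do
    :ets.insert(@table, {key, value, System.monotonic_time()})
    :ok
  end

  # On a hit, update the access timestamp in place (element 3 of the tuple)
  # so eviction can order entries by recency. Doing this synchronously keeps
  # LRU order correct; a spawned update could be applied out of order or
  # after the entry has already been evicted.
  def get(key) do
    case :ets.lookup(@table, key) do
      [{^key, value, _ts}] ->
        :ets.update_element(@table, key, {3, System.monotonic_time()})
        {:ok, value}

      [] ->
        :error
    end
  end
end
```

Since `:ets.update_element/3` mutates a single tuple field in place, the synchronous version costs roughly one ETS write per cache hit, which is cheaper than spawning a process per lookup.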
