Skip to content

Commit 6194c8e

Browse files
committed
feat: Add R*Tree spatial indexing support
Implement full support for SQLite R*Tree virtual tables for multi-dimensional spatial indexing. This enables efficient geospatial queries, collision detection, and time-range operations. Features: - Table creation using :rtree => true option in Ecto migrations - Support for 1D to 5D spatial indexes (3-11 columns including ID) - Automatic validation of R*Tree column structure and constraints - Comprehensive query pattern documentation and examples - Full integration with Ecto schemas using fragments Implementation: - New create_rtree_table/3 helper in connection.ex for virtual table DDL - validate_rtree_columns!/1 helper for column structure validation - Generates CREATE VIRTUAL TABLE ... USING rtree(...) syntax - Test coverage in test/rtree_test.exs for all dimensions and operations Documentation: - Complete guide in AGENTS.md with geographic and time-series examples - R*Tree vs Vector Search comparison matrix - Migration and query pattern examples - Updated README.md and CHANGELOG.md Closes el-4oc
1 parent caa255c commit 6194c8e

5 files changed

Lines changed: 650 additions & 9 deletions

File tree

AGENTS.md

Lines changed: 232 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ Welcome to ecto_libsql! This guide provides comprehensive documentation, API ref
1313
- How to integrate ecto_libsql into your Elixir/Phoenix application
1414
- Configuration and connection management
1515
- Ecto schemas, migrations, and queries
16-
- Advanced features (vector search, encryption, batching)
16+
- Advanced features (vector search, R*Tree spatial indexing, encryption, batching)
1717
- Real-world usage examples and patterns
1818
- Performance optimisation for your applications
1919

@@ -32,6 +32,7 @@ Welcome to ecto_libsql! This guide provides comprehensive documentation, API ref
3232
- [Connection Management](#connection-management)
3333
- [PRAGMA Configuration](#pragma-configuration)
3434
- [Vector Search](#vector-search)
35+
- [R*Tree Spatial Indexing](#rtree-spatial-indexing)
3536
- [Encryption](#encryption)
3637
- [Ecto Integration](#ecto-integration)
3738
- [Quick Start with Ecto](#quick-start-with-ecto)
@@ -1104,6 +1105,210 @@ distance_sql = EctoLibSql.Native.vector_distance_cos("description_embedding", qu
11041105
)
11051106
```
11061107

1108+
### R*Tree Spatial Indexing
1109+
1110+
R*Tree is a specialized spatial index for efficient multi-dimensional range queries. Perfect for geospatial data, collision detection, and time-series queries.
1111+
1112+
#### Creating R*Tree Tables
1113+
1114+
R*Tree tables are created as virtual tables using the `:rtree => true` option in migrations:
1115+
1116+
```elixir
1117+
defmodule MyApp.Repo.Migrations.CreateLocationsRTree do
1118+
use Ecto.Migration
1119+
1120+
def change do
1121+
create table(:geo_regions, rtree: true) do
1122+
add :id, :integer, primary_key: true
1123+
add :min_lat, :float
1124+
add :max_lat, :float
1125+
add :min_lng, :float
1126+
add :max_lng, :float
1127+
end
1128+
end
1129+
end
1130+
```
1131+
1132+
**Important R*Tree Requirements:**
1133+
- First column must be named `id` (integer primary key)
1134+
- Remaining columns come in min/max pairs (2D, 3D, 4D, or 5D)
1135+
- Total columns must be odd (3, 5, 7, 9, or 11)
1136+
- Minimum 3 columns (id + 1 dimension), maximum 11 columns (id + 5 dimensions)
1137+
1138+
#### 2D Example: Geographic Boundaries
1139+
1140+
```elixir
1141+
# Create table for geographic regions
1142+
Ecto.Adapters.SQL.query!(
1143+
Repo,
1144+
"""
1145+
CREATE VIRTUAL TABLE geo_regions USING rtree(
1146+
id,
1147+
min_lat, max_lat,
1148+
min_lng, max_lng
1149+
)
1150+
"""
1151+
)
1152+
1153+
# Insert bounding boxes for regions
1154+
# Sydney: -34.0 to -33.8 lat, 151.0 to 151.3 lng
1155+
Ecto.Adapters.SQL.query!(
1156+
Repo,
1157+
"INSERT INTO geo_regions VALUES (1, -34.0, -33.8, 151.0, 151.3)"
1158+
)
1159+
1160+
# Melbourne: -38.0 to -37.7 lat, 144.8 to 145.1 lng
1161+
Ecto.Adapters.SQL.query!(
1162+
Repo,
1163+
"INSERT INTO geo_regions VALUES (2, -38.0, -37.7, 144.8, 145.1)"
1164+
)
1165+
1166+
# Find regions containing a point (Sydney: -33.87, 151.21)
1167+
result = Ecto.Adapters.SQL.query!(
1168+
Repo,
1169+
"""
1170+
SELECT id FROM geo_regions
1171+
WHERE min_lat <= -33.87 AND max_lat >= -33.87
1172+
AND min_lng <= 151.21 AND max_lng >= 151.21
1173+
"""
1174+
)
1175+
# Returns: [[1]]
1176+
```
1177+
1178+
#### 3D Example: Spatial + Time Ranges
1179+
1180+
```elixir
1181+
# Create table for events with location and time bounds
1182+
Ecto.Adapters.SQL.query!(
1183+
Repo,
1184+
"""
1185+
CREATE VIRTUAL TABLE events USING rtree(
1186+
id,
1187+
min_x, max_x, -- X coordinate bounds
1188+
min_y, max_y, -- Y coordinate bounds
1189+
min_time, max_time -- Time bounds (Unix timestamp)
1190+
)
1191+
"""
1192+
)
1193+
1194+
# Insert event: Conference at (100, 200) from Jan 1-3, 2025
1195+
start_time = DateTime.to_unix(~U[2025-01-01 00:00:00Z])
1196+
end_time = DateTime.to_unix(~U[2025-01-03 23:59:59Z])
1197+
1198+
Ecto.Adapters.SQL.query!(
1199+
Repo,
1200+
"INSERT INTO events VALUES (1, 100, 100, 200, 200, #{start_time}, #{end_time})"
1201+
)
1202+
1203+
# Find events in area (90-110, 190-210) during Jan 2025
1204+
query_start = DateTime.to_unix(~U[2025-01-01 00:00:00Z])
1205+
query_end = DateTime.to_unix(~U[2025-01-31 23:59:59Z])
1206+
1207+
result = Ecto.Adapters.SQL.query!(
1208+
Repo,
1209+
"""
1210+
SELECT id FROM events
1211+
WHERE max_x >= 90 AND min_x <= 110
1212+
AND max_y >= 190 AND min_y <= 210
1213+
AND max_time >= #{query_start} AND min_time <= #{end_time}
1214+
"""
1215+
)
1216+
```
1217+
1218+
#### Using with Ecto Schemas
1219+
1220+
While R*Tree tables are virtual tables, you can still define schemas for them:
1221+
1222+
```elixir
1223+
defmodule MyApp.GeoRegion do
1224+
use Ecto.Schema
1225+
1226+
@primary_key {:id, :integer, autogenerate: false}
1227+
schema "geo_regions" do
1228+
field :min_lat, :float
1229+
field :max_lat, :float
1230+
field :min_lng, :float
1231+
field :max_lng, :float
1232+
end
1233+
end
1234+
1235+
# Insert using Ecto
1236+
region = %MyApp.GeoRegion{
1237+
id: 1,
1238+
min_lat: -34.0,
1239+
max_lat: -33.8,
1240+
min_lng: 151.0,
1241+
max_lng: 151.3
1242+
}
1243+
Repo.insert!(region)
1244+
1245+
# Query using fragments
1246+
import Ecto.Query
1247+
1248+
# Find regions containing a point
1249+
point_lat = -33.87
1250+
point_lng = 151.21
1251+
1252+
query = from r in MyApp.GeoRegion,
1253+
where: fragment("min_lat <= ? AND max_lat >= ?", ^point_lat, ^point_lat),
1254+
where: fragment("min_lng <= ? AND max_lng >= ?", ^point_lng, ^point_lng)
1255+
1256+
regions = Repo.all(query)
1257+
```
1258+
1259+
#### Common Query Patterns
1260+
1261+
```elixir
1262+
# 1. Point containment: Does bounding box contain this point?
1263+
"""
1264+
SELECT id FROM rtree_table
1265+
WHERE min_x <= ?1 AND max_x >= ?1
1266+
AND min_y <= ?2 AND max_y >= ?2
1267+
"""
1268+
1269+
# 2. Bounding box intersection: Do two boxes overlap?
1270+
"""
1271+
SELECT id FROM rtree_table
1272+
WHERE max_x >= ?1 AND min_x <= ?2 -- Query box: ?1 to ?2
1273+
AND max_y >= ?3 AND min_y <= ?4 -- Query box: ?3 to ?4
1274+
"""
1275+
1276+
# 3. Range query: All items within bounds
1277+
"""
1278+
SELECT id FROM rtree_table
1279+
WHERE min_x >= ?1 AND max_x <= ?2
1280+
AND min_y >= ?3 AND max_y <= ?4
1281+
"""
1282+
```
1283+
1284+
#### R*Tree vs Vector Search
1285+
1286+
**Use R*Tree when:**
1287+
- You have bounding box data (geographic regions, time ranges)
1288+
- You need exact range queries (all items within bounds)
1289+
- Working with 1-5 dimensional coordinate data
1290+
- Query performance is critical for range lookups
1291+
1292+
**Use Vector Search when:**
1293+
- You have high-dimensional embeddings (384-1536+ dimensions)
1294+
- You need similarity/distance-based search
1295+
- Working with semantic search, recommendations, or ML features
1296+
- Approximate nearest neighbors is acceptable
1297+
1298+
**Hybrid Approach:**
1299+
```elixir
1300+
# Combine both for location-aware semantic search
1301+
"""
1302+
SELECT p.*, vector_distance_cos(p.embedding, ?1) as similarity
1303+
FROM products p
1304+
JOIN geo_regions r ON r.id = p.region_id
1305+
WHERE r.min_lat <= ?2 AND r.max_lat >= ?2
1306+
AND r.min_lng <= ?3 AND r.max_lng >= ?3
1307+
ORDER BY similarity
1308+
LIMIT 10
1309+
"""
1310+
```
1311+
11071312
### Connection Management
11081313

11091314
Control connection behaviour and performance with these utilities (v0.7.0+):
@@ -4113,3 +4318,29 @@ Found a bug or have a feature request? Please open an issue on GitHub!
41134318
## License
41144319

41154320
Apache 2.0
4321+
4322+
## Landing the Plane (Session Completion)
4323+
4324+
**When ending a work session**, you MUST complete ALL steps below. Work is NOT complete until `git push` succeeds.
4325+
4326+
**MANDATORY WORKFLOW:**
4327+
4328+
1. **File issues for remaining work** - Create issues for anything that needs follow-up
4329+
2. **Run quality gates** (if code changed) - Tests, linters, builds
4330+
3. **Update issue status** - Close finished work, update in-progress items
4331+
4. **PUSH TO REMOTE** - This is MANDATORY:
4332+
```bash
4333+
git pull --rebase
4334+
bd sync
4335+
git push
4336+
git status # MUST show "up to date with origin"
4337+
```
4338+
5. **Clean up** - Clear stashes, prune remote branches
4339+
6. **Verify** - All changes committed AND pushed
4340+
7. **Hand off** - Provide context for next session
4341+
4342+
**CRITICAL RULES:**
4343+
- Work is NOT complete until `git push` succeeds
4344+
- NEVER stop before pushing - that leaves work stranded locally
4345+
- NEVER say "ready to push when you are" - YOU must push
4346+
- If push fails, resolve and retry until it succeeds

CHANGELOG.md

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,31 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
- **R*Tree Spatial Indexing Support**
13+
- Full support for SQLite R*Tree virtual tables for multi-dimensional spatial indexing
14+
- **Table creation**: Use `:rtree => true` option in Ecto migrations
15+
- **Dimensions supported**: 1D to 5D (3 to 11 columns total including ID)
16+
- **Column structure**: First column must be `id` (integer primary key), followed by min/max coordinate pairs
17+
- **Validation**: Automatic validation of column count (odd numbers only), column naming, and dimensional constraints
18+
- **Use cases**: Geographic bounding boxes, collision detection, time-range queries, spatial indexing
19+
- **Migration example**:
20+
```elixir
21+
create table(:geo_regions, rtree: true) do
22+
add :id, :integer, primary_key: true
23+
add :min_lat, :float
24+
add :max_lat, :float
25+
add :min_lng, :float
26+
add :max_lng, :float
27+
end
28+
```
29+
- **Query patterns**: Point containment, bounding box intersection, range queries
30+
- **Virtual table syntax**: Generates `CREATE VIRTUAL TABLE ... USING rtree(...)` DDL
31+
- **Implementation**: New `create_rtree_table/3` and `validate_rtree_columns!/1` helpers in `connection.ex`
32+
- **Comprehensive test coverage** in `test/rtree_test.exs` covering 2D/3D tables, validation, queries, and CRUD operations
33+
- **Documentation**: Full guide in AGENTS.md with examples for geographic data, time-series, and hybrid vector+spatial search
34+
- **Comparison guide**: R*Tree vs Vector Search decision matrix in documentation
35+
- **Ecto integration**: Works with Ecto schemas using fragments for spatial queries
36+
1237
- **Named Parameters Execution Support**
1338
- Full support for SQLite named parameter syntax in prepared statements and direct execution
1439
- **Three SQLite syntaxes supported**: `:name`, `@name`, `$name`

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,7 @@ For lower-level control, you can use the DBConnection interface directly:
9090

9191
**Advanced Features**
9292
- Vector similarity search
93+
- R*Tree spatial indexing for multi-dimensional range queries
9394
- Database encryption (local AES-256-CBC and Turso remote encryption)
9495
- WebSocket and HTTP protocols
9596
- Cursor-based streaming for large result sets (via DBConnection interface)

lib/ecto/adapters/libsql/connection.ex

Lines changed: 68 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -184,17 +184,23 @@ defmodule Ecto.Adapters.LibSql.Connection do
184184
table_name = quote_table(table.prefix, table.name)
185185
if_not_exists = if command == :create_if_not_exists, do: " IF NOT EXISTS", else: ""
186186

187-
# Check if we have a composite primary key.
188-
composite_pk = composite_primary_key?(columns)
187+
# Check if this is an R*Tree virtual table
188+
if table.options && Keyword.get(table.options, :rtree, false) do
189+
create_rtree_table(table_name, if_not_exists, columns)
190+
else
191+
# Standard table creation
192+
# Check if we have a composite primary key.
193+
composite_pk = composite_primary_key?(columns)
189194

190-
column_definitions =
191-
Enum.map_join(columns, ", ", &column_definition(&1, composite_pk))
195+
column_definitions =
196+
Enum.map_join(columns, ", ", &column_definition(&1, composite_pk))
192197

193-
{table_constraints, table_suffix} = table_options(table, columns)
198+
{table_constraints, table_suffix} = table_options(table, columns)
194199

195-
[
196-
"CREATE TABLE#{if_not_exists} #{table_name} (#{column_definitions}#{table_constraints})#{table_suffix}"
197-
]
200+
[
201+
"CREATE TABLE#{if_not_exists} #{table_name} (#{column_definitions}#{table_constraints})#{table_suffix}"
202+
]
203+
end
198204
end
199205

200206
def execute_ddl({:drop, %Ecto.Migration.Table{} = table, _}) do
@@ -474,6 +480,60 @@ defmodule Ecto.Adapters.LibSql.Connection do
474480
{table_constraints, table_suffix}
475481
end
476482

483+
defp create_rtree_table(table_name, if_not_exists, columns) do
484+
# R*Tree virtual tables require specific column structure:
485+
# First column: integer primary key (id)
486+
# Remaining columns: coordinate pairs (min/max)
487+
488+
# Extract column names for R*Tree
489+
rtree_columns =
490+
Enum.map(columns, fn {:add, name, _type, _opts} ->
491+
Atom.to_string(name)
492+
end)
493+
494+
# Validate column structure
495+
validate_rtree_columns!(rtree_columns)
496+
497+
# Build R*Tree column list: id, min1, max1, min2, max2, ...
498+
column_list = Enum.join(rtree_columns, ", ")
499+
500+
[
501+
"CREATE VIRTUAL TABLE#{if_not_exists} #{table_name} USING rtree(#{column_list})"
502+
]
503+
end
504+
505+
defp validate_rtree_columns!(columns) do
506+
# R*Tree requires odd number of columns (3 to 11)
507+
# First column is ID, then min/max pairs
508+
num_columns = length(columns)
509+
510+
cond do
511+
num_columns < 3 ->
512+
raise ArgumentError,
513+
"R*Tree tables require at least 3 columns (id + 1 dimension). Got #{num_columns} columns."
514+
515+
num_columns > 11 ->
516+
raise ArgumentError,
517+
"R*Tree tables support maximum 11 columns (id + 5 dimensions). Got #{num_columns} columns."
518+
519+
rem(num_columns, 2) == 0 ->
520+
raise ArgumentError,
521+
"R*Tree tables require odd number of columns (id + min/max pairs). Got #{num_columns} columns."
522+
523+
true ->
524+
:ok
525+
end
526+
527+
# Validate first column is 'id'
528+
[first_column | _rest] = columns
529+
unless first_column == "id" do
530+
raise ArgumentError,
531+
"R*Tree tables must have 'id' as the first column. Got '#{first_column}' instead."
532+
end
533+
534+
:ok
535+
end
536+
477537
## Query Helpers
478538

479539
defp quote_table(nil, name), do: quote_name(name)

0 commit comments

Comments
 (0)