Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
5a6561f
vectordb (in progress)
ArnabChatterjee20k Oct 16, 2025
7ddecdd
Merge branch 'documents-db-api' into vector-db-api
ArnabChatterjee20k Oct 31, 2025
3311bd2
Merge branch 'documents-db-api' into vector-db-api
ArnabChatterjee20k Oct 31, 2025
60e9545
vector db endpoints
ArnabChatterjee20k Oct 31, 2025
f9be1dd
added vector embedding creation + update endpoint
ArnabChatterjee20k Oct 31, 2025
4988881
updated method names and sdk namespaces for vectordb
ArnabChatterjee20k Nov 3, 2025
d2275c0
updated tests
ArnabChatterjee20k Nov 5, 2025
bd6f62d
updated realtime, create endpoint , resources, registers
ArnabChatterjee20k Nov 6, 2025
86371e2
Merge branch 'documents-db-api' into vector-db-api
ArnabChatterjee20k Nov 6, 2025
b1dd377
Merge branch 'documents-db-api' into vector-db-api
ArnabChatterjee20k Nov 6, 2025
857ec3b
reverted docker compose
ArnabChatterjee20k Nov 6, 2025
04b169f
updated composer packages
ArnabChatterjee20k Nov 6, 2025
de59f53
reverted error tracing
ArnabChatterjee20k Nov 6, 2025
c3f221f
added timestamp mutation test
ArnabChatterjee20k Nov 7, 2025
954011b
added transactions for vectordb
ArnabChatterjee20k Nov 12, 2025
ed84199
linting
ArnabChatterjee20k Nov 12, 2025
75045ff
added constants for the vector dimension
ArnabChatterjee20k Nov 12, 2025
93bd6b3
updated migrations + index finding issue in vectordb
ArnabChatterjee20k Nov 13, 2025
c66308e
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Nov 14, 2025
70a1d5d
updated composer lock
ArnabChatterjee20k Nov 14, 2025
cc75986
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Nov 20, 2025
c3ad337
updated migrations for vectordb importing/exporting in csv endpoints
ArnabChatterjee20k Nov 20, 2025
fe753db
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Nov 20, 2025
c0ac528
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Nov 21, 2025
918ebb8
Add VectorDB text embedding creation endpoint and update related tests
ArnabChatterjee20k Nov 21, 2025
6eb17b8
updated composer
ArnabChatterjee20k Nov 21, 2025
fdf5ca0
updated tests condition
ArnabChatterjee20k Nov 21, 2025
abe1257
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Nov 21, 2025
d2c9ac0
Refactor VectorDB Embeddings API and Update Dimension Handling
ArnabChatterjee20k Dec 2, 2025
ca4c7b5
added stats usage for text embeddings
ArnabChatterjee20k Dec 2, 2025
def028e
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Dec 2, 2025
069f231
updated composer lock
ArnabChatterjee20k Dec 2, 2025
7af361f
* added logger in the create text embedding
ArnabChatterjee20k Dec 3, 2025
b15f2ee
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Dec 8, 2025
f149ba3
updated stats for vectordb
ArnabChatterjee20k Dec 8, 2025
fbaeff8
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Dec 8, 2025
480757d
added usage test for the vectordb
ArnabChatterjee20k Dec 9, 2025
f3721f7
fixed missing project usage fields
ArnabChatterjee20k Dec 9, 2025
d22743a
Merge remote-tracking branch 'upstream/documents-db-api' into vector-…
ArnabChatterjee20k Dec 9, 2025
991b2b1
updated delete worker for vectordb and updated tests
ArnabChatterjee20k Dec 9, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .env
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,11 @@ _APP_DB_ROOT_PASS=rootsecretpassword
_APP_DB_ADAPTER_DOCUMENTSDB=mongodb
_APP_DB_HOST_DOCUMENTSDB=mongodb
_APP_DB_PORT_DOCUMENTSDB=27017
_APP_DB_ADAPTER_VECTORDB=postgresql
_APP_DB_HOST_VECTORDB=postgresql
_APP_DB_PORT_VECTORDB=5432
_APP_EMBEDDING_MODELS=embeddinggemma
_APP_EMBEDDING_ENDPOINT='http://ollama:11434/api/embed'
_APP_STORAGE_DEVICE=Local
_APP_STORAGE_S3_ACCESS_KEY=
_APP_STORAGE_S3_SECRET=
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,7 @@ jobs:
Databases/Legacy,
Databases/TablesDB,
Databases/DocumentsDB,
Databases/VectorDB,
Functions,
FunctionsSchedule,
GraphQL,
Expand Down
2 changes: 2 additions & 0 deletions app/config/collections.php
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
$common = include __DIR__ . '/collections/common.php';
$projects = include __DIR__ . '/collections/projects.php';
$databases = include __DIR__ . '/collections/databases.php';
$vectordb = include __DIR__ . '/collections/vectordb.php';
$platform = include __DIR__ . '/collections/platform.php';
$logs = include __DIR__ . '/collections/logs.php';

Expand All @@ -26,6 +27,7 @@
$collections = [
'buckets' => $buckets,
'databases' => $databases,
'vectordb' => $vectordb,
'projects' => array_merge($projects, $common),
'console' => array_merge($platform, $common),
'logs' => $logs,
Expand Down
11 changes: 11 additions & 0 deletions app/config/collections/platform.php
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,17 @@
'array' => false,
'filters' => [],
],
[
'$id' => ID::custom('vectorDatabase'),
'type' => Database::VAR_STRING,
'format' => '',
'size' => 256,
'signed' => true,
'required' => true,
'default' => null,
'array' => false,
'filters' => [],
],
[
'$id' => ID::custom('logo'),
'type' => Database::VAR_STRING,
Expand Down
165 changes: 165 additions & 0 deletions app/config/collections/vectordb.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
<?php

use Utopia\Database\Database;
use Utopia\Database\Helpers\ID;

return [
'collections' => [
'$collection' => ID::custom('databases'),
'$id' => ID::custom('collections'),
'name' => 'Collections',
'attributes' => [
[
'$id' => ID::custom('databaseInternalId'),
'type' => Database::VAR_STRING,
'format' => '',
'size' => Database::LENGTH_KEY,
'signed' => true,
'required' => true,
'default' => null,
'array' => false,
'filters' => [],
],
[
'$id' => ID::custom('databaseId'),
'type' => Database::VAR_STRING,
'signed' => true,
'size' => Database::LENGTH_KEY,
'format' => '',
'filters' => [],
'required' => true,
'default' => null,
'array' => false,
],
[
'$id' => ID::custom('name'),
'type' => Database::VAR_STRING,
'size' => 256,
'required' => true,
'signed' => true,
'array' => false,
'filters' => [],
],
[
'$id' => ID::custom('dimension'),
'type' => Database::VAR_INTEGER,
'size' => 0,
'required' => true,
'signed' => false,
'array' => false,
'filters' => [],
],
[
'$id' => ID::custom('enabled'),
'type' => Database::VAR_BOOLEAN,
'signed' => true,
'size' => 0,
'format' => '',
'filters' => [],
'required' => true,
'default' => null,
'array' => false,
],
[
'$id' => ID::custom('documentSecurity'),
'type' => Database::VAR_BOOLEAN,
'signed' => true,
'size' => 0,
'format' => '',
'filters' => [],
'required' => true,
'default' => null,
'array' => false,
],
[
'$id' => ID::custom('attributes'),
'type' => Database::VAR_STRING,
'size' => 1000000,
'required' => false,
'signed' => true,
'array' => false,
'filters' => ['subQueryAttributes'],
],
[
'$id' => ID::custom('indexes'),
'type' => Database::VAR_STRING,
'size' => 1000000,
'required' => false,
'signed' => true,
'array' => false,
'filters' => ['subQueryIndexes'],
],
[
'$id' => ID::custom('search'),
'type' => Database::VAR_STRING,
'format' => '',
'size' => 16384,
'signed' => true,
'required' => false,
'default' => null,
'array' => false,
'filters' => [],
],
],
'defaultAttributes' => [
Comment thread
abnegate marked this conversation as resolved.
[
'$id' => ID::custom('embeddings'),
'type' => Database::VAR_VECTOR,
'required' => true,
'signed' => false,
'array' => false,
'filters' => [],
],
[
'$id' => ID::custom('metadata'),
'type' => Database::VAR_OBJECT,
'default' => [],
'required' => false,
'size' => 0,
'signed' => false,
'array' => false,
'filters' => [],
Comment thread
abnegate marked this conversation as resolved.
],
],
'indexes' => [
[
'$id' => ID::custom('_fulltext_search'),
'type' => Database::INDEX_FULLTEXT,
'attributes' => ['search'],
'lengths' => [],
'orders' => [],
],
[
'$id' => ID::custom('_key_name'),
'type' => Database::INDEX_KEY,
'attributes' => ['name'],
'lengths' => [256],
'orders' => [Database::ORDER_ASC],
],
[
'$id' => ID::custom('_key_enabled'),
'type' => Database::INDEX_KEY,
'attributes' => ['enabled'],
'lengths' => [],
'orders' => [Database::ORDER_ASC],
],
[
'$id' => ID::custom('_key_documentSecurity'),
'type' => Database::INDEX_KEY,
'attributes' => ['documentSecurity'],
'lengths' => [],
'orders' => [Database::ORDER_ASC],
],
],
'defaultIndexes' => [
// No default index is created on `embeddings`: the appropriate index depends on which kind of query users run most often.
[
'$id' => ID::custom('_key_metadata'),
'type' => Database::INDEX_OBJECT,
'attributes' => ['metadata'],
'lengths' => [],
'orders' => [],
],
]
]
];
3 changes: 2 additions & 1 deletion app/controllers/api/migrations.php
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ function getDatabaseTransferResourceServices(string $databaseType)
{
// Maps a database type constant to the Transfer resource group used for it.
// NOTE(review): there is no default arm, so any $databaseType not listed here
// makes match throw \UnhandledMatchError — confirm callers validate the type first.
return match($databaseType) {
DATABASE_TYPE_LEGACY,
DATABASE_TYPE_TABLESDB => Transfer::GROUP_DATABASES_TABLES_DB
DATABASE_TYPE_TABLESDB => Transfer::GROUP_DATABASES_TABLES_DB,
DATABASE_TYPE_VECTORDB => Transfer::GROUP_DATABASES_VECTOR_DB
};
}

Expand Down
44 changes: 44 additions & 0 deletions app/controllers/api/project.php
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,18 @@
METRIC_DATABASES_OPERATIONS_WRITES,
METRIC_DATABASES_OPERATIONS_WRITES_DOCUMENTSDB,
METRIC_FILES_IMAGES_TRANSFORMED,
// VectorDB totals
METRIC_DATABASES_VECTORDB,
METRIC_COLLECTIONS_VECTORDB,
METRIC_DOCUMENTS_VECTORDB,
METRIC_DATABASES_STORAGE_VECTORDB,
METRIC_DATABASES_OPERATIONS_READS_VECTORDB,
METRIC_DATABASES_OPERATIONS_WRITES_VECTORDB,
// Embeddings totals
METRIC_EMBEDDINGS_TEXT,
METRIC_EMBEDDINGS_TEXT_TOTAL_TOKENS,
METRIC_EMBEDDINGS_TEXT_TOTAL_DURATION,
METRIC_EMBEDDINGS_TEXT_TOTAL_ERROR
],
'period' => [
METRIC_NETWORK_REQUESTS,
Expand All @@ -92,6 +104,18 @@
METRIC_DATABASES_OPERATIONS_WRITES,
METRIC_DATABASES_OPERATIONS_WRITES_DOCUMENTSDB,
METRIC_FILES_IMAGES_TRANSFORMED,
// VectorDB time series
METRIC_DATABASES_VECTORDB,
METRIC_COLLECTIONS_VECTORDB,
METRIC_DOCUMENTS_VECTORDB,
METRIC_DATABASES_STORAGE_VECTORDB,
METRIC_DATABASES_OPERATIONS_READS_VECTORDB,
METRIC_DATABASES_OPERATIONS_WRITES_VECTORDB,
// Embeddings time series
METRIC_EMBEDDINGS_TEXT,
METRIC_EMBEDDINGS_TEXT_TOTAL_TOKENS,
METRIC_EMBEDDINGS_TEXT_TOTAL_DURATION,
METRIC_EMBEDDINGS_TEXT_TOTAL_ERROR
]
];

Expand Down Expand Up @@ -379,13 +403,25 @@
'databasesWritesTotal' => $total[METRIC_DATABASES_OPERATIONS_WRITES],
'documentsdbDatabasesReadsTotal' => $total[METRIC_DATABASES_OPERATIONS_READS_DOCUMENTSDB],
'documentsdbDatabasesWritesTotal' => $total[METRIC_DATABASES_OPERATIONS_WRITES_DOCUMENTSDB],
'vectordbDatabasesTotal' => $total[METRIC_DATABASES_VECTORDB] ?? 0,
'vectordbCollectionsTotal' => $total[METRIC_COLLECTIONS_VECTORDB] ?? 0,
'vectordbDocumentsTotal' => $total[METRIC_DOCUMENTS_VECTORDB] ?? 0,
'vectordbDatabasesStorageTotal' => $total[METRIC_DATABASES_STORAGE_VECTORDB] ?? 0,
'vectordbDatabasesReadsTotal' => $total[METRIC_DATABASES_OPERATIONS_READS_VECTORDB] ?? 0,
'vectordbDatabasesWritesTotal' => $total[METRIC_DATABASES_OPERATIONS_WRITES_VECTORDB] ?? 0,
'executionsBreakdown' => $executionsBreakdown,
'bucketsBreakdown' => $bucketsBreakdown,
'databasesReads' => $usage[METRIC_DATABASES_OPERATIONS_READS],
'databasesWrites' => $usage[METRIC_DATABASES_OPERATIONS_WRITES],
'documentsdbDatabasesReads' => $usage[METRIC_DATABASES_OPERATIONS_READS_DOCUMENTSDB],
'documentsdbDatabasesWrites' => $usage[METRIC_DATABASES_OPERATIONS_WRITES_DOCUMENTSDB],
'documentsdbDatabasesStorage' => $usage[METRIC_DATABASES_STORAGE_DOCUMENTSDB],
'vectordbDatabases' => $usage[METRIC_DATABASES_VECTORDB] ?? [],
'vectordbCollections' => $usage[METRIC_COLLECTIONS_VECTORDB] ?? [],
'vectordbDocuments' => $usage[METRIC_DOCUMENTS_VECTORDB] ?? [],
'vectordbDatabasesStorage' => $usage[METRIC_DATABASES_STORAGE_VECTORDB] ?? [],
'vectordbDatabasesReads' => $usage[METRIC_DATABASES_OPERATIONS_READS_VECTORDB] ?? [],
'vectordbDatabasesWrites' => $usage[METRIC_DATABASES_OPERATIONS_WRITES_VECTORDB] ?? [],
'databasesStorageBreakdown' => $databasesStorageBreakdown,
'executionsMbSecondsBreakdown' => $executionsMbSecondsBreakdown,
'buildsMbSecondsBreakdown' => $buildsMbSecondsBreakdown,
Expand All @@ -395,6 +431,14 @@
'authPhoneCountryBreakdown' => $authPhoneCountryBreakdown,
'imageTransformations' => $usage[METRIC_FILES_IMAGES_TRANSFORMED],
'imageTransformationsTotal' => $total[METRIC_FILES_IMAGES_TRANSFORMED],
'embeddingsText' => $usage[METRIC_EMBEDDINGS_TEXT] ?? [],
'embeddingsTextTokens' => $usage[METRIC_EMBEDDINGS_TEXT_TOTAL_TOKENS] ?? [],
'embeddingsTextDuration' => $usage[METRIC_EMBEDDINGS_TEXT_TOTAL_DURATION] ?? [],
'embeddingsTextErrors' => $usage[METRIC_EMBEDDINGS_TEXT_TOTAL_ERROR] ?? [],
'embeddingsTextTotal' => $total[METRIC_EMBEDDINGS_TEXT] ?? 0,
'embeddingsTextTokensTotal' => $total[METRIC_EMBEDDINGS_TEXT_TOTAL_TOKENS] ?? 0,
'embeddingsTextDurationTotal' => $total[METRIC_EMBEDDINGS_TEXT_TOTAL_DURATION] ?? 0,
'embeddingsTextErrorsTotal' => $total[METRIC_EMBEDDINGS_TEXT_TOTAL_ERROR] ?? 0,
]), Response::MODEL_USAGE_PROJECT);
});

Expand Down
11 changes: 10 additions & 1 deletion app/controllers/api/projects.php
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,14 @@
$sharedTables = \explode(',', System::getEnv('_APP_DATABASE_DOCUMENTSDB_SHARED_TABLES', ''));
$sharedTablesV1 = \explode(',', System::getEnv('_APP_DATABASE_DOCUMENTSDB_SHARED_TABLES_V1', ''));
break;
case 'vectorDatabase':
// VectorDB settings: pool list, key list, override and shared-table lists all come from the *_VECTORDB variables.
$databases = Config::getParam('pools-vectordb', []);
$databaseKeys = System::getEnv('_APP_DATABASE_VECTORDB_KEYS', '');
$databaseOverride = System::getEnv('_APP_DATABASE_VECTORDB_OVERRIDE');
// NOTE(review): $dbScheme is filled from the host variable (_APP_DB_HOST_VECTORDB), not
// _APP_DB_ADAPTER_VECTORDB. Both are 'postgresql' in the sample .env, so this works today,
// but the values would diverge if the host were renamed — confirm which variable is intended.
$dbScheme = System::getEnv('_APP_DB_HOST_VECTORDB', 'postgresql');
$sharedTables = \explode(',', System::getEnv('_APP_DATABASE_VECTORDB_SHARED_TABLES', ''));
$sharedTablesV1 = \explode(',', System::getEnv('_APP_DATABASE_VECTORDB_SHARED_TABLES_V1', ''));
break;
default:
// legacy/tablesdb
$databases = Config::getParam('pools-database', []);
Expand Down Expand Up @@ -263,7 +271,8 @@
'accessedAt' => DateTime::now(),
'search' => implode(' ', [$projectId, $name]),
'database' => $dsn,
'documentsDatabase' => $getDatabaseDSN('documentsDatabase', $region, $dsn)
'documentsDatabase' => $getDatabaseDSN('documentsDatabase', $region, $dsn),
'vectorDatabase' => $getDatabaseDSN('vectorDatabase', $region, $dsn)
]));
} catch (Duplicate) {
throw new Exception(Exception::PROJECT_ALREADY_EXISTS);
Expand Down
1 change: 1 addition & 0 deletions app/controllers/shared/api.php
Original file line number Diff line number Diff line change
Expand Up @@ -529,6 +529,7 @@
$path = $route->getMatchedPath();
$databaseType = match (true) {
str_contains($path, '/documentsdb') => DATABASE_TYPE_DOCUMENTSDB,
str_contains($path, '/vectordb') => DATABASE_TYPE_VECTORDB,
default => '',
};

Expand Down
27 changes: 26 additions & 1 deletion app/init/constants.php
Original file line number Diff line number Diff line change
Expand Up @@ -280,6 +280,29 @@
const METRIC_DATABASES_OPERATIONS_WRITES_DOCUMENTSDB = 'documentsdb.databases.operations.writes';
const METRIC_DATABASE_ID_OPERATIONS_WRITES_DOCUMENTSDB = 'documentsdb.{databaseInternalId}.databases.operations.writes';

// vectordb
const METRIC_DATABASES_VECTORDB = 'vectordb.databases';
const METRIC_COLLECTIONS_VECTORDB = 'vectordb.collections';
const METRIC_DATABASES_STORAGE_VECTORDB = 'vectordb.databases.storage';
const METRIC_DATABASE_ID_COLLECTIONS_VECTORDB = 'vectordb.{databaseInternalId}.collections';
const METRIC_DATABASE_ID_STORAGE_VECTORDB = 'vectordb.{databaseInternalId}.databases.storage';
const METRIC_DOCUMENTS_VECTORDB = 'vectordb.documents';
const METRIC_DATABASE_ID_DOCUMENTS_VECTORDB = 'vectordb.{databaseInternalId}.documents';
const METRIC_DATABASE_ID_COLLECTION_ID_DOCUMENTS_VECTORDB = 'vectordb.{databaseInternalId}.{collectionInternalId}.documents';
const METRIC_DATABASE_ID_COLLECTION_ID_STORAGE_VECTORDB = 'vectordb.{databaseInternalId}.{collectionInternalId}.databases.storage';
const METRIC_DATABASES_OPERATIONS_READS_VECTORDB = 'vectordb.databases.operations.reads';
const METRIC_DATABASE_ID_OPERATIONS_READS_VECTORDB = 'vectordb.{databaseInternalId}.databases.operations.reads';
const METRIC_DATABASES_OPERATIONS_WRITES_VECTORDB = 'vectordb.databases.operations.writes';
const METRIC_DATABASE_ID_OPERATIONS_WRITES_VECTORDB = 'vectordb.{databaseInternalId}.databases.operations.writes';
const METRIC_EMBEDDINGS_TEXT = 'embeddings.text';
const METRIC_EMBEDDINGS_MODEL_TEXT = 'embeddings.text.{embeddingModel}';
const METRIC_EMBEDDINGS_TEXT_TOTAL_ERROR = 'embeddings.text.totalErrors';
const METRIC_EMBEDDINGS_MODEL_TEXT_TOTAL_ERROR = 'embeddings.text.{embeddingModel}.totalErrors';
const METRIC_EMBEDDINGS_TEXT_TOTAL_DURATION = 'embeddings.text.totalDuration';
const METRIC_EMBEDDINGS_MODEL_TEXT_TOTAL_DURATION = 'embeddings.text.{embeddingModel}.totalDuration';
const METRIC_EMBEDDINGS_TEXT_TOTAL_TOKENS = 'embeddings.text.totalTokens';
const METRIC_EMBEDDINGS_MODEL_TEXT_TOTAL_TOKENS = 'embeddings.text.{embeddingModel}.totalTokens';

const METRIC_BUCKETS = 'buckets';
const METRIC_FILES = 'files';
const METRIC_FILES_STORAGE = 'files.storage';
Expand Down Expand Up @@ -365,6 +388,7 @@
const RESOURCE_TYPE_SUBSCRIBERS = 'subscribers';
const RESOURCE_TYPE_MESSAGES = 'messages';
const RESOURCE_TYPE_EXECUTIONS = 'executions';
const RESOURCE_TYPE_EMBEDDINGS_TEXT = 'embeddingsText';

// Resource types for Tokens
const TOKENS_RESOURCE_TYPE_FILES = 'files';
Expand All @@ -384,6 +408,7 @@
const DATABASE_TYPE_LEGACY = 'legacy';
const DATABASE_TYPE_TABLESDB = 'tablesdb';
const DATABASE_TYPE_DOCUMENTSDB = 'documentsdb';
const DATABASE_TYPE_VECTORDB = 'vectordb';

// CSV import/export allowed database types
const CSV_ALLOWED_DATABASE_TYPES = [DATABASE_TYPE_LEGACY, DATABASE_TYPE_LEGACY];
// Database types that may use CSV import/export; each type is listed exactly once.
const CSV_ALLOWED_DATABASE_TYPES = [DATABASE_TYPE_LEGACY, DATABASE_TYPE_TABLESDB, DATABASE_TYPE_VECTORDB];
Loading