From 7d547aaa09117fca4aed5500963f695251bdcd57 Mon Sep 17 00:00:00 2001
From: Nymul Islam
Date: Sun, 12 Apr 2026 13:04:42 +0900
Subject: [PATCH] feat(ai): add Ollama/local LLM support for privacy-sensitive deployments

Closes #53

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 backend/.env.example                        | 14 +++-
 backend/src/modules/ai/ai.service.ts        | 95 +++++++++++++++++----
 backend/src/modules/ai/embedding.service.ts | 68 +++++++++++-----
 3 files changed, 140 insertions(+), 37 deletions(-)

diff --git a/backend/.env.example b/backend/.env.example
index 8cfb728..04b3c89 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -96,18 +96,28 @@
 MAIL_USERNAME=your_ses_smtp_username
 MAIL_PASSWORD=your_ses_smtp_password
 
 # =====================================================
-# AI SERVICES (OpenRouter)
+# AI SERVICES
 # =====================================================
+# AI Provider: openrouter | ollama | openai
+# Defaults to "openrouter" if unset (preserves current behavior)
+AI_PROVIDER=openrouter
+
+# OpenRouter (cloud, default)
 OPENROUTER_API_KEY=sk-or-v1-your-openrouter-api-key
 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
 OPENROUTER_DEFAULT_MODEL=anthropic/claude-3.5-sonnet
 OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small
 
-# OpenAI Direct (optional, for embeddings)
+# OpenAI Direct (optional, for embeddings or as primary provider)
 OPENAI_API_KEY=sk-your-openai-api-key
 OPENAI_EMBEDDING_MODEL=text-embedding-3-small
 
+# Ollama (local, fully offline; no data leaves the machine)
+OLLAMA_BASE_URL=http://localhost:11434/v1
+OLLAMA_MODEL=llama3.2
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
 # =====================================================
 # OAUTH - GOOGLE
 # =====================================================
diff --git a/backend/src/modules/ai/ai.service.ts b/backend/src/modules/ai/ai.service.ts
index 01e4ea6..2d71b86 100644
--- a/backend/src/modules/ai/ai.service.ts
+++ b/backend/src/modules/ai/ai.service.ts
@@ -1,9 +1,17 @@
 /**
  * AI Service
  *
- * Uses OpenRouter as the unified gateway for all LLM providers.
- * OpenRouter provides a single API compatible with OpenAI SDK
- * to access models from OpenAI, Anthropic, Google, DeepSeek, and more.
+ * Supports multiple AI providers via the OpenAI SDK:
+ * - OpenRouter: unified cloud gateway for all models (default)
+ * - Ollama: local/offline LLM for privacy-sensitive deployments
+ * - OpenAI: direct OpenAI API access
+ *
+ * Set the AI_PROVIDER env var to choose the provider.
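+ *
+ * Example (.env), assuming a local Ollama install with the defaults below pulled:
+ *   AI_PROVIDER=ollama
+ *   OLLAMA_BASE_URL=http://localhost:11434/v1
+ *   OLLAMA_MODEL=llama3.2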
 */
 
 import {
@@ -19,9 +22,13 @@ import OpenAI from 'openai';
 import { DatabaseService } from '../database/database.service';
 import { v4 as uuidv4 } from 'uuid';
 
-// Default models
+// AI provider type
+type AiProvider = 'openrouter' | 'ollama' | 'openai';
+
+// Default models per provider
 const DEFAULT_MODEL = 'openai/gpt-4o-mini';
 const DEFAULT_VISION_MODEL = 'openai/gpt-4o';
+const DEFAULT_OLLAMA_MODEL = 'llama3.2';
 
 export interface ChatMessage {
   role: 'system' | 'user' | 'assistant';
@@ -57,12 +64,18 @@ export interface TranscriptionResponse {
 export class AiService implements OnModuleInit {
   private readonly logger = new Logger(AiService.name);
 
+  // Active AI provider
+  private provider: AiProvider = 'openrouter';
+
   // OpenRouter client (PRIMARY gateway for all models)
   private openRouterClient: OpenAI | null = null;
 
   // Direct OpenAI client (fallback only)
   private openaiClient: OpenAI | null = null;
 
+  // Ollama client (local/offline)
+  private ollamaClient: OpenAI | null = null;
+
   constructor(
     private readonly configService: ConfigService,
     private readonly db: DatabaseService,
@@ -73,6 +86,25 @@
   }
 
   private async initializeClients() {
+    // Determine the active provider (default: openrouter for backward compat)
+    this.provider = (
+      this.configService.get('AI_PROVIDER', 'openrouter') as AiProvider
+    );
+
+    // Initialize Ollama (local LLM)
+    if (this.provider === 'ollama') {
+      const ollamaBaseUrl = this.configService.get(
+        'OLLAMA_BASE_URL',
+        'http://localhost:11434/v1',
+      );
+      this.ollamaClient = new OpenAI({
+        apiKey: 'ollama', // Ollama doesn't need a key, but the SDK requires a non-empty string
+        baseURL: ollamaBaseUrl,
+        timeout: 120000, // 2 minutes; local inference can be slow
+      });
+      this.logger.log(`Ollama client initialized (${ollamaBaseUrl})`);
+    }
+
     // Initialize OpenRouter (PRIMARY gateway for all models)
     const openRouterKey = this.configService.get('OPENROUTER_API_KEY');
 
@@ -100,12 +132,23 @@
     }
 
     // Log status
-    if (!this.openRouterClient && !this.openaiClient) {
-      this.logger.warn('No AI clients available! Set OPENROUTER_API_KEY or OPENAI_API_KEY in .env');
+    if (!this.ollamaClient && !this.openRouterClient && !this.openaiClient) {
+      this.logger.warn(
+        'No AI clients available! Set AI_PROVIDER and the corresponding config in .env',
+      );
     }
   }
 
   private getClient(): OpenAI {
+    // Use the configured provider first
+    if (this.provider === 'ollama' && this.ollamaClient) {
+      return this.ollamaClient;
+    }
+
+    if (this.provider === 'openai' && this.openaiClient) {
+      return this.openaiClient;
+    }
+
     // Primary: Use OpenRouter for everything (unified gateway)
     if (this.openRouterClient) {
       return this.openRouterClient;
     }
@@ -117,13 +160,19 @@
     }
 
     throw new BadRequestException(
-      'No AI API key configured. Set OPENROUTER_API_KEY or OPENAI_API_KEY in .env file.',
+      'No AI provider configured. Set AI_PROVIDER and the matching settings in the .env file.',
     );
   }
 
   private getModel(type: 'text' | 'vision' = 'text'): string {
+    // Ollama uses its own model names
+    if (this.provider === 'ollama') {
+      // Same model for text and vision; use a multimodal model (e.g. llama3.2-vision) for vision
+      return this.configService.get('OLLAMA_MODEL', DEFAULT_OLLAMA_MODEL);
+    }
+
     // When using OpenRouter, use provider/model format
-    if (this.openRouterClient) {
+    if (this.provider === 'openrouter' && this.openRouterClient) {
       return type === 'vision' ?
         this.configService.get('OPENROUTER_VISION_MODEL', DEFAULT_VISION_MODEL) :
         this.configService.get('OPENROUTER_DEFAULT_MODEL', DEFAULT_MODEL);
@@ -140,7 +189,12 @@
     options: CompletionOptions = {},
   ): Promise<CompletionResponse> {
     const client = this.getClient();
-    const model = options.model || this.getModel('text');
+    // When using Ollama, always use the configured Ollama model
+    // (ignore OpenRouter-style model names like "openai/gpt-4o")
+    const model =
+      this.provider === 'ollama'
+        ? this.getModel('text')
+        : options.model || this.getModel('text');
 
     try {
       const response = await client.chat.completions.create({
@@ -167,8 +221,8 @@
       const err = error as Error;
       this.logger.error(`AI completion failed: ${err.message}`);
 
-      // Try fallback to direct OpenAI if OpenRouter fails
-      if (this.openRouterClient && this.openaiClient) {
+      // Try fallback to direct OpenAI if OpenRouter fails (not applicable for Ollama)
+      if (this.provider !== 'ollama' && this.openRouterClient && this.openaiClient) {
        this.logger.warn('Attempting fallback to direct OpenAI...');
         return this.completeFallback(messages, options);
       }
@@ -221,6 +275,7 @@
     const client = this.getClient();
     const model = this.getModel('vision');
 
+    // Ollama multimodal models support vision via the same API
     const response = await client.chat.completions.create({
       model,
       messages: [
@@ -247,7 +302,8 @@
       return response.choices[0]?.message?.content || '';
     } catch (error) {
       this.logger.error('Vision generation failed', error);
-      throw new BadRequestException(`Vision AI failed: ${error.message}`);
+      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
+      throw new BadRequestException(`Vision AI failed: ${errorMessage}`);
     }
   }
 
@@ -276,7 +332,10 @@
     options: CompletionOptions = {},
   ): AsyncGenerator<{ content: string; done: boolean }> {
     const client = this.getClient();
-    const model = options.model || this.getModel('text');
+    const model =
+      this.provider === 'ollama'
+        ? this.getModel('text')
+        : options.model || this.getModel('text');
 
     const stream = await client.chat.completions.create({
       model,
@@ -365,11 +424,14 @@
   }
 
   isAvailable(): boolean {
-    return this.openRouterClient !== null || this.openaiClient !== null;
+    return this.ollamaClient !== null || this.openRouterClient !== null || this.openaiClient !== null;
   }
 
   getAvailableProviders(): string[] {
     const providers: string[] = [];
+    if (this.ollamaClient) {
+      providers.push('ollama');
+    }
     if (this.openRouterClient) {
       providers.push('openrouter', 'openai', 'anthropic', 'google', 'deepseek');
     } else if (this.openaiClient) {
diff --git a/backend/src/modules/ai/embedding.service.ts b/backend/src/modules/ai/embedding.service.ts
index 63d52d9..8b3cf0c 100644
--- a/backend/src/modules/ai/embedding.service.ts
+++ b/backend/src/modules/ai/embedding.service.ts
@@ -1,6 +1,9 @@
 import { Injectable, Logger } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';
 
+// AI provider type
+type AiProvider = 'openrouter' | 'ollama' | 'openai';
+
 export interface EmbeddingResult {
   vector: number[];
   tokens: number;
@@ -9,37 +12,65 @@
 @Injectable()
 export class EmbeddingService {
   private readonly logger = new Logger(EmbeddingService.name);
+  private readonly provider: AiProvider;
   private readonly apiKey: string;
   private readonly baseUrl: string;
   private readonly embeddingModel: string;
   private readonly vectorDimension: number;
 
   constructor(private readonly configService: ConfigService) {
-    this.apiKey = this.configService.get('OPENROUTER_API_KEY', '');
-    this.baseUrl = this.configService.get(
-      'OPENROUTER_BASE_URL',
-      'https://openrouter.ai/api/v1',
-    );
-    this.embeddingModel = this.configService.get(
-      'OPENROUTER_EMBEDDING_MODEL',
-      'openai/text-embedding-3-small',
+    this.provider = (
+      this.configService.get('AI_PROVIDER', 'openrouter') as AiProvider
     );
-    this.vectorDimension = 1536;
+
+    if (this.provider === 'ollama') {
+      this.apiKey = 'ollama'; // Ollama doesn't need a key
+      this.baseUrl = this.configService.get(
+        'OLLAMA_BASE_URL',
+        'http://localhost:11434/v1',
+      );
+      this.embeddingModel = this.configService.get(
+        'OLLAMA_EMBEDDING_MODEL',
+        'nomic-embed-text',
+      );
+      // nomic-embed-text produces 768-dim vectors; adjust if using a different model
+      this.vectorDimension = 768;
+      this.logger.log(`Embedding service using Ollama (${this.embeddingModel})`);
+    } else {
+      this.apiKey = this.configService.get('OPENROUTER_API_KEY', '');
+      this.baseUrl = this.configService.get(
+        'OPENROUTER_BASE_URL',
+        'https://openrouter.ai/api/v1',
+      );
+      this.embeddingModel = this.configService.get(
+        'OPENROUTER_EMBEDDING_MODEL',
+        'openai/text-embedding-3-small',
+      );
+      this.vectorDimension = 1536;
+    }
   }
 
   getVectorDimension(): number {
     return this.vectorDimension;
   }
 
+  private getHeaders(): Record<string, string> {
+    const headers: Record<string, string> = {
+      Authorization: `Bearer ${this.apiKey}`,
+      'Content-Type': 'application/json',
+    };
+    // OpenRouter-specific attribution headers (not needed for Ollama)
+    if (this.provider !== 'ollama') {
+      headers['HTTP-Referer'] = 'https://studyield.com';
+      headers['X-Title'] = 'Studyield';
+    }
+    return headers;
+  }
+
   async embed(text: string): Promise<EmbeddingResult> {
     const response = await fetch(`${this.baseUrl}/embeddings`, {
       method: 'POST',
-      headers: {
-        Authorization: `Bearer ${this.apiKey}`,
-        'Content-Type': 'application/json',
-        'HTTP-Referer': 'https://studyield.com',
-        'X-Title': 'Studyield',
-      },
+      headers: this.getHeaders(),
       body: JSON.stringify({
         model: this.embeddingModel,
         input: text,
@@ -65,12 +96,7 @@
     const response = await fetch(`${this.baseUrl}/embeddings`, {
       method: 'POST',
-      headers: {
-        Authorization: `Bearer ${this.apiKey}`,
-        'Content-Type': 'application/json',
-        'HTTP-Referer': 'https://studyield.com',
-        'X-Title': 'Studyield',
-      },
+      headers: this.getHeaders(),
       body: JSON.stringify({
         model: this.embeddingModel,
         input: texts,
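--

Reviewer note: a minimal standalone sketch for exercising the Ollama path,
not part of the patch. It assumes Ollama is running on the default port with
"ollama pull llama3.2" done, and uses the same openai npm package the service
itself uses; the file name and env handling here are illustrative only.

  // smoke-ollama.ts (hypothetical file, local verification only)
  import OpenAI from 'openai';

  // Mirrors AiService's Ollama client setup: any non-empty apiKey works
  // because Ollama ignores it; the base URL is Ollama's OpenAI-compatible API.
  const client = new OpenAI({
    apiKey: 'ollama',
    baseURL: process.env.OLLAMA_BASE_URL ?? 'http://localhost:11434/v1',
  });

  async function main() {
    const res = await client.chat.completions.create({
      model: process.env.OLLAMA_MODEL ?? 'llama3.2',
      messages: [{ role: 'user', content: 'Say "pong" and nothing else.' }],
    });
    console.log(res.choices[0]?.message?.content);
  }

  main().catch((err) => {
    console.error('Ollama smoke test failed:', err);
    process.exit(1);
  });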