From 7d547aaa09117fca4aed5500963f695251bdcd57 Mon Sep 17 00:00:00 2001
From: Nymul Islam
Date: Sun, 12 Apr 2026 13:04:42 +0900
Subject: [PATCH] feat(ai): add Ollama/local LLM support for privacy-sensitive deployments

Closes #53

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 backend/.env.example                        | 14 +++-
 backend/src/modules/ai/ai.service.ts        | 95 +++++++++++++++++----
 backend/src/modules/ai/embedding.service.ts | 68 +++++++++++-----
 3 files changed, 140 insertions(+), 37 deletions(-)

diff --git a/backend/.env.example b/backend/.env.example
index 8cfb728..04b3c89 100644
--- a/backend/.env.example
+++ b/backend/.env.example
@@ -96,18 +96,28 @@
 MAIL_USERNAME=your_ses_smtp_username
 MAIL_PASSWORD=your_ses_smtp_password
 
 # =====================================================
-# AI SERVICES (OpenRouter)
+# AI SERVICES
 # =====================================================
+# AI Provider: openrouter | ollama | openai
+# Defaults to "openrouter" if unset (preserves current behavior)
+AI_PROVIDER=openrouter
+
+# OpenRouter (cloud, default)
 OPENROUTER_API_KEY=sk-or-v1-your-openrouter-api-key
 OPENROUTER_BASE_URL=https://openrouter.ai/api/v1
 OPENROUTER_DEFAULT_MODEL=anthropic/claude-3.5-sonnet
 OPENROUTER_EMBEDDING_MODEL=openai/text-embedding-3-small
 
-# OpenAI Direct (optional, for embeddings)
+# OpenAI Direct (optional, for embeddings or as primary provider)
 OPENAI_API_KEY=sk-your-openai-api-key
 OPENAI_EMBEDDING_MODEL=text-embedding-3-small
 
+# Ollama (local, fully offline; no data leaves the machine)
+OLLAMA_BASE_URL=http://localhost:11434/v1
+OLLAMA_MODEL=llama3.2
+OLLAMA_EMBEDDING_MODEL=nomic-embed-text
+
 # =====================================================
 # OAUTH - GOOGLE
 # =====================================================
diff --git a/backend/src/modules/ai/ai.service.ts b/backend/src/modules/ai/ai.service.ts
index 01e4ea6..2d71b86 100644
--- a/backend/src/modules/ai/ai.service.ts
+++ b/backend/src/modules/ai/ai.service.ts
@@ -1,9 +1,17 @@
 /**
  * AI Service
  *
- * Uses OpenRouter as the unified gateway for all LLM providers.
- * OpenRouter provides a single API compatible with OpenAI SDK
- * to access models from OpenAI, Anthropic, Google, DeepSeek, and more.
+ * Supports multiple AI providers via the OpenAI SDK:
+ * - OpenRouter: unified cloud gateway for all models (default)
+ * - Ollama: local/offline LLM for privacy-sensitive deployments
+ * - OpenAI: direct OpenAI API access
+ *
+ * Set the AI_PROVIDER env var to choose the provider.
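+ *
+ * Example (.env), assuming a local Ollama install with the defaults below pulled:
+ *   AI_PROVIDER=ollama
+ *   OLLAMA_BASE_URL=http://localhost:11434/v1
+ *   OLLAMA_MODEL=llama3.2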
 */
 
 import {
@@ -19,9 +22,13 @@ import OpenAI from 'openai';
 import { DatabaseService } from '../database/database.service';
 import { v4 as uuidv4 } from 'uuid';
 
-// Default models
+// AI provider type
+type AiProvider = 'openrouter' | 'ollama' | 'openai';
+
+// Default models per provider
 const DEFAULT_MODEL = 'openai/gpt-4o-mini';
 const DEFAULT_VISION_MODEL = 'openai/gpt-4o';
+const DEFAULT_OLLAMA_MODEL = 'llama3.2';
 
 export interface ChatMessage {
   role: 'system' | 'user' | 'assistant';
@@ -57,12 +64,18 @@ export interface TranscriptionResponse {
 export class AiService implements OnModuleInit {
   private readonly logger = new Logger(AiService.name);
 
+  // Active AI provider
+  private provider: AiProvider = 'openrouter';
+
   // OpenRouter client (PRIMARY gateway for all models)
   private openRouterClient: OpenAI | null = null;
 
   // Direct OpenAI client (fallback only)
   private openaiClient: OpenAI | null = null;
 
+  // Ollama client (local/offline)
+  private ollamaClient: OpenAI | null = null;
+
   constructor(
     private readonly configService: ConfigService,
     private readonly db: DatabaseService,
@@ -73,6 +86,25 @@
   }
 
   private async initializeClients() {
+    // Determine the active provider (default: openrouter for backward compat)
+    this.provider = (
+      this.configService.get('AI_PROVIDER', 'openrouter') as AiProvider
+    );
+
+    // Initialize Ollama (local LLM)
+    if (this.provider === 'ollama') {
+      const ollamaBaseUrl = this.configService.get(
+        'OLLAMA_BASE_URL',
+        'http://localhost:11434/v1',
+      );
+      this.ollamaClient = new OpenAI({
+        apiKey: 'ollama', // Ollama doesn't need a key, but the SDK requires a non-empty string
+        baseURL: ollamaBaseUrl,
+        timeout: 120000, // 2 minutes; local inference can be slow
+      });
+      this.logger.log(`Ollama client initialized (${ollamaBaseUrl})`);
+    }
+
     // Initialize OpenRouter (PRIMARY gateway for all models)
     const openRouterKey = this.configService.get('OPENROUTER_API_KEY');
 
@@ -100,12 +132,23 @@
     }
 
     // Log status
-    if (!this.openRouterClient && !this.openaiClient) {
-      this.logger.warn('No AI clients available! Set OPENROUTER_API_KEY or OPENAI_API_KEY in .env');
+    if (!this.ollamaClient && !this.openRouterClient && !this.openaiClient) {
+      this.logger.warn(
+        'No AI clients available! Set AI_PROVIDER and the corresponding config in .env',
+      );
     }
   }
 
   private getClient(): OpenAI {
+    // Use the configured provider first
+    if (this.provider === 'ollama' && this.ollamaClient) {
+      return this.ollamaClient;
+    }
+
+    if (this.provider === 'openai' && this.openaiClient) {
+      return this.openaiClient;
+    }
+
     // Primary: Use OpenRouter for everything (unified gateway)
     if (this.openRouterClient) {
       return this.openRouterClient;
     }
@@ -117,13 +160,19 @@
     }
 
     throw new BadRequestException(
-      'No AI API key configured. Set OPENROUTER_API_KEY or OPENAI_API_KEY in .env file.',
+      'No AI provider configured. Set AI_PROVIDER and the matching settings in the .env file.',
     );
   }
 
   private getModel(type: 'text' | 'vision' = 'text'): string {
+    // Ollama uses its own model names
+    if (this.provider === 'ollama') {
+      // Same model for text and vision; use a multimodal model (e.g. llama3.2-vision) for vision
+      return this.configService.get('OLLAMA_MODEL', DEFAULT_OLLAMA_MODEL);
+    }
+
     // When using OpenRouter, use provider/model format
-    if (this.openRouterClient) {
+    if (this.provider === 'openrouter' && this.openRouterClient) {
       return type === 'vision' ?
         this.configService.get('OPENROUTER_VISION_MODEL', DEFAULT_VISION_MODEL) :
         this.configService.get('OPENROUTER_DEFAULT_MODEL', DEFAULT_MODEL);
@@ -140,7 +189,12 @@
     options: CompletionOptions = {},
   ): Promise<CompletionResponse> {
     const client = this.getClient();
-    const model = options.model || this.getModel('text');
+    // When using Ollama, always use the configured Ollama model
+    // (ignore OpenRouter-style model names like "openai/gpt-4o")
+    const model =
+      this.provider === 'ollama'
+        ? this.getModel('text')
+        : options.model || this.getModel('text');
 
     try {
       const response = await client.chat.completions.create({
@@ -167,8 +221,8 @@
       const err = error as Error;
       this.logger.error(`AI completion failed: ${err.message}`);
 
-      // Try fallback to direct OpenAI if OpenRouter fails
-      if (this.openRouterClient && this.openaiClient) {
+      // Try fallback to direct OpenAI if OpenRouter fails (not applicable for Ollama)
+      if (this.provider !== 'ollama' && this.openRouterClient && this.openaiClient) {
        this.logger.warn('Attempting fallback to direct OpenAI...');
         return this.completeFallback(messages, options);
       }
@@ -221,6 +275,7 @@
     const client = this.getClient();
     const model = this.getModel('vision');
 
+    // Ollama multimodal models support vision via the same API
     const response = await client.chat.completions.create({
       model,
       messages: [
@@ -247,7 +302,8 @@
       return response.choices[0]?.message?.content || '';
     } catch (error) {
       this.logger.error('Vision generation failed', error);
-      throw new BadRequestException(`Vision AI failed: ${error.message}`);
+      const errorMessage = error instanceof Error ? error.message : 'Unknown error';
+      throw new BadRequestException(`Vision AI failed: ${errorMessage}`);
     }
   }
 
@@ -276,7 +332,10 @@
     options: CompletionOptions = {},
   ): AsyncGenerator<{ content: string; done: boolean }> {
     const client = this.getClient();
-    const model = options.model || this.getModel('text');
+    const model =
+      this.provider === 'ollama'
+        ? this.getModel('text')
+        : options.model || this.getModel('text');
 
     const stream = await client.chat.completions.create({
       model,
@@ -365,11 +424,14 @@
   }
 
   isAvailable(): boolean {
-    return this.openRouterClient !== null || this.openaiClient !== null;
+    return this.ollamaClient !== null || this.openRouterClient !== null || this.openaiClient !== null;
   }
 
   getAvailableProviders(): string[] {
     const providers: string[] = [];
+    if (this.ollamaClient) {
+      providers.push('ollama');
+    }
     if (this.openRouterClient) {
       providers.push('openrouter', 'openai', 'anthropic', 'google', 'deepseek');
     } else if (this.openaiClient) {
diff --git a/backend/src/modules/ai/embedding.service.ts b/backend/src/modules/ai/embedding.service.ts
index 63d52d9..8b3cf0c 100644
--- a/backend/src/modules/ai/embedding.service.ts
+++ b/backend/src/modules/ai/embedding.service.ts
@@ -1,6 +1,9 @@
 import { Injectable, Logger } from '@nestjs/common';
 import { ConfigService } from '@nestjs/config';
 
+// AI provider type
+type AiProvider = 'openrouter' | 'ollama' | 'openai';
+
 export interface EmbeddingResult {
   vector: number[];
   tokens: number;
@@ -9,37 +12,65 @@
 @Injectable()
 export class EmbeddingService {
   private readonly logger = new Logger(EmbeddingService.name);
+  private readonly provider: AiProvider;
   private readonly apiKey: string;
   private readonly baseUrl: string;
   private readonly embeddingModel: string;
   private readonly vectorDimension: number;
 
   constructor(private readonly configService: ConfigService) {
-    this.apiKey = this.configService.get('OPENROUTER_API_KEY', '');
-    this.baseUrl = this.configService.get(
-      'OPENROUTER_BASE_URL',
-      'https://openrouter.ai/api/v1',
-    );
-    this.embeddingModel = this.configService.get(
-      'OPENROUTER_EMBEDDING_MODEL',
-      'openai/text-embedding-3-small',
+    this.provider = (
+      this.configService.get('AI_PROVIDER', 'openrouter') as AiProvider
     );
-    this.vectorDimension = 1536;
+
+    if (this.provider === 'ollama') {
+      this.apiKey = 'ollama'; // Ollama doesn't need a key
+      this.baseUrl = this.configService.get(
+        'OLLAMA_BASE_URL',
+        'http://localhost:11434/v1',
+      );
+      this.embeddingModel = this.configService.get(
+        'OLLAMA_EMBEDDING_MODEL',
+        'nomic-embed-text',
+      );
+      // nomic-embed-text produces 768-dim vectors; adjust if using a different model
+      this.vectorDimension = 768;
+      this.logger.log(`Embedding service using Ollama (${this.embeddingModel})`);
+    } else {
+      this.apiKey = this.configService.get('OPENROUTER_API_KEY', '');
+      this.baseUrl = this.configService.get(
+        'OPENROUTER_BASE_URL',
+        'https://openrouter.ai/api/v1',
+      );
+      this.embeddingModel = this.configService.get(
+        'OPENROUTER_EMBEDDING_MODEL',
+        'openai/text-embedding-3-small',
+      );
+      this.vectorDimension = 1536;
+    }
   }
 
   getVectorDimension(): number {
     return this.vectorDimension;
   }
 
+  private getHeaders(): Record<string, string> {
+    const headers: Record<string, string> = {
+      Authorization: `Bearer ${this.apiKey}`,
+      'Content-Type': 'application/json',
+    };
+    // OpenRouter-specific attribution headers (not needed for Ollama)
+    if (this.provider !== 'ollama') {
+      headers['HTTP-Referer'] = 'https://studyield.com';
+      headers['X-Title'] = 'Studyield';
+    }
+    return headers;
+  }
+
   async embed(text: string): Promise<EmbeddingResult> {
     const response = await fetch(`${this.baseUrl}/embeddings`, {
       method: 'POST',
-      headers: {
-        Authorization: `Bearer ${this.apiKey}`,
-        'Content-Type': 'application/json',
-        'HTTP-Referer': 'https://studyield.com',
-        'X-Title': 'Studyield',
-      },
+      headers: this.getHeaders(),
       body: JSON.stringify({
         model: this.embeddingModel,
         input: text,
@@ -65,12 +96,7 @@
     const response = await fetch(`${this.baseUrl}/embeddings`, {
       method: 'POST',
-      headers: {
-        Authorization: `Bearer ${this.apiKey}`,
-        'Content-Type': 'application/json',
-        'HTTP-Referer': 'https://studyield.com',
-        'X-Title': 'Studyield',
-      },
+      headers: this.getHeaders(),
       body: JSON.stringify({
         model: this.embeddingModel,
         input: texts,
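--

Reviewer note: a minimal standalone sketch for exercising the Ollama path,
not part of the patch. It assumes Ollama is running on the default port with
"ollama pull llama3.2" done, and uses the same openai npm package the service
itself uses; the file name and env handling here are illustrative only.

  // smoke-ollama.ts (hypothetical file, local verification only)
  import OpenAI from 'openai';

  // Mirrors AiService's Ollama client setup: any non-empty apiKey works
  // because Ollama ignores it; the base URL is Ollama's OpenAI-compatible API.
  const client = new OpenAI({
    apiKey: 'ollama',
    baseURL: process.env.OLLAMA_BASE_URL ?? 'http://localhost:11434/v1',
  });

  async function main() {
    const res = await client.chat.completions.create({
      model: process.env.OLLAMA_MODEL ?? 'llama3.2',
      messages: [{ role: 'user', content: 'Say "pong" and nothing else.' }],
    });
    console.log(res.choices[0]?.message?.content);
  }

  main().catch((err) => {
    console.error('Ollama smoke test failed:', err);
    process.exit(1);
  });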