embedding-service.ts
/**
 * Embedding service for LM Studio integration
 *
 * This service handles text embedding generation using LM Studio's
 * OpenAI-compatible API for vector search functionality.
 */

import { Task } from "../lib.ts";
import { getEnv } from "../actors/utils/env.ts";
import type {
  EmbeddingResponse,
  EmbeddingError,
} from "./vector-types.ts";

/**
 * Default configuration for the embedding service
 */
const DEFAULT_CONFIG = {
  baseUrl: "http://localhost:1234/v1",
  apiKey: "lm-studio", // LM Studio uses a dummy API key
  model: "Qwen3-Embedding-0.6B-Q8_0.gguf",
  timeout: 30000, // 30 seconds
  maxBatchSize: 10, // Process up to 10 texts at once
  cooldownMs: 1000, // 1 second between API calls
};

/**
 * Embedding service class that handles text-to-vector conversion
 */
export class EmbeddingService {
  private config: {
    baseUrl: string;
    apiKey: string;
    model: string;
    timeout: number;
    maxBatchSize: number;
    cooldownMs: number;
  };
  private lastRequestTime = 0;

  constructor(options: {
    baseUrl?: string;
    apiKey?: string;
    model?: string;
    timeout?: number;
    maxBatchSize?: number;
    cooldownMs?: number;
  } = {}) {
    this.config = {
      baseUrl: options.baseUrl || getEnv("LM_STUDIO_BASE_URL") || DEFAULT_CONFIG.baseUrl,
      apiKey: options.apiKey || DEFAULT_CONFIG.apiKey,
      model: options.model || getEnv("LM_STUDIO_EMBEDDING_MODEL") || DEFAULT_CONFIG.model,
      timeout: options.timeout || DEFAULT_CONFIG.timeout,
      maxBatchSize: options.maxBatchSize || Number(getEnv("EMBEDDING_BATCH_SIZE")) || DEFAULT_CONFIG.maxBatchSize,
      cooldownMs: options.cooldownMs || Number(getEnv("EMBEDDING_COOLDOWN_MS")) || DEFAULT_CONFIG.cooldownMs,
    };

    console.log(`🔧 EmbeddingService initialized:`);
    console.log(`   Base URL: ${this.config.baseUrl}`);
    console.log(`   Model: ${this.config.model}`);
    console.log(`   Batch size: ${this.config.maxBatchSize}`);
    console.log(`   Cooldown: ${this.config.cooldownMs}ms`);
  }

  /**
   * Get the current embedding model name
   */
  get embeddingModel(): string {
    return this.config.model;
  }

  /**
   * Test connection to LM Studio
   */
  async testConnection(): Promise<boolean> {
    try {
      console.log("🔍 Testing LM Studio connection...");

      // Try a simple embedding request
      const response = await this.embedText("test");

      if (response && response.length > 0) {
        console.log("✅ LM Studio connection successful");
        console.log(`   Vector dimension: ${response.length}`);
        return true;
      } else {
        console.error("❌ LM Studio returned empty embedding");
        return false;
      }
    } catch (error) {
      console.error("❌ LM Studio connection failed:", error);
      return false;
    }
  }

  /**
   * Embed a single text string
   */
  async embedText(text: string): Promise<number[]> {
    const response = await this.embedBatch([text]);
    if (!response.embeddings || response.embeddings.length === 0) {
      throw this.createError("No embeddings returned from API", "api_error");
    }
    return response.embeddings[0];
  }

  /**
   * Embed multiple texts in a batch
   */
  async embedBatch(texts: string[]): Promise<EmbeddingResponse> {
    if (texts.length === 0) {
      throw this.createError("No texts provided for embedding", "api_error");
    }

    if (texts.length > this.config.maxBatchSize) {
      throw this.createError(
        `Batch size ${texts.length} exceeds maximum ${this.config.maxBatchSize}`,
        "api_error"
      );
    }

    // Apply cooldown between requests
    await this.applyCooldown();

    try {
      console.log(`🔧 Embedding ${texts.length} text(s) with ${this.config.model}`);
      console.log(`🔧 Total characters: ${texts.reduce((sum, text) => sum + text.length, 0)}`);

      const startTime = Date.now();

      // Make direct HTTP request to LM Studio
      const response = await Promise.race([
        this.makeEmbeddingRequest(texts),
        this.createTimeoutPromise(),
      ]);

      const duration = Date.now() - startTime;
      console.log(`✅ Embedding completed in ${duration}ms`);
      console.log(`   Vector dimension: ${response.data[0]?.embedding?.length || "unknown"}`);

      this.lastRequestTime = Date.now();

      return {
        embeddings: response.data.map((item: any) => item.embedding),
        model: this.config.model,
        usage: response.usage ? {
          total_tokens: response.usage.total_tokens,
        } : undefined,
      };
    } catch (error) {
      const embeddingError = this.handleError(error);
      console.error("❌ Embedding failed:", embeddingError.message);
      throw embeddingError;
    }
  }

  /**
   * Embed texts with automatic batching for large arrays
   */
  async *embedTextsInBatches(texts: string[]): AsyncGenerator<EmbeddingResponse, void, unknown> {
    console.log(`🔧 Processing ${texts.length} texts in batches of ${this.config.maxBatchSize}`);

    for (let i = 0; i < texts.length; i += this.config.maxBatchSize) {
      const batch = texts.slice(i, i + this.config.maxBatchSize);
      const batchNumber = Math.floor(i / this.config.maxBatchSize) + 1;
      const totalBatches = Math.ceil(texts.length / this.config.maxBatchSize);

      console.log(`🔄 Processing batch ${batchNumber}/${totalBatches} (${batch.length} texts)`);

      try {
        const response = await this.embedBatch(batch);
        yield response;
      } catch (error) {
        console.error(`❌ Batch ${batchNumber} failed:`, error);
        throw error;
      }
    }
  }

  /**
   * Get vector dimension for the current model
   */
  async getVectorDimension(): Promise<number> {
    try {
      const testVector = await this.embedText("test");
      return testVector.length;
    } catch {
      console.warn("⚠️ Could not determine vector dimension, using default 768");
      return 768; // Default fallback
    }
  }

  /**
   * Make HTTP request to LM Studio embedding API
   */
  private async makeEmbeddingRequest(texts: string[]): Promise<any> {
    const response = await fetch(`${this.config.baseUrl}/embeddings`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${this.config.apiKey}`,
      },
      body: JSON.stringify({
        model: this.config.model,
        input: texts,
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    return await response.json();
  }

  /**
   * Apply cooldown between API requests
   */
  private async applyCooldown(): Promise<void> {
    const timeSinceLastRequest = Date.now() - this.lastRequestTime;
    const cooldownRemaining = this.config.cooldownMs - timeSinceLastRequest;

    if (cooldownRemaining > 0) {
      console.log(`⏱️ Applying cooldown: ${cooldownRemaining}ms`);
      await new Promise((resolve) => setTimeout(resolve, cooldownRemaining));
    }
  }

  /**
   * Create a timeout promise for request timeouts
   */
  private createTimeoutPromise(): Promise<never> {
    return new Promise((_, reject) => {
      setTimeout(() => {
        reject(this.createError(
          `Embedding request timed out after ${this.config.timeout}ms`,
          "timeout_error"
        ));
      }, this.config.timeout);
    });
  }

  /**
   * Handle and categorize errors from the embedding API
   */
  private handleError(error: unknown): EmbeddingError {
    if (error instanceof Error) {
      // Check for specific error types
      if (error.message.includes("ECONNREFUSED") || error.message.includes("fetch")) {
        return this.createError(
          `Cannot connect to LM Studio at ${this.config.baseUrl}: ${error.message}`,
          "connection_error",
          error
        );
      }

      if (error.message.includes("timeout") || error.message.includes("aborted")) {
        return this.createError(
          `Request timed out: ${error.message}`,
          "timeout_error",
          error
        );
      }

      if (error.message.includes("model") || error.message.includes("Model")) {
        return this.createError(
          `Model error with ${this.config.model}: ${error.message}`,
          "model_error",
          error
        );
      }

      // Generic API error
      return this.createError(
        `API error: ${error.message}`,
        "api_error",
        error
      );
    }

    // Unknown error type
    return this.createError(
      `Unknown error: ${String(error)}`,
      "api_error"
    );
  }

  /**
   * Create a typed embedding error
   */
  private createError(
    message: string,
    type: EmbeddingError["type"],
    originalError?: Error,
    statusCode?: number
  ): EmbeddingError {
    const error = new Error(message) as EmbeddingError;
    error.type = type;
    error.originalError = originalError;
    error.statusCode = statusCode;
    return error;
  }
}

/**
 * Create a singleton embedding service instance
 */
let _embeddingService: EmbeddingService | null = null;

export function getEmbeddingService(): EmbeddingService {
  if (!_embeddingService) {
    _embeddingService = new EmbeddingService();
  }
  return _embeddingService;
}

/**
 * Generator function for embedding text with Task monad integration
 */
export function* embedTextTask(text: string): Generator<any, number[], any> {
  const service = getEmbeddingService();
  return yield* Task.wait(service.embedText(text));
}

/**
 * Generator function for batch embedding with Task monad integration
 */
export function* embedBatchTask(texts: string[]): Generator<any, EmbeddingResponse, any> {
  const service = getEmbeddingService();
  return yield* Task.wait(service.embedBatch(texts));
}

/**
 * Check if the embedding service is enabled
 * @deprecated This is now controlled by JSON configuration, not environment variables
 */
export function isEmbeddingEnabled(): boolean {
  // Always return true - enabled/disabled is now controlled by vector store config
  return true;
}
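
/**
 * Usage sketch (illustrative only; not part of the original service).
 * Shows one way a caller might batch-embed a document set and collect
 * the vectors via the async generator above. Assumes LM Studio is
 * running locally with an embedding model loaded; `exampleEmbedDocuments`
 * and its `documents` parameter are hypothetical names, not existing APIs.
 */
async function exampleEmbedDocuments(documents: string[]): Promise<number[][]> {
  const service = getEmbeddingService();

  // Fail fast if the LM Studio endpoint is unreachable.
  if (!(await service.testConnection())) {
    throw new Error("LM Studio embedding endpoint is not reachable");
  }

  // Drain the generator batch by batch; each response carries one
  // embedding per input text in that batch.
  const vectors: number[][] = [];
  for await (const response of service.embedTextsInBatches(documents)) {
    vectors.push(...response.embeddings);
  }
  return vectors;
}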