// services/embedding-service.ts
/**
 * Embedding service for LM Studio integration
 *
 * This service handles text embedding generation using LM Studio's
 * OpenAI-compatible API for vector search functionality.
 */

import { Task } from "../lib.ts";
import { getEnv } from "../actors/utils/env.ts";
import type {
  EmbeddingResponse,
  EmbeddingError,
} from "./vector-types.ts";

/**
 * Default configuration for the embedding service
 */
const DEFAULT_CONFIG = {
  baseUrl: "http://localhost:1234/v1",
  apiKey: "lm-studio", // LM Studio uses a dummy API key
  model: "Qwen3-Embedding-0.6B-Q8_0.gguf",
  timeout: 30000, // 30 seconds
  maxBatchSize: 10, // Process up to 10 texts at once
  cooldownMs: 1000, // 1 second between API calls
};
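
// Environment overrides read by the constructor (all optional); each falls
// back to the DEFAULT_CONFIG value above when unset:
//   LM_STUDIO_BASE_URL        - overrides baseUrl
//   LM_STUDIO_EMBEDDING_MODEL - overrides model
//   EMBEDDING_BATCH_SIZE      - overrides maxBatchSize
//   EMBEDDING_COOLDOWN_MS     - overrides cooldownMs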

/**
 * Embedding service class that handles text-to-vector conversion
 */
export class EmbeddingService {
  private config: {
    baseUrl: string;
    apiKey: string;
    model: string;
    timeout: number;
    maxBatchSize: number;
    cooldownMs: number;
  };
  private lastRequestTime = 0;

  constructor(options: {
    baseUrl?: string;
    apiKey?: string;
    model?: string;
    timeout?: number;
    maxBatchSize?: number;
    cooldownMs?: number;
  } = {}) {
    this.config = {
      baseUrl: options.baseUrl || getEnv("LM_STUDIO_BASE_URL") || DEFAULT_CONFIG.baseUrl,
      apiKey: options.apiKey || DEFAULT_CONFIG.apiKey,
      model: options.model || getEnv("LM_STUDIO_EMBEDDING_MODEL") || DEFAULT_CONFIG.model,
      timeout: options.timeout || DEFAULT_CONFIG.timeout,
      maxBatchSize: options.maxBatchSize || Number(getEnv("EMBEDDING_BATCH_SIZE")) || DEFAULT_CONFIG.maxBatchSize,
      cooldownMs: options.cooldownMs || Number(getEnv("EMBEDDING_COOLDOWN_MS")) || DEFAULT_CONFIG.cooldownMs,
    };

    console.log(`🔧 EmbeddingService initialized:`);
    console.log(`   Base URL: ${this.config.baseUrl}`);
    console.log(`   Model: ${this.config.model}`);
    console.log(`   Batch size: ${this.config.maxBatchSize}`);
    console.log(`   Cooldown: ${this.config.cooldownMs}ms`);
  }
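
  // Construction sketch: explicit options win over environment variables,
  // which win over DEFAULT_CONFIG (the values below are illustrative only):
  //
  //   const service = new EmbeddingService({ maxBatchSize: 5, cooldownMs: 0 });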

  /**
   * Get the current embedding model name
   */
  get embeddingModel(): string {
    return this.config.model;
  }

  /**
   * Test connection to LM Studio
   */
  async testConnection(): Promise<boolean> {
    try {
      console.log("🔍 Testing LM Studio connection...");

      // Try a simple embedding request
      const response = await this.embedText("test");

      if (response && response.length > 0) {
        console.log("✅ LM Studio connection successful");
        console.log(`   Vector dimension: ${response.length}`);
        return true;
      } else {
        console.error("❌ LM Studio returned empty embedding");
        return false;
      }
    } catch (error) {
      console.error("❌ LM Studio connection failed:", error);
      return false;
    }
  }

  /**
   * Embed a single text string
   */
  async embedText(text: string): Promise<number[]> {
    const response = await this.embedBatch([text]);
    if (!response.embeddings || response.embeddings.length === 0) {
      throw this.createError("No embeddings returned from API", "api_error");
    }
    return response.embeddings[0];
  }

  /**
   * Embed multiple texts in a batch
   */
  async embedBatch(texts: string[]): Promise<EmbeddingResponse> {
    if (texts.length === 0) {
      throw this.createError("No texts provided for embedding", "api_error");
    }

    if (texts.length > this.config.maxBatchSize) {
      throw this.createError(
        `Batch size ${texts.length} exceeds maximum ${this.config.maxBatchSize}`,
        "api_error"
      );
    }

    // Apply cooldown between requests
    await this.applyCooldown();

    try {
      console.log(`🔧 Embedding ${texts.length} text(s) with ${this.config.model}`);
      console.log(`🔧 Total characters: ${texts.reduce((sum, text) => sum + text.length, 0)}`);

      const startTime = Date.now();

      // Make a direct HTTP request to LM Studio, racing it against the timeout
      const response = await Promise.race([
        this.makeEmbeddingRequest(texts),
        this.createTimeoutPromise(),
      ]);

      const duration = Date.now() - startTime;
      console.log(`✅ Embedding completed in ${duration}ms`);
      console.log(`   Vector dimension: ${response.data[0]?.embedding?.length || "unknown"}`);

      return {
        embeddings: response.data.map((item: any) => item.embedding),
        model: this.config.model,
        usage: response.usage ? {
          total_tokens: response.usage.total_tokens,
        } : undefined,
      };
    } catch (error) {
      const embeddingError = this.handleError(error);
      console.error("❌ Embedding failed:", embeddingError.message);
      throw embeddingError;
    } finally {
      // Record the request time even when the call fails, so the cooldown
      // still throttles retries against LM Studio
      this.lastRequestTime = Date.now();
    }
  }

  /**
   * Embed texts with automatic batching for large arrays
   */
  async* embedTextsInBatches(texts: string[]): AsyncGenerator<EmbeddingResponse, void, unknown> {
    console.log(`🔧 Processing ${texts.length} texts in batches of ${this.config.maxBatchSize}`);

    for (let i = 0; i < texts.length; i += this.config.maxBatchSize) {
      const batch = texts.slice(i, i + this.config.maxBatchSize);
      const batchNumber = Math.floor(i / this.config.maxBatchSize) + 1;
      const totalBatches = Math.ceil(texts.length / this.config.maxBatchSize);

      console.log(`🔄 Processing batch ${batchNumber}/${totalBatches} (${batch.length} texts)`);

      try {
        const response = await this.embedBatch(batch);
        yield response;
      } catch (error) {
        console.error(`❌ Batch ${batchNumber} failed:`, error);
        throw error;
      }
    }
  }
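
  // Consumption sketch (assumes a `service` instance and a `texts: string[]`
  // array in an async context):
  //
  //   const vectors: number[][] = [];
  //   for await (const batch of service.embedTextsInBatches(texts)) {
  //     vectors.push(...batch.embeddings);
  //   }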

  /**
   * Get vector dimension for the current model
   */
  async getVectorDimension(): Promise<number> {
    try {
      const testVector = await this.embedText("test");
      return testVector.length;
    } catch {
      console.warn("⚠️ Could not determine vector dimension, using default 768");
      return 768; // Default fallback
    }
  }

  /**
   * Make HTTP request to LM Studio embedding API
   */
  private async makeEmbeddingRequest(texts: string[]): Promise<any> {
    const response = await fetch(`${this.config.baseUrl}/embeddings`, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "Authorization": `Bearer ${this.config.apiKey}`,
      },
      body: JSON.stringify({
        model: this.config.model,
        input: texts,
      }),
    });

    if (!response.ok) {
      throw new Error(`HTTP ${response.status}: ${response.statusText}`);
    }

    return await response.json();
  }
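
  // The parsed body is assumed to follow the OpenAI embeddings response shape,
  // which LM Studio's /v1/embeddings endpoint emulates, e.g.:
  //
  //   {
  //     "object": "list",
  //     "data": [{ "object": "embedding", "index": 0, "embedding": [0.1, ...] }],
  //     "model": "...",
  //     "usage": { "prompt_tokens": 2, "total_tokens": 2 }
  //   }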

  /**
   * Apply cooldown between API requests
   */
  private async applyCooldown(): Promise<void> {
    const timeSinceLastRequest = Date.now() - this.lastRequestTime;
    const cooldownRemaining = this.config.cooldownMs - timeSinceLastRequest;

    if (cooldownRemaining > 0) {
      console.log(`⏱️ Applying cooldown: ${cooldownRemaining}ms`);
      await new Promise(resolve => setTimeout(resolve, cooldownRemaining));
    }
  }

  /**
   * Create a timeout promise for request timeouts
   */
  private createTimeoutPromise(): Promise<never> {
    return new Promise((_, reject) => {
      setTimeout(() => {
        reject(this.createError(
          `Embedding request timed out after ${this.config.timeout}ms`,
          "timeout_error"
        ));
      }, this.config.timeout);
    });
  }
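
  // Note: the timer above is never cleared when the request wins the race in
  // embedBatch, so it can keep a Node.js event loop alive for up to `timeout`
  // ms. A tighter variant (sketch, assuming a runtime with AbortSignal.timeout)
  // would drop the race and instead pass
  // `signal: AbortSignal.timeout(this.config.timeout)` to fetch in
  // makeEmbeddingRequest.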

  /**
   * Handle and categorize errors from the embedding API
   */
  private handleError(error: unknown): EmbeddingError {
    if (error instanceof Error) {
      // Check for specific error types
      if (error.message.includes("ECONNREFUSED") || error.message.includes("fetch")) {
        return this.createError(
          `Cannot connect to LM Studio at ${this.config.baseUrl}: ${error.message}`,
          "connection_error",
          error
        );
      }

      if (error.message.includes("timeout") || error.message.includes("aborted")) {
        return this.createError(
          `Request timed out: ${error.message}`,
          "timeout_error",
          error
        );
      }

      if (error.message.includes("model") || error.message.includes("Model")) {
        return this.createError(
          `Model error with ${this.config.model}: ${error.message}`,
          "model_error",
          error
        );
      }

      // Generic API error
      return this.createError(
        `API error: ${error.message}`,
        "api_error",
        error
      );
    }

    // Unknown error type
    return this.createError(
      `Unknown error: ${String(error)}`,
      "api_error"
    );
  }

  /**
   * Create a typed embedding error
   */
  private createError(
    message: string,
    type: EmbeddingError["type"],
    originalError?: Error,
    statusCode?: number
  ): EmbeddingError {
    const error = new Error(message) as EmbeddingError;
    error.type = type;
    error.originalError = originalError;
    error.statusCode = statusCode;
    return error;
  }
}

/**
 * Create a singleton embedding service instance
 */
let _embeddingService: EmbeddingService | null = null;

export function getEmbeddingService(): EmbeddingService {
  if (!_embeddingService) {
    _embeddingService = new EmbeddingService();
  }
  return _embeddingService;
}
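
// Usage sketch (assumes an async context in the caller):
//
//   const service = getEmbeddingService();
//   if (await service.testConnection()) {
//     const vector = await service.embedText("hello world");
//     console.log(`dimension: ${vector.length}`);
//   }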

/**
 * Generator function for embedding text with Task monad integration
 */
export function* embedTextTask(text: string): Generator<any, number[], any> {
  const service = getEmbeddingService();
  return yield* Task.wait(service.embedText(text));
}

/**
 * Generator function for batch embedding with Task monad integration
 */
export function* embedBatchTask(texts: string[]): Generator<any, EmbeddingResponse, any> {
  const service = getEmbeddingService();
  return yield* Task.wait(service.embedBatch(texts));
}
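
// These Task helpers are meant to be composed from other Task generators via
// yield* delegation; a sketch (assumes the caller already runs inside the Task
// runtime from ../lib.ts):
//
//   function* indexDocuments(texts: string[]) {
//     const response = yield* embedBatchTask(texts);
//     // ...persist response.embeddings in the vector store
//   }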

/**
 * Check if embedding service is enabled
 * @deprecated This is now controlled by JSON configuration, not environment variables
 */
export function isEmbeddingEnabled(): boolean {
  // Always return true - enabled/disabled is now controlled by vector store config
  return true;
}