/ src / praisonai-ts / src / knowledge / query-engine.ts
query-engine.ts
/**
 * Query Engine - Semantic, keyword, and hybrid search for knowledge retrieval
 *
 * Provides a unified search interface for RAG applications.
 *
 * @example
 * ```typescript
 * import { QueryEngine } from 'praisonai';
 *
 * const engine = new QueryEngine({
 *   embedder: async (text) => embeddings.embed(text),
 *   vectorStore: vectorStore
 * });
 *
 * const results = await engine.query('What is PraisonAI?', { topK: 5 });
 * ```
 */
 18  
 19  import { randomUUID } from 'crypto';
 20  
 21  /**
 22   * Query result item
 23   */
 24  export interface QueryResult {
 25      /** Document ID */
 26      id: string;
 27      /** Document content */
 28      content: string;
 29      /** Relevance score (0-1) */
 30      score: number;
 31      /** Document metadata */
 32      metadata?: Record<string, any>;
 33      /** Source information */
 34      source?: string;
 35  }
 36  
 37  /**
 38   * Query options
 39   */
 40  export interface QueryOptions {
 41      /** Number of results to return */
 42      topK?: number;
 43      /** Minimum score threshold */
 44      minScore?: number;
 45      /** Filter by metadata */
 46      filter?: Record<string, any>;
 47      /** Search mode */
 48      mode?: 'semantic' | 'keyword' | 'hybrid';
 49      /** Rerank results */
 50      rerank?: boolean;
 51      /** Include document content */
 52      includeContent?: boolean;
 53  }
 54  
 55  /**
 56   * Embedder function type
 57   */
 58  export type EmbedderFn = (text: string) => Promise<number[]>;
 59  
 60  /**
 61   * Vector store interface (minimal)
 62   */
 63  export interface VectorStoreInterface {
 64      query(vector: number[], options?: { topK?: number; filter?: any }): Promise<Array<{
 65          id: string;
 66          score: number;
 67          metadata?: Record<string, any>;
 68          content?: string;
 69      }>>;
 70      search?(query: string, options?: { topK?: number }): Promise<any[]>;
 71  }
 72  
 73  /**
 74   * Query engine configuration
 75   */
 76  export interface QueryEngineConfig {
 77      /** Embedding function */
 78      embedder?: EmbedderFn;
 79      /** Vector store for semantic search */
 80      vectorStore?: VectorStoreInterface;
 81      /** Keyword search function */
 82      keywordSearch?: (query: string, options?: QueryOptions) => Promise<QueryResult[]>;
 83      /** Reranker function */
 84      reranker?: (query: string, results: QueryResult[]) => Promise<QueryResult[]>;
 85      /** Default options */
 86      defaultOptions?: QueryOptions;
 87  }
 88  
 89  /**
 90   * QueryEngine - Unified search for knowledge retrieval
 91   */
 92  export class QueryEngine {
 93      readonly id: string;
 94      private config: QueryEngineConfig;
 95      private cache: Map<string, { results: QueryResult[]; timestamp: number }>;
 96      private cacheMaxAge: number;
 97  
 98      constructor(config: QueryEngineConfig = {}) {
 99          this.id = randomUUID();
100          this.config = config;
101          this.cache = new Map();
102          this.cacheMaxAge = 5 * 60 * 1000; // 5 minutes
103      }
104  
105      /**
106       * Semantic search using embeddings
107       */
108      async semanticSearch(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
109          if (!this.config.embedder || !this.config.vectorStore) {
110              throw new Error('Semantic search requires embedder and vectorStore');
111          }
112  
113          const topK = options.topK ?? 10;
114          const minScore = options.minScore ?? 0;
115  
116          // Generate embedding
117          const embedding = await this.config.embedder(query);
118  
119          // Search vector store
120          const raw = await this.config.vectorStore.query(embedding, {
121              topK,
122              filter: options.filter,
123          });
124  
125          // Convert to QueryResult format
126          const results: QueryResult[] = raw
127              .filter(r => r.score >= minScore)
128              .map(r => ({
129                  id: r.id,
130                  content: r.content ?? '',
131                  score: r.score,
132                  metadata: r.metadata,
133                  source: r.metadata?.source,
134              }));
135  
136          return results;
137      }
138  
139      /**
140       * Keyword search
141       */
142      async keywordSearch(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
143          if (!this.config.keywordSearch) {
144              // Fallback: Use vector store's text search if available
145              if (this.config.vectorStore?.search) {
146                  const results = await this.config.vectorStore.search(query, { topK: options.topK });
147                  return results.map((r, i) => ({
148                      id: r.id ?? `kw-${i}`,
149                      content: r.content ?? r.text ?? '',
150                      score: r.score ?? 1 - (i * 0.1),
151                      metadata: r.metadata,
152                  }));
153              }
154              throw new Error('Keyword search not configured');
155          }
156  
157          return this.config.keywordSearch(query, options);
158      }
159  
160      /**
161       * Hybrid search combining semantic and keyword
162       */
163      async hybridSearch(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
164          const topK = options.topK ?? 10;
165  
166          // Run both searches in parallel
167          const [semanticResults, keywordResults] = await Promise.all([
168              this.semanticSearch(query, { ...options, topK: topK * 2 }).catch(() => []),
169              this.keywordSearch(query, { ...options, topK: topK * 2 }).catch(() => []),
170          ]);
171  
172          // Combine and deduplicate
173          const combined = new Map<string, QueryResult>();
174  
175          // Add semantic results (with weighted score)
176          for (const r of semanticResults) {
177              combined.set(r.id, { ...r, score: r.score * 0.6 });
178          }
179  
180          // Add keyword results (combine scores if exists)
181          for (const r of keywordResults) {
182              if (combined.has(r.id)) {
183                  const existing = combined.get(r.id)!;
184                  combined.set(r.id, {
185                      ...existing,
186                      score: existing.score + (r.score * 0.4),
187                  });
188              } else {
189                  combined.set(r.id, { ...r, score: r.score * 0.4 });
190              }
191          }
192  
193          // Sort by combined score and take topK
194          return Array.from(combined.values())
195              .sort((a, b) => b.score - a.score)
196              .slice(0, topK);
197      }
198  
199      /**
200       * Main query method - routes to appropriate search type
201       */
202      async query(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
203          const mergedOptions = { ...this.config.defaultOptions, ...options };
204          const mode = mergedOptions.mode ?? 'semantic';
205  
206          // Check cache
207          const cacheKey = `${mode}:${query}:${JSON.stringify(mergedOptions)}`;
208          const cached = this.cache.get(cacheKey);
209          if (cached && Date.now() - cached.timestamp < this.cacheMaxAge) {
210              return cached.results;
211          }
212  
213          let results: QueryResult[];
214  
215          switch (mode) {
216              case 'keyword':
217                  results = await this.keywordSearch(query, mergedOptions);
218                  break;
219              case 'hybrid':
220                  results = await this.hybridSearch(query, mergedOptions);
221                  break;
222              case 'semantic':
223              default:
224                  results = await this.semanticSearch(query, mergedOptions);
225          }
226  
227          // Rerank if configured
228          if (mergedOptions.rerank && this.config.reranker) {
229              results = await this.config.reranker(query, results);
230          }
231  
232          // Cache results
233          this.cache.set(cacheKey, { results, timestamp: Date.now() });
234  
235          return results;
236      }
237  
238      /**
239       * Query and return formatted context string
240       */
241      async queryForContext(query: string, options: QueryOptions = {}): Promise<string> {
242          const results = await this.query(query, options);
243  
244          if (results.length === 0) {
245              return 'No relevant information found.';
246          }
247  
248          return results
249              .map((r, i) => `[${i + 1}] ${r.content}`)
250              .join('\n\n');
251      }
252  
253      /**
254       * Clear the query cache
255       */
256      clearCache(): void {
257          this.cache.clear();
258      }
259  
260      /**
261       * Set cache max age
262       */
263      setCacheMaxAge(ms: number): void {
264          this.cacheMaxAge = ms;
265      }
266  }
267  
/**
 * Factory counterpart of `new QueryEngine(...)`.
 *
 * @param config - Optional engine configuration, handed to the constructor unchanged.
 * @returns The newly constructed engine.
 */
export function createQueryEngine(config?: QueryEngineConfig): QueryEngine {
    const engine = new QueryEngine(config);
    return engine;
}
274  
/**
 * Create a simple in-memory query engine for testing.
 *
 * The engine is configured for keyword mode only. A document's score is the
 * fraction of the query's whitespace-separated terms that occur in its content
 * as case-insensitive substrings (1 means every term was found).
 *
 * @param documents - Corpus to search. It is captured by reference, so later
 *   changes to the array are visible to subsequent searches.
 * @returns A QueryEngine whose default mode is 'keyword'.
 */
export function createSimpleQueryEngine(documents: Array<{ id: string; content: string; metadata?: any }>): QueryEngine {
    // Term-coverage matching: no term frequency or document length weighting.
    const keywordSearch = async (query: string, options?: QueryOptions) => {
        // Splitting an empty or whitespace-padded query yields '' terms, and
        // ''.includes-style checks match every document, so drop them.
        const queryTerms = query
            .toLowerCase()
            .split(/\s+/)
            .filter(term => term.length > 0);
        if (queryTerms.length === 0) {
            return [];
        }

        const topK = options?.topK ?? 10;
        const minScore = options?.minScore ?? 0;

        return documents
            .map(doc => {
                const content = doc.content.toLowerCase();
                const matched = queryTerms.filter(term => content.includes(term)).length;
                return {
                    id: doc.id,
                    content: doc.content,
                    // Single division avoids the rounding drift of repeated additions.
                    score: matched / queryTerms.length,
                    metadata: doc.metadata,
                };
            })
            .filter(d => d.score > 0 && d.score >= minScore)
            .sort((a, b) => b.score - a.score)
            .slice(0, topK);
    };

    return new QueryEngine({
        keywordSearch,
        defaultOptions: { mode: 'keyword' },
    });
}
312  
313  // Default export
314  export default QueryEngine;