query-engine.ts
/**
 * Query Engine - Semantic and hybrid search for knowledge retrieval
 *
 * Provides a unified search interface for RAG applications.
 *
 * @example
 * ```typescript
 * import { QueryEngine } from 'praisonai';
 *
 * const engine = new QueryEngine({
 *   embedder: async (text) => embeddings.embed(text),
 *   vectorStore: vectorStore
 * });
 *
 * const results = await engine.query('What is PraisonAI?', { topK: 5 });
 * ```
 */

import { randomUUID } from 'crypto';

/**
 * Query result item
 */
export interface QueryResult {
  /** Document ID */
  id: string;
  /** Document content */
  content: string;
  /** Relevance score (0-1) */
  score: number;
  /** Document metadata */
  metadata?: Record<string, any>;
  /** Source information */
  source?: string;
}

/**
 * Query options
 */
export interface QueryOptions {
  /** Number of results to return */
  topK?: number;
  /** Minimum score threshold */
  minScore?: number;
  /** Filter by metadata (forwarded to the vector store by semantic search) */
  filter?: Record<string, any>;
  /** Search mode */
  mode?: 'semantic' | 'keyword' | 'hybrid';
  /** Rerank results (only takes effect when a `reranker` is configured) */
  rerank?: boolean;
  /**
   * Include document content.
   * Not consulted by the built-in search paths in this file; it is still
   * forwarded to a custom `keywordSearch` function via the options object.
   */
  includeContent?: boolean;
}

/**
 * Embedder function type
 */
export type EmbedderFn = (text: string) => Promise<number[]>;

/**
 * Vector store interface (minimal)
 */
export interface VectorStoreInterface {
  query(vector: number[], options?: { topK?: number; filter?: any }): Promise<Array<{
    id: string;
    score: number;
    metadata?: Record<string, any>;
    content?: string;
  }>>;
  search?(query: string, options?: { topK?: number }): Promise<any[]>;
}

/**
 * Query engine configuration
 */
export interface QueryEngineConfig {
  /** Embedding function */
  embedder?: EmbedderFn;
  /** Vector store for semantic search */
  vectorStore?: VectorStoreInterface;
  /** Keyword search function */
  keywordSearch?: (query: string, options?: QueryOptions) => Promise<QueryResult[]>;
  /** Reranker function */
  reranker?: (query: string, results: QueryResult[]) => Promise<QueryResult[]>;
  /** Default options */
  defaultOptions?: QueryOptions;
}

/** Weight applied to semantic scores when merging hybrid results. */
const SEMANTIC_WEIGHT = 0.6;
/** Weight applied to keyword scores when merging hybrid results. */
const KEYWORD_WEIGHT = 0.4;

/** Render a rejection reason of unknown type as a readable message. */
function describeError(reason: unknown): string {
  return reason instanceof Error ? reason.message : String(reason);
}

/** Copy the array and each result object so cache entries and callers never share them. */
function cloneResults(results: QueryResult[]): QueryResult[] {
  return results.map(result => ({ ...result }));
}

/**
 * QueryEngine - Unified search for knowledge retrieval
 *
 * Results of `query()` are cached in memory per (mode, query, options) for
 * `cacheMaxAge` milliseconds (5 minutes unless changed via `setCacheMaxAge`).
 */
export class QueryEngine {
  readonly id: string;
  private config: QueryEngineConfig;
  private cache: Map<string, { results: QueryResult[]; timestamp: number }>;
  private cacheMaxAge: number;

  constructor(config: QueryEngineConfig = {}) {
    this.id = randomUUID();
    this.config = config;
    this.cache = new Map();
    this.cacheMaxAge = 5 * 60 * 1000; // 5 minutes
  }

  /**
   * Semantic search using embeddings
   *
   * Embeds the query, asks the vector store for `topK` neighbours (default 10)
   * and drops hits scoring below `minScore` (default 0).
   *
   * @throws Error when `embedder` or `vectorStore` is not configured.
   */
  async semanticSearch(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
    const { embedder, vectorStore } = this.config;
    if (!embedder || !vectorStore) {
      throw new Error('Semantic search requires embedder and vectorStore');
    }

    const topK = options.topK ?? 10;
    const minScore = options.minScore ?? 0;

    const embedding = await embedder(query);
    const raw = await vectorStore.query(embedding, {
      topK,
      filter: options.filter,
    });

    return raw
      .filter(hit => hit.score >= minScore)
      .map(hit => ({
        id: hit.id,
        content: hit.content ?? '',
        score: hit.score,
        metadata: hit.metadata,
        source: hit.metadata?.source,
      }));
  }

  /**
   * Keyword search
   *
   * Delegates to the configured `keywordSearch` function. Without one, falls
   * back to the vector store's optional `search` method and normalises its
   * untyped hits into `QueryResult`s.
   *
   * @throws Error when neither a keyword function nor `vectorStore.search` exists.
   */
  async keywordSearch(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
    if (this.config.keywordSearch) {
      return this.config.keywordSearch(query, options);
    }

    const store = this.config.vectorStore;
    if (!store?.search) {
      throw new Error('Keyword search not configured');
    }

    const hits = await store.search(query, { topK: options.topK });
    return hits.map((hit, rank) => ({
      id: hit.id ?? `kw-${rank}`,
      content: hit.content ?? hit.text ?? '',
      // Hits without a score get a rank-based one; clamp at 0 so results past
      // the tenth do not receive a negative score.
      score: hit.score ?? Math.max(0, 1 - rank * 0.1),
      metadata: hit.metadata,
    }));
  }

  /**
   * Hybrid search combining semantic and keyword
   *
   * Runs both searches in parallel (each asked for `2 * topK` candidates),
   * merges them by id with weights 0.6 (semantic) and 0.4 (keyword), and
   * returns the `topK` highest combined scores (default 10).
   *
   * A failure of one search is tolerated and treated as "no results" from that
   * side.
   *
   * @throws Error when both the semantic and the keyword search fail, since no
   *   search was actually performed in that case.
   */
  async hybridSearch(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
    const topK = options.topK ?? 10;
    const widened: QueryOptions = { ...options, topK: topK * 2 };

    const [semantic, keyword] = await Promise.allSettled([
      this.semanticSearch(query, widened),
      this.keywordSearch(query, widened),
    ]);

    if (semantic.status === 'rejected' && keyword.status === 'rejected') {
      throw new Error(
        `Hybrid search failed: semantic: ${describeError(semantic.reason)}; ` +
          `keyword: ${describeError(keyword.reason)}`,
      );
    }

    const semanticResults = semantic.status === 'fulfilled' ? semantic.value : [];
    const keywordResults = keyword.status === 'fulfilled' ? keyword.value : [];

    // Merge by document id; a document found by both searches sums its weighted scores.
    const combined = new Map<string, QueryResult>();

    for (const result of semanticResults) {
      combined.set(result.id, { ...result, score: result.score * SEMANTIC_WEIGHT });
    }

    for (const result of keywordResults) {
      const existing = combined.get(result.id);
      if (existing) {
        combined.set(result.id, {
          ...existing,
          score: existing.score + result.score * KEYWORD_WEIGHT,
        });
      } else {
        combined.set(result.id, { ...result, score: result.score * KEYWORD_WEIGHT });
      }
    }

    return Array.from(combined.values())
      .sort((a, b) => b.score - a.score)
      .slice(0, topK);
  }

  /**
   * Main query method - routes to appropriate search type
   *
   * Merges `defaultOptions` with the per-call options, serves a fresh cache
   * entry when one exists, otherwise runs the search for the selected mode
   * (default `'semantic'`), optionally reranks, and caches the outcome.
   */
  async query(query: string, options: QueryOptions = {}): Promise<QueryResult[]> {
    const mergedOptions: QueryOptions = { ...this.config.defaultOptions, ...options };
    const mode = mergedOptions.mode ?? 'semantic';

    const cacheKey = `${mode}:${query}:${JSON.stringify(mergedOptions)}`;
    const cached = this.cache.get(cacheKey);
    if (cached && Date.now() - cached.timestamp < this.cacheMaxAge) {
      // Hand out a copy so callers cannot mutate the cached entry.
      return cloneResults(cached.results);
    }

    let results: QueryResult[];

    switch (mode) {
      case 'keyword':
        results = await this.keywordSearch(query, mergedOptions);
        break;
      case 'hybrid':
        results = await this.hybridSearch(query, mergedOptions);
        break;
      case 'semantic':
      default:
        results = await this.semanticSearch(query, mergedOptions);
    }

    if (mergedOptions.rerank && this.config.reranker) {
      results = await this.config.reranker(query, results);
    }

    // Evict stale entries on every write so the cache cannot grow without bound.
    this.pruneExpiredEntries();
    this.cache.set(cacheKey, { results: cloneResults(results), timestamp: Date.now() });

    return results;
  }

  /**
   * Query and return formatted context string
   *
   * Each result's content is rendered as `[n] content`, separated by blank
   * lines; a fixed message is returned when nothing was found.
   */
  async queryForContext(query: string, options: QueryOptions = {}): Promise<string> {
    const results = await this.query(query, options);

    if (results.length === 0) {
      return 'No relevant information found.';
    }

    return results
      .map((result, index) => `[${index + 1}] ${result.content}`)
      .join('\n\n');
  }

  /**
   * Clear the query cache
   */
  clearCache(): void {
    this.cache.clear();
  }

  /**
   * Set cache max age
   *
   * @param ms Lifetime of a cache entry in milliseconds.
   */
  setCacheMaxAge(ms: number): void {
    this.cacheMaxAge = ms;
  }

  /** Remove every cache entry that would no longer be served as a hit. */
  private pruneExpiredEntries(): void {
    const now = Date.now();
    for (const [key, entry] of this.cache) {
      // Exact complement of the hit test used in query().
      if (!(now - entry.timestamp < this.cacheMaxAge)) {
        this.cache.delete(key);
      }
    }
  }
}

/**
 * Create a query engine
 */
export function createQueryEngine(config?: QueryEngineConfig): QueryEngine {
  return new QueryEngine(config);
}

/**
 * Create a simple in-memory query engine for testing
 *
 * The engine defaults to keyword mode. A document's score is the fraction of
 * whitespace-separated query terms that occur (case-insensitively, as
 * substrings) in its content. Documents with a score of 0, or below
 * `minScore` when given, are omitted. A query with no terms yields no results.
 */
export function createSimpleQueryEngine(documents: Array<{ id: string; content: string; metadata?: any }>): QueryEngine {
  const keywordSearch = async (query: string, options?: QueryOptions): Promise<QueryResult[]> => {
    // Drop empty terms: ''.includes-style matching would otherwise hit every document.
    const queryTerms = query
      .toLowerCase()
      .split(/\s+/)
      .filter(term => term.length > 0);
    if (queryTerms.length === 0) {
      return [];
    }

    const topK = options?.topK ?? 10;
    const minScore = options?.minScore ?? 0;

    const scored = documents.map(doc => {
      const content = doc.content.toLowerCase();
      let score = 0;
      for (const term of queryTerms) {
        if (content.includes(term)) {
          score += 1 / queryTerms.length;
        }
      }
      return { ...doc, score };
    });

    return scored
      .filter(doc => doc.score > 0 && doc.score >= minScore)
      .sort((a, b) => b.score - a.score)
      .slice(0, topK)
      .map(doc => ({
        id: doc.id,
        content: doc.content,
        score: doc.score,
        metadata: doc.metadata,
      }));
  };

  return new QueryEngine({
    keywordSearch,
    defaultOptions: { mode: 'keyword' },
  });
}

// Default export
export default QueryEngine;