// memory-db.ts
1 import Database from 'better-sqlite3' 2 import path from 'path' 3 import fs from 'fs' 4 import { createHash } from 'crypto' 5 import { genId } from '@/lib/id' 6 import type { MemoryEntry, FileReference, MemoryImage, MemoryReference } from '@/types' 7 import { getEmbedding, cosineSimilarity, serializeEmbedding, deserializeEmbedding } from '@/lib/server/embeddings' 8 import { hmrSingleton } from '@/lib/shared-utils' 9 import { applyMMR } from '@/lib/server/mmr' 10 import { calculateTemporalDecayMultiplier, isDecayExempt } from '@/lib/server/memory/temporal-decay' 11 import { loadSettings } from '@/lib/server/storage' 12 import { 13 normalizeLinkedMemoryIds, 14 normalizeMemoryLookupLimits, 15 resolveLookupRequest, 16 traverseLinkedMemoryGraph, 17 type MemoryLookupLimits, 18 } from '@/lib/server/memory/memory-graph' 19 import { isWorkingMemoryCategory } from '@/lib/server/memory/memory-tiers' 20 21 import { generateAbstract } from '@/lib/server/memory/memory-abstract' 22 import { DATA_DIR, MEMORY_IMAGES_DIR, WORKSPACE_DIR } from '@/lib/server/data-dir' 23 import { safeJsonParse } from '@/lib/server/json-utils' 24 import { tryResolvePathWithinBaseDir } from '@/lib/server/path-utils' 25 import { log } from '@/lib/server/logger' 26 27 const TAG = 'memory-db' 28 29 const DB_PATH = path.join(DATA_DIR, 'memory.db') 30 const IMAGES_DIR = MEMORY_IMAGES_DIR 31 const APP_STATE_ROOT_DIR = path.dirname(DATA_DIR) 32 33 const MAX_IMAGE_INPUT_BYTES = 10 * 1024 * 1024 // 10MB 34 const IMAGE_EXT_WHITELIST = new Set(['.png', '.jpg', '.jpeg', '.gif', '.webp', '.bmp', '.tiff']) 35 export const MAX_FTS_QUERY_TERMS = 6 36 export const MAX_FTS_TERM_LENGTH = 48 37 const MAX_FTS_RESULT_ROWS = 50 38 const DEFAULT_VECTOR_SIMILARITY_THRESHOLD = 0.3 39 const MAX_MERGED_RESULTS = 80 40 41 export const MEMORY_FTS_STOP_WORDS = new Set([ 42 'a', 'an', 'and', 'are', 'as', 'at', 'be', 'by', 'for', 'from', 'how', 43 'i', 'if', 'in', 'is', 'it', 'of', 'on', 'or', 'that', 'the', 'this', 44 'to', 'was', 
'we', 'were', 'what', 'when', 'where', 'which', 'who', 'with', 45 'you', 'your', 46 ]) 47 48 export type MemoryScopeMode = 'auto' | 'all' | 'global' | 'agent' | 'session' | 'project' 49 export type MemoryRerankMode = 'balanced' | 'semantic' | 'lexical' 50 51 export interface MemoryScopeFilter { 52 mode: MemoryScopeMode 53 agentId?: string | null 54 sessionId?: string | null 55 projectRoot?: string | null 56 } 57 58 export interface MemorySearchOptions { 59 scope?: MemoryScopeFilter 60 rerankMode?: MemoryRerankMode 61 vectorSimilarityThreshold?: number 62 } 63 64 function normalizeScopeIdentifier(value: unknown): string | null { 65 if (typeof value !== 'string') return null 66 const trimmed = value.trim() 67 return trimmed ? trimmed : null 68 } 69 70 function normalizePathForScope(value: unknown): string | null { 71 if (typeof value !== 'string') return null 72 const trimmed = value.trim() 73 if (!trimmed) return null 74 return path.normalize(trimmed).replace(/\\/g, '/').replace(/\/+$/, '').toLowerCase() 75 } 76 77 function memorySearchText(entry: MemoryEntry): string { 78 const refs = Array.isArray(entry.references) 79 ? 
entry.references.map((ref) => `${ref.type} ${ref.path || ''} ${ref.title || ''} ${ref.note || ''}`).join(' ') 80 : '' 81 return `${entry.title || ''} ${entry.content || ''} ${refs}`.toLowerCase() 82 } 83 84 function tokenizeForRerank(input: string): string[] { 85 const raw = String(input || '').toLowerCase().match(/[a-z0-9][a-z0-9._:/-]*/g) || [] 86 const out: string[] = [] 87 const seen = new Set<string>() 88 for (const token of raw) { 89 if (token.length < 2) continue 90 if (MEMORY_FTS_STOP_WORDS.has(token)) continue 91 if (seen.has(token)) continue 92 seen.add(token) 93 out.push(token) 94 } 95 return out 96 } 97 98 function keywordOverlapScore(queryTokens: string[], entryText: string): number { 99 if (!queryTokens.length) return 0 100 const corpus = entryText.toLowerCase() 101 let matched = 0 102 for (const token of queryTokens) { 103 if (token.length < 3) continue 104 if (corpus.includes(token)) matched++ 105 } 106 return matched / queryTokens.length 107 } 108 109 function entryRootsForScope(entry: MemoryEntry): string[] { 110 const roots = new Set<string>() 111 const add = (raw: unknown) => { 112 const normalized = normalizePathForScope(raw) 113 if (normalized) roots.add(normalized) 114 } 115 116 add((entry.metadata as Record<string, unknown> | undefined)?.projectRoot) 117 118 if (Array.isArray(entry.references)) { 119 for (const ref of entry.references) { 120 add(ref.projectRoot) 121 if (ref.type === 'project') add(ref.path) 122 if (ref.type === 'folder' || ref.type === 'file') add(ref.path) 123 } 124 } 125 126 if (Array.isArray(entry.filePaths)) { 127 for (const ref of entry.filePaths) { 128 add(ref.projectRoot) 129 add(ref.path) 130 } 131 } 132 133 return [...roots] 134 } 135 136 function scopeAllowsAgentAccess(entry: MemoryEntry, agentId: string): boolean { 137 if (entry.agentId === agentId) return true 138 if (Array.isArray(entry.sharedWith) && entry.sharedWith.includes(agentId)) return true 139 return false 140 } 141 142 function metadataNumber(entry: 
MemoryEntry, key: string): number | null { 143 const value = entry.metadata && typeof entry.metadata === 'object' 144 ? entry.metadata[key] 145 : null 146 return typeof value === 'number' && Number.isFinite(value) ? value : null 147 } 148 149 function knowledgeChunkHashScope(category: string, metadata: unknown): string { 150 if (category !== 'knowledge' || !metadata || typeof metadata !== 'object') return category 151 const sourceIdValue = (metadata as Record<string, unknown>).sourceId 152 const sourceId = typeof sourceIdValue === 'string' 153 ? sourceIdValue.trim() 154 : '' 155 if (!sourceId) return category 156 const chunkIndexValue = (metadata as Record<string, unknown>).chunkIndex 157 const chunkIndex = typeof chunkIndexValue === 'number' 158 ? chunkIndexValue 159 : null 160 return `${category}:${sourceId}:${chunkIndex ?? 'legacy'}` 161 } 162 163 function followUpSalienceMultiplier(entry: MemoryEntry, nowTs: number): number { 164 if (entry.category !== 'reflection/open_loop') return 1 165 const resolvedAt = metadataNumber(entry, 'resolvedAt') 166 if (resolvedAt != null) return 0.7 167 const followUpAt = metadataNumber(entry, 'followUpAt') 168 if (followUpAt == null) return 1.1 169 return followUpAt <= nowTs ? 1.6 : 0.95 170 } 171 172 export function normalizeMemoryScopeMode(raw: unknown): MemoryScopeMode { 173 const value = typeof raw === 'string' ? 
raw.trim().toLowerCase() : '' 174 if (value === 'shared') return 'global' 175 if (value === 'all' || value === 'global' || value === 'agent' || value === 'session' || value === 'project') return value 176 return 'auto' 177 } 178 179 export function filterMemoriesByScope(entries: MemoryEntry[], scope?: MemoryScopeFilter): MemoryEntry[] { 180 if (!scope || scope.mode === 'all') return entries 181 const mode = normalizeMemoryScopeMode(scope.mode) 182 const agentId = normalizeScopeIdentifier(scope.agentId) 183 const sessionId = normalizeScopeIdentifier(scope.sessionId) 184 const projectRoot = normalizePathForScope(scope.projectRoot) 185 186 if (mode === 'global') { 187 return entries.filter((entry) => !entry.agentId) 188 } 189 190 if (mode === 'agent') { 191 if (!agentId) return [] 192 return entries.filter((entry) => scopeAllowsAgentAccess(entry, agentId)) 193 } 194 195 if (mode === 'session') { 196 if (!sessionId) return [] 197 return entries.filter((entry) => entry.sessionId === sessionId) 198 } 199 200 if (mode === 'project') { 201 if (!projectRoot) return [] 202 return entries.filter((entry) => { 203 const roots = entryRootsForScope(entry) 204 return roots.some((root) => root === projectRoot || root.startsWith(`${projectRoot}/`)) 205 }) 206 } 207 208 // auto 209 if (!agentId) return entries 210 return entries.filter((entry) => !entry.agentId || scopeAllowsAgentAccess(entry, agentId)) 211 } 212 213 function computeContentHash(category: string, content: string): string { 214 const normalized = `${category}|${content.toLowerCase().trim()}` 215 return createHash('sha256').update(normalized).digest('hex').slice(0, 16) 216 } 217 218 function shouldSkipSearchQuery(input: string): boolean { 219 const text = String(input || '').toLowerCase().trim() 220 if (!text) return true 221 if (text.length > 1200) return true 222 if (text.includes('swarm_heartbeat_check')) return true 223 if (text.includes('opencode_test_ok')) return true 224 if (text.includes('reply exactly') && 
text.includes('heartbeat')) return true 225 return false 226 } 227 228 // Simple cache for query embeddings to avoid blocking 229 const EMBEDDING_CACHE_MAX = 100 230 const EMBEDDING_CACHE_EVICT_TO = 80 231 const embeddingCache = hmrSingleton('__swarmclaw_memory_embedding_cache__', () => new Map<string, number[]>()) 232 233 function evictEmbeddingCache(): void { 234 if (embeddingCache.size <= EMBEDDING_CACHE_MAX) return 235 const excess = embeddingCache.size - EMBEDDING_CACHE_EVICT_TO 236 const iter = embeddingCache.keys() 237 for (let i = 0; i < excess; i++) { 238 const k = iter.next().value 239 if (k !== undefined) embeddingCache.delete(k) 240 } 241 } 242 243 function getEmbeddingSync(query: string): number[] | null { 244 const cached = embeddingCache.get(query) 245 if (cached) return cached 246 // Evict before async call to bound growth regardless of resolution 247 evictEmbeddingCache() 248 // Kick off async computation for next time 249 getEmbedding(query).then((emb) => { 250 if (emb) embeddingCache.set(query, emb) 251 }).catch(() => { /* ok */ }) 252 return null 253 } 254 255 function parseImageDimensionsFromSharp(metadata: { width?: number; height?: number }): { width?: number; height?: number } { 256 const width = typeof metadata.width === 'number' ? metadata.width : undefined 257 const height = typeof metadata.height === 'number' ? metadata.height : undefined 258 return { width, height } 259 } 260 261 function normalizeImageExt(sourcePath: string): string { 262 const ext = path.extname(sourcePath).toLowerCase() 263 return IMAGE_EXT_WHITELIST.has(ext) ? ext : '.jpg' 264 } 265 266 /** Compress an image file and store it in the memory-images directory. Returns structured image metadata. 
*/ 267 export async function storeMemoryImageAsset(sourcePath: string, memoryId: string): Promise<MemoryImage> { 268 if (!fs.existsSync(sourcePath)) { 269 throw new Error(`Image file not found: ${sourcePath}`) 270 } 271 const sourceStat = fs.statSync(sourcePath) 272 if (sourceStat.size > MAX_IMAGE_INPUT_BYTES) { 273 throw new Error(`Image exceeds max size (${MAX_IMAGE_INPUT_BYTES} bytes): ${sourcePath}`) 274 } 275 276 // Ensure images directory exists 277 fs.mkdirSync(IMAGES_DIR, { recursive: true }) 278 279 const ext = normalizeImageExt(sourcePath) 280 const destFilename = `${memoryId}${ext}` 281 const destPath = path.join(IMAGES_DIR, destFilename) 282 const jpgPath = destPath.replace(/\.[^.]+$/, '.jpg') 283 284 try { 285 // Try to use sharp for compression 286 const sharp = (await import('sharp')).default 287 const transformed = sharp(sourcePath) 288 .resize(1024, 1024, { fit: 'inside', withoutEnlargement: true }) 289 .jpeg({ quality: 75 }) 290 const info = await transformed.toFile(jpgPath) 291 const relPath = `data/memory-images/${path.basename(jpgPath)}` 292 return { 293 path: relPath, 294 mimeType: 'image/jpeg', 295 ...parseImageDimensionsFromSharp(info), 296 sizeBytes: info.size, 297 } 298 } catch { 299 // Fallback: copy file as-is if sharp is not available 300 fs.copyFileSync(sourcePath, destPath) 301 const stat = fs.statSync(destPath) 302 const mimeType = ext === '.png' 303 ? 'image/png' 304 : ext === '.gif' 305 ? 'image/gif' 306 : ext === '.webp' 307 ? 
'image/webp' 308 : 'image/jpeg' 309 return { 310 path: `data/memory-images/${destFilename}`, 311 mimeType, 312 sizeBytes: stat.size, 313 } 314 } 315 } 316 317 export async function storeMemoryImageFromDataUrl(dataUrl: string, memoryId: string): Promise<MemoryImage> { 318 const match = dataUrl.match(/^data:(image\/[a-zA-Z0-9.+-]+);base64,(.+)$/) 319 if (!match) throw new Error('Invalid image data URL format') 320 const [, mimeType, base64] = match 321 const buf = Buffer.from(base64, 'base64') 322 if (buf.length > MAX_IMAGE_INPUT_BYTES) { 323 throw new Error(`Image exceeds max size (${MAX_IMAGE_INPUT_BYTES} bytes)`) 324 } 325 326 fs.mkdirSync(IMAGES_DIR, { recursive: true }) 327 const ext = mimeType.includes('png') 328 ? '.png' 329 : mimeType.includes('gif') 330 ? '.gif' 331 : mimeType.includes('webp') 332 ? '.webp' 333 : '.jpg' 334 const tmpPath = path.join(IMAGES_DIR, `${memoryId}-upload${ext}`) 335 fs.writeFileSync(tmpPath, buf) 336 try { 337 return await storeMemoryImageAsset(tmpPath, memoryId) 338 } finally { 339 try { fs.unlinkSync(tmpPath) } catch { /* ignore */ } 340 } 341 } 342 343 /** Backward-compatible helper returning only the stored relative path. */ 344 export async function storeMemoryImage(sourcePath: string, memoryId: string): Promise<string> { 345 const image = await storeMemoryImageAsset(sourcePath, memoryId) 346 return image.path 347 } 348 349 let _db: ReturnType<typeof initDb> | null = null 350 351 export function getMemoryLookupLimits( 352 settingsOverride?: import('@/types').AppSettings | Record<string, unknown>, 353 ): MemoryLookupLimits { 354 const settings = settingsOverride || loadSettings() 355 return normalizeMemoryLookupLimits(settings) 356 } 357 358 function normalizeReferencePath(raw: unknown): string | undefined { 359 if (typeof raw !== 'string') return undefined 360 const value = raw.trim() 361 return value ? 
value : undefined 362 } 363 364 function canonicalText(value: unknown): string { 365 return String(value || '') 366 .toLowerCase() 367 .replace(/\s+/g, ' ') 368 .replace(/[^\w\s:/.-]/g, '') 369 .trim() 370 } 371 372 export function buildFtsQuery(input: string): string { 373 const tokens = String(input || '') 374 .toLowerCase() 375 .match(/[a-z0-9][a-z0-9._:/-]*/g) || [] 376 if (!tokens.length) return '' 377 378 const unique: string[] = [] 379 const seen = new Set<string>() 380 for (const token of tokens) { 381 const term = token.slice(0, MAX_FTS_TERM_LENGTH) 382 if (term.length < 3) continue 383 if (MEMORY_FTS_STOP_WORDS.has(term)) continue 384 if (seen.has(term)) continue 385 seen.add(term) 386 unique.push(term) 387 if (unique.length >= MAX_FTS_QUERY_TERMS) break 388 } 389 390 if (unique.length === 1) { 391 return `"${unique[0].replace(/"/g, '')}"` 392 } 393 394 const selected = unique.slice(0, Math.min(4, MAX_FTS_QUERY_TERMS)) 395 return selected.map((term) => `"${term.replace(/"/g, '')}"`).join(' AND ') 396 } 397 398 function resolveExists(pathValue: string | undefined): boolean | undefined { 399 if (!pathValue) return undefined 400 const candidates = path.isAbsolute(pathValue) 401 ? 
[pathValue] 402 : [ 403 tryResolvePathWithinBaseDir(APP_STATE_ROOT_DIR, pathValue), 404 tryResolvePathWithinBaseDir(WORKSPACE_DIR, pathValue), 405 ].filter((candidate): candidate is string => !!candidate) 406 if (candidates.length === 0) return undefined 407 try { 408 return candidates.some((candidate) => fs.existsSync(candidate)) 409 } catch { 410 return undefined 411 } 412 } 413 414 function normalizeReferences( 415 rawRefs: unknown, 416 legacyFilePaths: unknown, 417 ): MemoryReference[] | undefined { 418 const output: MemoryReference[] = [] 419 const seen = new Set<string>() 420 421 const pushRef = (ref: MemoryReference) => { 422 const key = `${ref.type}|${ref.path || ''}|${ref.projectRoot || ''}|${ref.title || ''}` 423 if (seen.has(key)) return 424 seen.add(key) 425 output.push(ref) 426 } 427 428 if (Array.isArray(rawRefs)) { 429 for (const raw of rawRefs) { 430 if (!raw || typeof raw !== 'object') continue 431 const obj = raw as Record<string, unknown> 432 const type = typeof obj.type === 'string' ? obj.type : 'file' 433 if (!['project', 'folder', 'file', 'task', 'session', 'url'].includes(type)) continue 434 const pathValue = normalizeReferencePath(obj.path) 435 const projectRoot = normalizeReferencePath(obj.projectRoot) 436 const title = typeof obj.title === 'string' ? obj.title.trim() : undefined 437 const note = typeof obj.note === 'string' ? obj.note.trim() : undefined 438 const projectName = typeof obj.projectName === 'string' ? obj.projectName.trim() : undefined 439 const ts = typeof obj.timestamp === 'number' && Number.isFinite(obj.timestamp) 440 ? Math.trunc(obj.timestamp) 441 : Date.now() 442 const exists = resolveExists(pathValue) ?? (typeof obj.exists === 'boolean' ? obj.exists : undefined) 443 pushRef({ 444 type: type as MemoryReference['type'], 445 path: pathValue, 446 projectRoot, 447 projectName, 448 title, 449 note, 450 exists, 451 timestamp: ts, 452 }) 453 } 454 } 455 456 const legacy = Array.isArray(legacyFilePaths) ? 
legacyFilePaths as FileReference[] : [] 457 for (const raw of legacy) { 458 if (!raw || typeof raw !== 'object') continue 459 const pathValue = normalizeReferencePath((raw as FileReference).path) 460 if (!pathValue) continue 461 const kind = (raw as FileReference).kind || 'file' 462 const type: MemoryReference['type'] = kind === 'project' ? 'project' : (kind === 'folder' ? 'folder' : 'file') 463 const timestamp = typeof raw.timestamp === 'number' && Number.isFinite(raw.timestamp) 464 ? Math.trunc(raw.timestamp) 465 : Date.now() 466 pushRef({ 467 type, 468 path: pathValue, 469 projectRoot: raw.projectRoot, 470 projectName: raw.projectName, 471 note: raw.contextSnippet, 472 exists: typeof raw.exists === 'boolean' ? raw.exists : resolveExists(pathValue), 473 timestamp, 474 }) 475 } 476 477 return output.length ? output : undefined 478 } 479 480 function referencesToLegacyFilePaths(references?: MemoryReference[]): FileReference[] | undefined { 481 if (!references?.length) return undefined 482 const fileRefs: FileReference[] = references 483 .filter((ref) => ref.type === 'file' || ref.type === 'folder' || ref.type === 'project') 484 .map((ref) => ({ 485 path: ref.path || '', 486 contextSnippet: ref.note, 487 kind: ref.type === 'project' 488 ? 'project' as const 489 : ref.type === 'folder' 490 ? 'folder' as const 491 : 'file' as const, 492 projectRoot: ref.projectRoot, 493 projectName: ref.projectName, 494 exists: ref.exists, 495 timestamp: ref.timestamp || Date.now(), 496 })) 497 .filter((ref) => !!ref.path) 498 return fileRefs.length ? fileRefs : undefined 499 } 500 501 function normalizeImage(rawImage: unknown, legacyImagePath?: string | null): MemoryImage | null | undefined { 502 if (rawImage && typeof rawImage === 'object') { 503 const obj = rawImage as Record<string, unknown> 504 const pathValue = normalizeReferencePath(obj.path) 505 if (pathValue) { 506 return { 507 path: pathValue, 508 mimeType: typeof obj.mimeType === 'string' ? 
obj.mimeType : undefined, 509 width: typeof obj.width === 'number' ? obj.width : undefined, 510 height: typeof obj.height === 'number' ? obj.height : undefined, 511 sizeBytes: typeof obj.sizeBytes === 'number' ? obj.sizeBytes : undefined, 512 } 513 } 514 } 515 const legacy = normalizeReferencePath(legacyImagePath || undefined) 516 if (legacy) return { path: legacy } 517 return undefined 518 } 519 520 function initDb() { 521 const db = new Database(DB_PATH) 522 db.pragma('journal_mode = WAL') 523 db.pragma('busy_timeout = 5000') 524 525 db.exec(` 526 CREATE TABLE IF NOT EXISTS memories ( 527 id TEXT PRIMARY KEY, 528 agentId TEXT, 529 sessionId TEXT, 530 category TEXT NOT NULL DEFAULT 'note', 531 title TEXT NOT NULL, 532 content TEXT NOT NULL DEFAULT '', 533 metadata TEXT, 534 createdAt INTEGER NOT NULL, 535 updatedAt INTEGER NOT NULL 536 ) 537 `) 538 539 // Safe column migrations for older databases 540 for (const col of [ 541 'agentId TEXT', 542 'sessionId TEXT', 543 'embedding BLOB', 544 'filePaths TEXT', 545 'imagePath TEXT', 546 'linkedMemoryIds TEXT', 547 '"references" TEXT', 548 'image TEXT', 549 'pinned INTEGER DEFAULT 0', 550 'sharedWith TEXT', 551 'accessCount INTEGER DEFAULT 0', 552 'lastAccessedAt INTEGER DEFAULT 0', 553 'contentHash TEXT', 554 'reinforcementCount INTEGER DEFAULT 0', 555 'abstract TEXT', 556 ]) { 557 try { db.exec(`ALTER TABLE memories ADD COLUMN ${col}`) } catch { /* already exists */ } 558 } 559 560 // Partial index for fast pinned-memory lookups 561 db.exec(`CREATE INDEX IF NOT EXISTS idx_memories_pinned ON memories(agentId, updatedAt DESC) WHERE pinned = 1`) 562 563 // Index for content hash dedup lookups 564 db.exec(`CREATE INDEX IF NOT EXISTS idx_memories_content_hash ON memories(contentHash) WHERE contentHash IS NOT NULL`) 565 566 // FTS5 virtual table for full-text search 567 db.exec(` 568 CREATE VIRTUAL TABLE IF NOT EXISTS memories_fts USING fts5( 569 title, content, category, 570 content='memories', 571 content_rowid='rowid' 572 
) 573 `) 574 575 // Triggers to keep FTS in sync 576 db.exec(` 577 CREATE TRIGGER IF NOT EXISTS memories_ai AFTER INSERT ON memories BEGIN 578 INSERT INTO memories_fts(rowid, title, content, category) 579 VALUES (new.rowid, new.title, new.content, new.category); 580 END 581 `) 582 583 // Critical list-path indexes for large memory datasets. 584 // Without these, ORDER BY updatedAt DESC LIMIT N performs a full table scan + temp sort. 585 db.exec(` 586 CREATE INDEX IF NOT EXISTS idx_memories_updated_at ON memories(updatedAt DESC) 587 `) 588 db.exec(` 589 CREATE INDEX IF NOT EXISTS idx_memories_agent_updated_at ON memories(agentId, updatedAt DESC) 590 `) 591 db.exec(` 592 CREATE INDEX IF NOT EXISTS idx_memories_session_category_updated_at ON memories(sessionId, category, updatedAt DESC) 593 `) 594 db.exec(` 595 CREATE TRIGGER IF NOT EXISTS memories_ad AFTER DELETE ON memories BEGIN 596 INSERT INTO memories_fts(memories_fts, rowid, title, content, category) 597 VALUES ('delete', old.rowid, old.title, old.content, old.category); 598 END 599 `) 600 db.exec(` 601 CREATE TRIGGER IF NOT EXISTS memories_au AFTER UPDATE ON memories BEGIN 602 INSERT INTO memories_fts(memories_fts, rowid, title, content, category) 603 VALUES ('delete', old.rowid, old.title, old.content, old.category); 604 INSERT INTO memories_fts(rowid, title, content, category) 605 VALUES (new.rowid, new.title, new.content, new.category); 606 END 607 `) 608 609 const rowsForMigration = db.prepare(` 610 SELECT id, filePaths, imagePath, linkedMemoryIds, "references" as refs, image 611 FROM memories 612 `).all() as Array<{ 613 id: string 614 filePaths: string | null 615 imagePath: string | null 616 linkedMemoryIds: string | null 617 refs: string | null 618 image: string | null 619 }> 620 621 const migrationStmt = db.prepare(` 622 UPDATE memories 623 SET "references" = ?, image = ?, linkedMemoryIds = ? 624 WHERE id = ? 
625 `) 626 627 const migrateLegacyRows = db.transaction(() => { 628 let migrated = 0 629 for (const row of rowsForMigration) { 630 const legacyFilePaths = safeJsonParse<FileReference[]>(row.filePaths, []) 631 const refs = normalizeReferences(safeJsonParse<MemoryReference[]>(row.refs, []), legacyFilePaths) 632 const image = normalizeImage(safeJsonParse<MemoryImage | null>(row.image, null), row.imagePath) 633 const linkedIds = normalizeLinkedMemoryIds(safeJsonParse<string[]>(row.linkedMemoryIds, []), row.id) 634 635 const nextRefs = refs?.length ? JSON.stringify(refs) : null 636 const nextImage = image ? JSON.stringify(image) : null 637 const nextLinks = linkedIds.length ? JSON.stringify(linkedIds) : null 638 639 if (nextRefs === row.refs && nextImage === row.image && nextLinks === row.linkedMemoryIds) continue 640 migrationStmt.run(nextRefs, nextImage, nextLinks, row.id) 641 migrated++ 642 } 643 if (migrated > 0) { 644 log.info(TAG, `Migrated ${migrated} legacy memory row(s) to graph schema`) 645 } 646 }) 647 migrateLegacyRows() 648 649 // Backfill contentHash for existing rows that don't have one yet 650 const unhashed = (db.prepare(`SELECT COUNT(*) as cnt FROM memories WHERE contentHash IS NULL`).get() as { cnt: number }).cnt 651 if (unhashed > 0) { 652 const backfillRows = db.prepare(`SELECT id, category, content FROM memories WHERE contentHash IS NULL`).all() as Array<{ id: string; category: string; content: string }> 653 const backfillStmt = db.prepare(`UPDATE memories SET contentHash = ? WHERE id = ?`) 654 const BATCH = 500 655 for (let i = 0; i < backfillRows.length; i += BATCH) { 656 const batch = backfillRows.slice(i, i + BATCH) 657 const tx = db.transaction(() => { 658 for (const r of batch) { 659 backfillStmt.run(computeContentHash(r.category, r.content), r.id) 660 } 661 }) 662 tx() 663 } 664 log.info(TAG, `Backfilled contentHash for ${backfillRows.length} memory row(s)`) 665 } 666 667 // Fresh installs now start with an empty memory graph. 
668 // Durable memories are created only from actual user/agent interactions. 669 670 const stmts = { 671 insert: db.prepare(` 672 INSERT INTO memories ( 673 id, agentId, sessionId, category, title, content, metadata, embedding, 674 "references", filePaths, image, imagePath, linkedMemoryIds, pinned, sharedWith, contentHash, abstract, createdAt, updatedAt 675 ) 676 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) 677 `), 678 update: db.prepare(` 679 UPDATE memories 680 SET agentId=?, sessionId=?, category=?, title=?, content=?, metadata=?, embedding=?, 681 "references"=?, filePaths=?, image=?, imagePath=?, linkedMemoryIds=?, pinned=?, sharedWith=?, contentHash=?, updatedAt=? 682 WHERE id=? 683 `), 684 delete: db.prepare(`DELETE FROM memories WHERE id=?`), 685 getById: db.prepare(`SELECT * FROM memories WHERE id=?`), 686 getByIds: (ids: string[]) => { 687 if (!ids.length) return [] 688 const placeholders = ids.map(() => '?').join(',') 689 return db.prepare(`SELECT * FROM memories WHERE id IN (${placeholders})`).all(...ids) as Record<string, unknown>[] 690 }, 691 listAll: db.prepare(`SELECT * FROM memories ORDER BY updatedAt DESC LIMIT ?`), 692 listByAgent: db.prepare(`SELECT * FROM memories WHERE agentId=? ORDER BY updatedAt DESC LIMIT ?`), 693 listByAgentOrShared: db.prepare(`SELECT * FROM memories WHERE agentId=? OR sharedWith LIKE ? ORDER BY updatedAt DESC LIMIT ?`), 694 listByCategoryAll: db.prepare(`SELECT * FROM memories WHERE category=? ORDER BY updatedAt DESC LIMIT ?`), 695 listByCategoryAgentOrShared: db.prepare(`SELECT * FROM memories WHERE category=? AND (agentId=? OR sharedWith LIKE ?) ORDER BY updatedAt DESC LIMIT ?`), 696 listKnowledgeSourceChunks: db.prepare(` 697 SELECT * FROM memories 698 WHERE category='knowledge' AND json_extract(metadata, '$.sourceId') = ? 
699 ORDER BY COALESCE(json_extract(metadata, '$.chunkIndex'), 0) ASC, createdAt ASC 700 `), 701 listPinnedByAgent: db.prepare(`SELECT * FROM memories WHERE pinned = 1 AND agentId = ? ORDER BY updatedAt DESC LIMIT ?`), 702 listPinnedAll: db.prepare(`SELECT * FROM memories WHERE pinned = 1 ORDER BY updatedAt DESC LIMIT ?`), 703 search: db.prepare(` 704 SELECT m.* FROM memories m 705 INNER JOIN memories_fts f ON m.rowid = f.rowid 706 WHERE memories_fts MATCH ? 707 LIMIT ${MAX_FTS_RESULT_ROWS} 708 `), 709 searchByAgent: db.prepare(` 710 SELECT m.* FROM memories m 711 INNER JOIN memories_fts f ON m.rowid = f.rowid 712 WHERE memories_fts MATCH ? AND m.agentId = ? 713 LIMIT ${MAX_FTS_RESULT_ROWS} 714 `), 715 searchByAgentOrShared: db.prepare(` 716 SELECT m.* FROM memories m 717 INNER JOIN memories_fts f ON m.rowid = f.rowid 718 WHERE memories_fts MATCH ? AND (m.agentId = ? OR m.sharedWith LIKE ?) 719 LIMIT ${MAX_FTS_RESULT_ROWS} 720 `), 721 // Remove a linked ID from all memories that reference it (cleanup on delete) 722 findMemoriesLinkingTo: db.prepare(`SELECT * FROM memories WHERE linkedMemoryIds LIKE ?`), 723 updateLinks: db.prepare(`UPDATE memories SET linkedMemoryIds = ?, updatedAt = ? WHERE id = ?`), 724 latestBySessionCategory: db.prepare(` 725 SELECT * FROM memories 726 WHERE sessionId = ? AND category = ? 727 ORDER BY updatedAt DESC 728 LIMIT 1 729 `), 730 allRowsByUpdated: db.prepare(`SELECT * FROM memories ORDER BY updatedAt DESC`), 731 countsByAgent: db.prepare(`SELECT COALESCE(agentId, '_global') AS agentKey, COUNT(*) AS cnt FROM memories GROUP BY agentKey`), 732 exactDuplicateBySessionCategory: db.prepare(` 733 SELECT * FROM memories 734 WHERE sessionId = ? AND category = ? AND title = ? AND content = ? 735 ORDER BY updatedAt DESC 736 LIMIT 1 737 `), 738 findByContentHash: db.prepare(` 739 SELECT * FROM memories 740 WHERE contentHash = ? AND agentId = ? 
741 ORDER BY updatedAt DESC 742 LIMIT 1 743 `), 744 findByContentHashShared: db.prepare(` 745 SELECT * FROM memories 746 WHERE contentHash = ? AND agentId IS NULL 747 ORDER BY updatedAt DESC 748 LIMIT 1 749 `), 750 reinforceMemory: db.prepare(` 751 UPDATE memories SET reinforcementCount = reinforcementCount + 1, updatedAt = ? WHERE id = ? 752 `), 753 bumpAccessCount: db.prepare(` 754 UPDATE memories SET accessCount = accessCount + 1, lastAccessedAt = ? WHERE id = ? 755 `), 756 frequentlyAccessedByAgent: db.prepare(` 757 SELECT * FROM memories WHERE agentId = ? AND accessCount >= ? AND lastAccessedAt >= ? ORDER BY accessCount DESC LIMIT 100 758 `), 759 } 760 761 function rowToEntry(row: Record<string, unknown>): MemoryEntry { 762 const legacyFilePaths = safeJsonParse<FileReference[]>(row.filePaths, []) 763 const references = normalizeReferences(safeJsonParse<MemoryReference[]>(row.references, []), legacyFilePaths) 764 const image = normalizeImage(safeJsonParse<MemoryImage | null>(row.image, null), typeof row.imagePath === 'string' ? row.imagePath : null) 765 const filePaths = referencesToLegacyFilePaths(references) 766 const linkedMemoryIds = normalizeLinkedMemoryIds(safeJsonParse<string[]>(row.linkedMemoryIds, []), typeof row.id === 'string' ? row.id : undefined) 767 768 return { 769 id: String(row.id || ''), 770 agentId: typeof row.agentId === 'string' ? row.agentId : null, 771 sessionId: typeof row.sessionId === 'string' ? row.sessionId : null, 772 category: typeof row.category === 'string' ? row.category : 'note', 773 title: typeof row.title === 'string' ? row.title : 'Untitled', 774 content: typeof row.content === 'string' ? row.content : '', 775 metadata: safeJsonParse<Record<string, unknown> | undefined>(row.metadata, undefined), 776 references, 777 filePaths, 778 image, 779 imagePath: image?.path || undefined, 780 linkedMemoryIds: linkedMemoryIds.length ? 
linkedMemoryIds : undefined, 781 pinned: row.pinned === 1, 782 sharedWith: safeJsonParse<string[]>(row.sharedWith, []).length ? safeJsonParse<string[]>(row.sharedWith, []) : undefined, 783 accessCount: typeof row.accessCount === 'number' ? row.accessCount : 0, 784 lastAccessedAt: typeof row.lastAccessedAt === 'number' ? row.lastAccessedAt : 0, 785 contentHash: typeof row.contentHash === 'string' ? row.contentHash : undefined, 786 reinforcementCount: typeof row.reinforcementCount === 'number' ? row.reinforcementCount : 0, 787 abstract: typeof row.abstract === 'string' ? row.abstract : null, 788 createdAt: typeof row.createdAt === 'number' ? row.createdAt : Date.now(), 789 updatedAt: typeof row.updatedAt === 'number' ? row.updatedAt : Date.now(), 790 } 791 } 792 793 function traverseLinked( 794 seedEntries: MemoryEntry[], 795 limits: MemoryLookupLimits, 796 ): { entries: MemoryEntry[]; truncated: boolean; expandedLinkedCount: number } { 797 const traversal = traverseLinkedMemoryGraph( 798 seedEntries, 799 limits, 800 (ids) => { 801 const linkedRows = stmts.getByIds(ids) 802 return linkedRows.map((row) => rowToEntry(row as Record<string, unknown>)) 803 }, 804 ) 805 return traversal 806 } 807 808 const getAllWithEmbeddings = db.prepare( 809 `SELECT * FROM memories WHERE embedding IS NOT NULL` 810 ) 811 const getAllWithEmbeddingsByAgentOrShared = db.prepare( 812 `SELECT * FROM memories WHERE embedding IS NOT NULL AND (agentId = ? 
OR sharedWith LIKE ?)` 813 ) 814 815 return { 816 add(data: Omit<MemoryEntry, 'id' | 'createdAt' | 'updatedAt'>): MemoryEntry { 817 const id = genId(6) 818 const now = Date.now() 819 const references = normalizeReferences(data.references, data.filePaths) 820 const legacyFilePaths = referencesToLegacyFilePaths(references) 821 const image = normalizeImage(data.image, data.imagePath) 822 const linkedMemoryIds = normalizeLinkedMemoryIds(data.linkedMemoryIds, id) 823 const sessionId = data.sessionId || null 824 const category = data.category || 'note' 825 const title = data.title || 'Untitled' 826 const content = data.content || '' 827 const contentHash = computeContentHash(knowledgeChunkHashScope(category, data.metadata), content) 828 829 // Content-hash dedup: if same content already exists for this agent, reinforce instead of duplicating 830 const agentId = data.agentId || null 831 const existingByHash = agentId 832 ? stmts.findByContentHash.get(contentHash, agentId) as Record<string, unknown> | undefined 833 : stmts.findByContentHashShared.get(contentHash) as Record<string, unknown> | undefined 834 if (existingByHash) { 835 stmts.reinforceMemory.run(now, existingByHash.id) 836 return rowToEntry({ ...existingByHash, reinforcementCount: ((existingByHash.reinforcementCount as number) || 0) + 1, updatedAt: now }) 837 } 838 839 // Guard against exact duplicate memory spam for the same session/category. 840 if (sessionId) { 841 const duplicate = stmts.exactDuplicateBySessionCategory.get(sessionId, category, title, content) as Record<string, unknown> | undefined 842 if (duplicate) return rowToEntry(duplicate) 843 } 844 const pinned = data.pinned ? 1 : 0 845 const sharedWith = Array.isArray(data.sharedWith) && data.sharedWith.length ? JSON.stringify(data.sharedWith) : null 846 stmts.insert.run( 847 id, agentId, sessionId, 848 category, title, content, 849 data.metadata ? JSON.stringify(data.metadata) : null, 850 null, // embedding computed async 851 references?.length ? 
JSON.stringify(references) : null, 852 legacyFilePaths?.length ? JSON.stringify(legacyFilePaths) : null, 853 image ? JSON.stringify(image) : null, 854 image?.path || null, 855 linkedMemoryIds.length ? JSON.stringify(linkedMemoryIds) : null, 856 pinned, 857 sharedWith, 858 contentHash, 859 null, // abstract computed async 860 now, now, 861 ) 862 // Compute embedding in background (fire-and-forget) 863 const text = `${title} ${content}`.slice(0, 4000) 864 getEmbedding(text).then((emb) => { 865 if (emb) { 866 db.prepare(`UPDATE memories SET embedding = ? WHERE id = ?`).run( 867 serializeEmbedding(emb), id, 868 ) 869 } 870 }).catch((err: unknown) => { log.warn(TAG, `Embedding generation failed for memory ${id}:`, err instanceof Error ? err.message : String(err)) }) 871 872 // Generate abstract for long content in background (fire-and-forget) 873 if (content.length > 200) { 874 generateAbstract(content, title).then((abstract) => { 875 if (abstract) { 876 db.prepare(`UPDATE memories SET abstract = ? WHERE id = ?`).run(abstract, id) 877 } 878 }).catch(() => { /* non-critical */ }) 879 } 880 881 // Keep memory links bidirectional by default. 
882 if (linkedMemoryIds.length) this.link(id, linkedMemoryIds, true) 883 884 const created = this.get(id) 885 if (created) return created 886 return { 887 ...data, 888 id, 889 sessionId, 890 category, 891 title, 892 content, 893 references, 894 filePaths: legacyFilePaths, 895 image, 896 imagePath: image?.path || null, 897 linkedMemoryIds, 898 accessCount: 0, 899 lastAccessedAt: 0, 900 contentHash, 901 reinforcementCount: 0, 902 createdAt: now, 903 updatedAt: now, 904 } 905 }, 906 907 update(id: string, updates: Partial<MemoryEntry>): MemoryEntry | null { 908 const existing = stmts.getById.get(id) as Record<string, unknown> | undefined 909 if (!existing) return null 910 const existingEntry = rowToEntry(existing) 911 const merged = { ...existingEntry, ...updates } 912 const references = normalizeReferences(merged.references, merged.filePaths) 913 const legacyFilePaths = referencesToLegacyFilePaths(references) 914 const image = normalizeImage(merged.image, merged.imagePath) 915 const nextLinked = normalizeLinkedMemoryIds(merged.linkedMemoryIds, id) 916 const prevLinked = normalizeLinkedMemoryIds(existingEntry.linkedMemoryIds, id) 917 const now = Date.now() 918 const pinnedVal = merged.pinned ? 1 : 0 919 const sharedWithVal = Array.isArray(merged.sharedWith) && merged.sharedWith.length ? JSON.stringify(merged.sharedWith) : null 920 const nextContentHash = computeContentHash(knowledgeChunkHashScope(merged.category, merged.metadata), merged.content) 921 stmts.update.run( 922 merged.agentId || null, merged.sessionId || null, 923 merged.category, merged.title, merged.content, 924 merged.metadata ? JSON.stringify(merged.metadata) : null, 925 existing.embedding || null, // preserve existing embedding 926 references?.length ? JSON.stringify(references) : null, 927 legacyFilePaths?.length ? JSON.stringify(legacyFilePaths) : null, 928 image ? JSON.stringify(image) : null, 929 image?.path || null, 930 nextLinked.length ? 
JSON.stringify(nextLinked) : null, 931 pinnedVal, 932 sharedWithVal, 933 nextContentHash, 934 now, id, 935 ) 936 937 // Keep links reciprocal when link set changes. 938 if (updates.linkedMemoryIds) { 939 const added = nextLinked.filter((lid) => !prevLinked.includes(lid)) 940 const removed = prevLinked.filter((lid) => !nextLinked.includes(lid)) 941 if (added.length) this.link(id, added, true) 942 if (removed.length) this.unlink(id, removed, true) 943 } 944 945 // Re-compute embedding if content changed 946 if (updates.title || updates.content) { 947 const text = `${merged.title} ${merged.content}`.slice(0, 4000) 948 getEmbedding(text).then((emb) => { 949 if (emb) { 950 db.prepare(`UPDATE memories SET embedding = ? WHERE id = ?`).run( 951 serializeEmbedding(emb), id, 952 ) 953 } 954 }).catch(() => { /* ok */ }) 955 } 956 return this.get(id) 957 }, 958 959 delete(id: string) { 960 // Clean up image file if present 961 const row = stmts.getById.get(id) as Record<string, unknown> | undefined 962 const entry = row ? rowToEntry(row) : null 963 if (entry?.image?.path || entry?.imagePath) { 964 const imagePath = entry.image?.path || entry.imagePath || '' 965 const candidatePaths = path.isAbsolute(imagePath) 966 ? [imagePath] 967 : [ 968 tryResolvePathWithinBaseDir(APP_STATE_ROOT_DIR, imagePath), 969 tryResolvePathWithinBaseDir(WORKSPACE_DIR, imagePath), 970 ].filter((candidate): candidate is string => !!candidate) 971 for (const imgPath of candidatePaths) { 972 try { 973 fs.unlinkSync(imgPath) 974 break 975 } catch { 976 // file may not exist 977 } 978 } 979 } 980 stmts.delete.run(id) 981 // Remove this ID from any other memory's linkedMemoryIds 982 const linking = stmts.findMemoriesLinkingTo.all(`%"${id}"%`) as any[] 983 for (const row of linking) { 984 const ids = normalizeLinkedMemoryIds(safeJsonParse<string[]>(row.linkedMemoryIds, []), row.id) 985 const filtered = ids.filter((lid: string) => lid !== id) 986 stmts.updateLinks.run(filtered.length ? 
JSON.stringify(filtered) : null, Date.now(), row.id) 987 } 988 }, 989 990 get(id: string): MemoryEntry | null { 991 const row = stmts.getById.get(id) as Record<string, unknown> | undefined 992 if (!row) return null 993 // Bump access count (non-blocking) 994 setTimeout(() => { 995 try { stmts.bumpAccessCount.run(Date.now(), id) } catch { /* best-effort */ } 996 }, 0) 997 return rowToEntry(row) 998 }, 999 1000 /** Get a memory and its linked memories via BFS traversal */ 1001 getWithLinked( 1002 id: string, 1003 maxDepth?: number, 1004 maxResults?: number, 1005 maxLinkedExpansion?: number, 1006 ): { entries: MemoryEntry[]; truncated: boolean; expandedLinkedCount: number; limits: MemoryLookupLimits } | null { 1007 const row = stmts.getById.get(id) as Record<string, unknown> | undefined 1008 if (!row) return null 1009 const entry = rowToEntry(row) 1010 const defaults = getMemoryLookupLimits() 1011 const limits = resolveLookupRequest(defaults, { 1012 depth: maxDepth ?? defaults.maxDepth, 1013 limit: maxResults ?? defaults.maxPerLookup, 1014 linkedLimit: maxLinkedExpansion ?? 
defaults.maxLinkedExpansion, 1015 }) 1016 const traversal = traverseLinked([entry], limits) 1017 return { ...traversal, limits } 1018 }, 1019 1020 /** Add links from one memory to others */ 1021 link(id: string, targetIds: string[], bidirectional = true): MemoryEntry | null { 1022 const existing = stmts.getById.get(id) as Record<string, unknown> | undefined 1023 if (!existing) return null 1024 const entry = rowToEntry(existing) 1025 const validTargetIds = normalizeLinkedMemoryIds(targetIds, id) 1026 const targetRows = stmts.getByIds(validTargetIds) 1027 const existingTargetIds = new Set((targetRows as Array<Record<string, unknown>>).map((row) => String(row.id))) 1028 const filteredTargets = validTargetIds.filter((tid) => existingTargetIds.has(tid)) 1029 1030 const sourceLinks = new Set(normalizeLinkedMemoryIds(entry.linkedMemoryIds, id)) 1031 for (const tid of filteredTargets) sourceLinks.add(tid) 1032 1033 const now = Date.now() 1034 const tx = db.transaction(() => { 1035 const sourceValues = [...sourceLinks] 1036 stmts.updateLinks.run(sourceValues.length ? JSON.stringify(sourceValues) : null, now, id) 1037 1038 if (!bidirectional) return 1039 for (const targetRow of targetRows as Array<Record<string, unknown>>) { 1040 const targetEntry = rowToEntry(targetRow) 1041 const targetLinks = new Set(normalizeLinkedMemoryIds(targetEntry.linkedMemoryIds, targetEntry.id)) 1042 targetLinks.add(id) 1043 const next = [...targetLinks] 1044 stmts.updateLinks.run(next.length ? 
JSON.stringify(next) : null, now, targetEntry.id) 1045 } 1046 }) 1047 tx() 1048 1049 return this.get(id) 1050 }, 1051 1052 /** Remove links from one memory to others */ 1053 unlink(id: string, targetIds: string[], bidirectional = true): MemoryEntry | null { 1054 const existing = stmts.getById.get(id) as Record<string, unknown> | undefined 1055 if (!existing) return null 1056 const entry = rowToEntry(existing) 1057 const removeSet = new Set(normalizeLinkedMemoryIds(targetIds, id)) 1058 const now = Date.now() 1059 const tx = db.transaction(() => { 1060 const sourceLinks = normalizeLinkedMemoryIds(entry.linkedMemoryIds, id).filter((lid) => !removeSet.has(lid)) 1061 stmts.updateLinks.run(sourceLinks.length ? JSON.stringify(sourceLinks) : null, now, id) 1062 1063 if (!bidirectional || !removeSet.size) return 1064 const targetRows = stmts.getByIds([...removeSet]) as Array<Record<string, unknown>> 1065 for (const targetRow of targetRows) { 1066 const targetEntry = rowToEntry(targetRow) 1067 const next = normalizeLinkedMemoryIds(targetEntry.linkedMemoryIds, targetEntry.id).filter((lid) => lid !== id) 1068 stmts.updateLinks.run(next.length ? JSON.stringify(next) : null, now, targetEntry.id) 1069 } 1070 }) 1071 tx() 1072 1073 return this.get(id) 1074 }, 1075 1076 search(query: string, agentId?: string, options: MemorySearchOptions = {}): MemoryEntry[] { 1077 if (shouldSkipSearchQuery(query)) return [] 1078 const startedAt = Date.now() 1079 const normalizedAgentId = normalizeScopeIdentifier(agentId) 1080 const { vectorSimilarityThreshold } = options 1081 const rerankMode: MemoryRerankMode = options.rerankMode === 'semantic' || options.rerankMode === 'lexical' 1082 ? options.rerankMode 1083 : 'balanced' 1084 const scopeMode = options.scope 1085 ? normalizeMemoryScopeMode(options.scope.mode) 1086 : (normalizedAgentId ? 'agent' : 'all') 1087 const scopeFilter: MemoryScopeFilter | undefined = scopeMode === 'all' 1088 ? 
undefined 1089 : { 1090 mode: scopeMode, 1091 agentId: options.scope?.agentId ?? normalizedAgentId, 1092 sessionId: options.scope?.sessionId, 1093 projectRoot: options.scope?.projectRoot, 1094 } 1095 1096 // FTS keyword search (includes memories shared with this agent) 1097 const ftsQuery = buildFtsQuery(query) 1098 const fastAgentOnlyScope = scopeMode === 'agent' && !!normalizedAgentId 1099 const ftsResults: MemoryEntry[] = ftsQuery 1100 ? (fastAgentOnlyScope 1101 ? stmts.searchByAgentOrShared.all(ftsQuery, normalizedAgentId, `%"${normalizedAgentId}"%`) as any[] 1102 : stmts.search.all(ftsQuery) as any[] 1103 ).map(rowToEntry) 1104 : [] 1105 const ftsHitIds = new Set<string>(ftsResults.map((entry) => entry.id)) 1106 1107 // Attempt vector search (synchronous — uses cached embedding if available) 1108 const vectorSimilarityScores = new Map<string, number>() 1109 const rawEmbeddings = new Map<string, number[]>() 1110 let vectorResults: MemoryEntry[] = [] 1111 let queryEmbeddingResult: number[] | undefined 1112 try { 1113 const queryEmbedding = getEmbeddingSync(query) 1114 queryEmbeddingResult = queryEmbedding || undefined 1115 if (queryEmbedding) { 1116 const rows = fastAgentOnlyScope 1117 ? getAllWithEmbeddingsByAgentOrShared.all(normalizedAgentId, `%"${normalizedAgentId}"%`) as any[] 1118 : getAllWithEmbeddings.all() as any[] 1119 1120 const scored = rows 1121 .map((row) => { 1122 const emb = deserializeEmbedding(row.embedding) 1123 const score = cosineSimilarity(queryEmbedding, emb) 1124 return { row, score, emb } 1125 }) 1126 .filter((s) => s.score > (vectorSimilarityThreshold ?? 
DEFAULT_VECTOR_SIMILARITY_THRESHOLD)) 1127 .sort((a, b) => b.score - a.score) 1128 .slice(0, 20) 1129 1130 vectorResults = scored.map((s) => { 1131 const entry = rowToEntry(s.row) 1132 vectorSimilarityScores.set(entry.id, s.score) 1133 rawEmbeddings.set(entry.id, s.emb) 1134 return entry 1135 }) 1136 } 1137 } catch (err: unknown) { 1138 log.warn(TAG, 'Vector search failed, falling back to FTS:', err instanceof Error ? err.message : String(err)) 1139 } 1140 1141 // Merge: deduplicate by id 1142 const seen = new Set<string>() 1143 const merged: MemoryEntry[] = [] 1144 for (const entry of [...ftsResults, ...vectorResults]) { 1145 if (!seen.has(entry.id)) { 1146 seen.add(entry.id) 1147 merged.push(entry) 1148 } 1149 } 1150 const scopedMerged = scopeFilter ? filterMemoriesByScope(merged, scopeFilter) : merged 1151 1152 // Retrieval v2 rerank: hybrid relevance (semantic + lexical + FTS signal), then salience decay/boosting. 1153 const queryTokens = tokenizeForRerank(query) 1154 const now = Date.now() 1155 const halfLifeDays = 30 1156 const salienceScored = scopedMerged.map((entry) => { 1157 const semantic = vectorSimilarityScores.get(entry.id) ?? (ftsHitIds.has(entry.id) ? 0.42 : 0.18) 1158 const lexical = keywordOverlapScore(queryTokens, memorySearchText(entry)) 1159 const ftsSignal = ftsHitIds.has(entry.id) ? 1 : 0 1160 const relevance = rerankMode === 'semantic' 1161 ? (semantic * 0.78 + ftsSignal * 0.22) 1162 : rerankMode === 'lexical' 1163 ? (lexical * 0.78 + ftsSignal * 0.22) 1164 : (semantic * 0.50 + lexical * 0.35 + ftsSignal * 0.15) 1165 const daysSinceAccess = (now - (entry.lastAccessedAt || entry.updatedAt)) / 86_400_000 1166 const recencyDecay = isDecayExempt({ pinned: entry.pinned, category: entry.category, metadata: entry.metadata }) 1167 ? 1.0 1168 : calculateTemporalDecayMultiplier(daysSinceAccess, halfLifeDays) 1169 const reinforcement = Math.log((entry.reinforcementCount || 0) + 1) + 1 1170 const pinnedBoost = entry.pinned ? 
1.5 : 1.0 1171 const followUpBoost = followUpSalienceMultiplier(entry, now) 1172 const salience = Math.max(0.0001, relevance) * recencyDecay * reinforcement * pinnedBoost * followUpBoost 1173 return { entry, salience, embedding: rawEmbeddings.get(entry.id) } 1174 }) 1175 1176 // Apply MMR for diversity — uses embedding similarity when available, Jaccard text fallback otherwise. 1177 let out: MemoryEntry[] = [] 1178 if (rerankMode !== 'lexical') { 1179 out = applyMMR(queryEmbeddingResult || null, salienceScored, MAX_MERGED_RESULTS, 0.7) 1180 } else { 1181 salienceScored.sort((a, b) => b.salience - a.salience) 1182 out = salienceScored.slice(0, MAX_MERGED_RESULTS).map((s) => s.entry) 1183 } 1184 1185 // Bump access counts for returned results (non-blocking) 1186 if (out.length) { 1187 const returnedIds = out.map((e) => e.id) 1188 setTimeout(() => { 1189 try { 1190 const ts = Date.now() 1191 for (const mid of returnedIds) stmts.bumpAccessCount.run(ts, mid) 1192 } catch { /* best-effort */ } 1193 }, 0) 1194 } 1195 1196 const elapsed = Date.now() - startedAt 1197 if (elapsed > 1200) { 1198 log.warn(TAG, 1199 `Slow search ${elapsed}ms (scope=${scopeMode}, rerank=${rerankMode}, rawLen=${String(query || '').length}, fts="${ftsQuery.slice(0, 180)}")`, 1200 ) 1201 } 1202 return out 1203 }, 1204 1205 /** Search with linked memory traversal */ 1206 searchWithLinked( 1207 query: string, 1208 agentId?: string, 1209 maxDepth?: number, 1210 maxResults?: number, 1211 maxLinkedExpansion?: number, 1212 options: MemorySearchOptions = {}, 1213 ): { entries: MemoryEntry[]; truncated: boolean; expandedLinkedCount: number; limits: MemoryLookupLimits } { 1214 const baseResults = this.search(query, agentId, options) 1215 const defaults = getMemoryLookupLimits() 1216 const limits = resolveLookupRequest(defaults, { 1217 depth: maxDepth ?? defaults.maxDepth, 1218 limit: maxResults ?? defaults.maxPerLookup, 1219 linkedLimit: maxLinkedExpansion ?? 
defaults.maxLinkedExpansion, 1220 }) 1221 if (limits.maxDepth <= 0) { 1222 return { 1223 entries: baseResults.slice(0, limits.maxPerLookup), 1224 truncated: baseResults.length > limits.maxPerLookup, 1225 expandedLinkedCount: 0, 1226 limits, 1227 } 1228 } 1229 const traversal = traverseLinked(baseResults, limits) 1230 return { ...traversal, limits } 1231 }, 1232 1233 list(agentId?: string, limit = 200): MemoryEntry[] { 1234 const safeLimit = Math.max(1, Math.min(500, Math.trunc(limit))) 1235 const rows = agentId 1236 ? stmts.listByAgentOrShared.all(agentId, `%"${agentId}"%`, safeLimit) as any[] 1237 : stmts.listAll.all(safeLimit) as any[] 1238 return rows.map(rowToEntry) 1239 }, 1240 1241 listByCategory(category: string, agentId?: string, limit = 500): MemoryEntry[] { 1242 const safeLimit = Math.max(1, Math.min(10_000, Math.trunc(limit))) 1243 const rows = agentId 1244 ? stmts.listByCategoryAgentOrShared.all(category, agentId, `%"${agentId}"%`, safeLimit) as Record<string, unknown>[] 1245 : stmts.listByCategoryAll.all(category, safeLimit) as Record<string, unknown>[] 1246 return rows.map(rowToEntry) 1247 }, 1248 1249 listKnowledgeSourceChunks(sourceId: string): MemoryEntry[] { 1250 return (stmts.listKnowledgeSourceChunks.all(sourceId) as Record<string, unknown>[]).map(rowToEntry) 1251 }, 1252 1253 listPinned(agentId?: string, limit = 20): MemoryEntry[] { 1254 const safeLimit = Math.max(1, Math.min(100, Math.trunc(limit))) 1255 const rows = agentId 1256 ? 
stmts.listPinnedByAgent.all(agentId, safeLimit) as any[] 1257 : stmts.listPinnedAll.all(safeLimit) as any[] 1258 return rows.map(rowToEntry) 1259 }, 1260 1261 countsByAgent(): Record<string, number> { 1262 const rows = stmts.countsByAgent.all() as { agentKey: string; cnt: number }[] 1263 const result: Record<string, number> = {} 1264 for (const row of rows) result[row.agentKey] = row.cnt 1265 return result 1266 }, 1267 1268 getByAgent(agentId: string, limit = 200): MemoryEntry[] { 1269 const safeLimit = Math.max(1, Math.min(500, Math.trunc(limit))) 1270 return (stmts.listByAgent.all(agentId, safeLimit) as any[]).map(rowToEntry) 1271 }, 1272 1273 getFrequentlyAccessedByAgent(agentId: string, minAccessCount = 3, sinceDays = 7): MemoryEntry[] { 1274 const cutoff = Date.now() - sinceDays * 86_400_000 1275 const rows = stmts.frequentlyAccessedByAgent.all(agentId, minAccessCount, cutoff) as Record<string, unknown>[] 1276 return rows.map(rowToEntry) 1277 }, 1278 1279 analyzeMaintenance(ttlHours = 24): { 1280 total: number 1281 exactDuplicateCandidates: number 1282 canonicalDuplicateCandidates: number 1283 staleWorkingCandidates: number 1284 lowConfidenceWorkingCandidates: number 1285 dueFollowups: number 1286 } { 1287 const rows = (stmts.allRowsByUpdated.all() as any[]).map(rowToEntry) 1288 const seenExact = new Set<string>() 1289 const seenCanonical = new Set<string>() 1290 let exactDuplicateCandidates = 0 1291 let canonicalDuplicateCandidates = 0 1292 let staleWorkingCandidates = 0 1293 let lowConfidenceWorkingCandidates = 0 1294 let dueFollowups = 0 1295 const cutoff = Date.now() - Math.max(1, Math.min(24 * 365, Math.trunc(ttlHours))) * 3600_000 1296 1297 for (const row of rows) { 1298 const keyExact = [ 1299 row.agentId || '', 1300 row.sessionId || '', 1301 row.category || '', 1302 row.title || '', 1303 row.content || '', 1304 ].join('|') 1305 if (seenExact.has(keyExact)) exactDuplicateCandidates++ 1306 else seenExact.add(keyExact) 1307 1308 const keyCanonical = [ 
1309 row.agentId || '', 1310 row.sessionId || '', 1311 row.category || '', 1312 canonicalText(row.title), 1313 canonicalText(row.content), 1314 ].join('|') 1315 if (seenCanonical.has(keyCanonical)) canonicalDuplicateCandidates++ 1316 else seenCanonical.add(keyCanonical) 1317 1318 const isWorkingLike = isWorkingMemoryCategory(row.category) 1319 if (isWorkingLike && (row.updatedAt || row.createdAt || 0) < cutoff) staleWorkingCandidates++ 1320 if (isWorkingLike && ((metadataNumber(row, 'confidence') ?? 1) < 0.35)) lowConfidenceWorkingCandidates++ 1321 if (row.category === 'reflection/open_loop') { 1322 const followUpAt = metadataNumber(row, 'followUpAt') 1323 const resolvedAt = metadataNumber(row, 'resolvedAt') 1324 if (followUpAt != null && followUpAt <= Date.now() && resolvedAt == null) dueFollowups++ 1325 } 1326 } 1327 1328 return { 1329 total: rows.length, 1330 exactDuplicateCandidates, 1331 canonicalDuplicateCandidates, 1332 staleWorkingCandidates, 1333 lowConfidenceWorkingCandidates, 1334 dueFollowups, 1335 } 1336 }, 1337 1338 maintain(opts?: { 1339 dedupe?: boolean 1340 canonicalDedupe?: boolean 1341 pruneWorking?: boolean 1342 ttlHours?: number 1343 maxDeletes?: number 1344 }): { 1345 deduped: number 1346 pruned: number 1347 deletedIds: string[] 1348 analyzed: { 1349 total: number 1350 exactDuplicateCandidates: number 1351 canonicalDuplicateCandidates: number 1352 staleWorkingCandidates: number 1353 lowConfidenceWorkingCandidates: number 1354 dueFollowups: number 1355 } 1356 } { 1357 const options = opts || {} 1358 const rows = (stmts.allRowsByUpdated.all() as any[]).map(rowToEntry) 1359 const analyzed = this.analyzeMaintenance(options.ttlHours) 1360 const deleteBudget = Math.max(1, Math.min(20_000, Math.trunc(options.maxDeletes || 500))) 1361 const deleteIds: string[] = [] 1362 const toDelete = new Set<string>() 1363 const dedupe = options.dedupe !== false 1364 const canonicalDedupe = options.canonicalDedupe === true 1365 const pruneWorking = 
options.pruneWorking !== false 1366 const cutoff = Date.now() - Math.max(1, Math.min(24 * 365, Math.trunc(options.ttlHours || 24))) * 3600_000 1367 1368 // Hash-based dedup: group by contentHash + agentId, keep the one with highest reinforcementCount 1369 if (dedupe && toDelete.size < deleteBudget) { 1370 const hashGroups = new Map<string, MemoryEntry[]>() 1371 for (const row of rows) { 1372 if (!row.contentHash || toDelete.has(row.id)) continue 1373 const groupKey = `${row.agentId || ''}|${row.contentHash}` 1374 const group = hashGroups.get(groupKey) 1375 if (group) group.push(row) 1376 else hashGroups.set(groupKey, [row]) 1377 } 1378 for (const group of hashGroups.values()) { 1379 if (group.length <= 1) continue 1380 group.sort((a, b) => (b.reinforcementCount || 0) - (a.reinforcementCount || 0)) 1381 for (let i = 1; i < group.length; i++) { 1382 toDelete.add(group[i].id) 1383 if (toDelete.size >= deleteBudget) break 1384 } 1385 if (toDelete.size >= deleteBudget) break 1386 } 1387 } 1388 1389 // Exact string-match dedup (legacy fallback for rows without contentHash) 1390 if (dedupe) { 1391 const seen = new Set<string>() 1392 for (const row of rows) { 1393 if (toDelete.has(row.id)) continue 1394 const key = [ 1395 row.agentId || '', 1396 row.sessionId || '', 1397 row.category || '', 1398 row.title || '', 1399 row.content || '', 1400 ].join('|') 1401 if (seen.has(key)) toDelete.add(row.id) 1402 else seen.add(key) 1403 if (toDelete.size >= deleteBudget) break 1404 } 1405 } 1406 1407 if (canonicalDedupe && toDelete.size < deleteBudget) { 1408 const seen = new Set<string>() 1409 for (const row of rows) { 1410 if (toDelete.has(row.id)) continue 1411 const key = [ 1412 row.agentId || '', 1413 row.sessionId || '', 1414 row.category || '', 1415 canonicalText(row.title), 1416 canonicalText(row.content), 1417 ].join('|') 1418 if (seen.has(key)) toDelete.add(row.id) 1419 else seen.add(key) 1420 if (toDelete.size >= deleteBudget) break 1421 } 1422 } 1423 1424 if (pruneWorking 
&& toDelete.size < deleteBudget) { 1425 for (const row of rows) { 1426 if (toDelete.has(row.id)) continue 1427 const isWorkingLike = isWorkingMemoryCategory(row.category) 1428 const updatedAt = row.updatedAt || row.createdAt || 0 1429 const unresolvedFollowup = row.category === 'reflection/open_loop' && metadataNumber(row, 'resolvedAt') == null 1430 const lowConfidence = (metadataNumber(row, 'confidence') ?? 1) < 0.35 1431 if (isWorkingLike && !unresolvedFollowup && (updatedAt < cutoff || lowConfidence)) toDelete.add(row.id) 1432 if (toDelete.size >= deleteBudget) break 1433 } 1434 } 1435 1436 for (const id of toDelete) { 1437 this.delete(id) 1438 deleteIds.push(id) 1439 if (deleteIds.length >= deleteBudget) break 1440 } 1441 1442 let pruned = 0 1443 let deduped = 0 1444 if (deleteIds.length) { 1445 const deletedSet = new Set(deleteIds) 1446 for (const row of rows) { 1447 if (!deletedSet.has(row.id)) continue 1448 const isWorkingLike = isWorkingMemoryCategory(row.category) 1449 if (isWorkingLike) pruned++ 1450 else deduped++ 1451 } 1452 } 1453 1454 return { 1455 deduped, 1456 pruned, 1457 deletedIds: deleteIds, 1458 analyzed, 1459 } 1460 }, 1461 1462 getLatestBySessionCategory(sessionId: string, category: string): MemoryEntry | null { 1463 const sid = (sessionId || '').trim() 1464 const cat = (category || '').trim() 1465 if (!sid || !cat) return null 1466 const row = stmts.latestBySessionCategory.get(sid, cat) as Record<string, unknown> | undefined 1467 if (!row) return null 1468 return rowToEntry(row) 1469 }, 1470 } 1471 } 1472 1473 export function getMemoryDb() { 1474 if (!_db) _db = initDb() 1475 return _db 1476 } 1477 1478 // --------------------------------------------------------------------------- 1479 // Cross-Agent Knowledge Base helpers 1480 // --------------------------------------------------------------------------- 1481 1482 export function addKnowledge(params: { 1483 title: string 1484 content: string 1485 tags?: string[] 1486 scope?: 'global' | 
'agent' 1487 agentIds?: string[] 1488 createdByAgentId?: string | null 1489 createdBySessionId?: string | null 1490 source?: string 1491 sourceUrl?: string 1492 }): MemoryEntry { 1493 const db = getMemoryDb() 1494 const metadata: Record<string, unknown> = { 1495 tags: params.tags || [], 1496 scope: params.scope || 'global', 1497 agentIds: params.scope === 'agent' ? (params.agentIds || []) : [], 1498 createdByAgentId: params.createdByAgentId || null, 1499 createdBySessionId: params.createdBySessionId || null, 1500 } 1501 if (params.source) metadata.source = params.source 1502 if (params.sourceUrl) metadata.sourceUrl = params.sourceUrl 1503 return db.add({ 1504 agentId: null, 1505 sessionId: null, 1506 category: 'knowledge', 1507 title: params.title, 1508 content: params.content, 1509 metadata, 1510 }) 1511 } 1512 1513 export function searchKnowledge(query: string, tags?: string[], limit?: number): MemoryEntry[] { 1514 const db = getMemoryDb() 1515 const results = db.search(query) 1516 let filtered = results.filter((e) => e.category === 'knowledge') 1517 1518 if (tags && tags.length > 0) { 1519 const tagSet = new Set(tags.map((t) => t.toLowerCase())) 1520 filtered = filtered.filter((e) => { 1521 const entryTags: string[] = (e.metadata as Record<string, unknown>)?.tags as string[] || [] 1522 return entryTags.some((t) => tagSet.has(t.toLowerCase())) 1523 }) 1524 } 1525 1526 if (limit && limit > 0) { 1527 filtered = filtered.slice(0, limit) 1528 } 1529 1530 return filtered 1531 } 1532 1533 export function listKnowledge(tags?: string[], limit?: number): MemoryEntry[] { 1534 const db = getMemoryDb() 1535 const all = db.list(undefined, 500) 1536 let filtered = all.filter((e) => e.category === 'knowledge') 1537 1538 if (tags && tags.length > 0) { 1539 const tagSet = new Set(tags.map((t) => t.toLowerCase())) 1540 filtered = filtered.filter((e) => { 1541 const entryTags: string[] = (e.metadata as Record<string, unknown>)?.tags as string[] || [] 1542 return entryTags.some((t) 
=> tagSet.has(t.toLowerCase())) 1543 }) 1544 } 1545 1546 if (limit && limit > 0) { 1547 filtered = filtered.slice(0, limit) 1548 } 1549 1550 return filtered 1551 }