// services/vector-types.ts
/**
 * Vector store types and interfaces for LanceDB integration.
 *
 * This module defines TypeScript interfaces for the vector store system,
 * including embedding data structures, search options, and result types.
 */

  8  /**
  9   * Represents a message embedding record in the vector store
 10   */
 11  export interface MessageEmbedding {
 12    // Reference to source data
 13    message_id: string;           // Discord message ID
 14    guild_id: string;             // Guild ID for filtering
 15    channel_id: string;           // Channel ID for filtering
 16  
 17    // Embedding data
 18    content: string;              // Message content (source field)
 19    vector: number[];             // Embedding vector
 20  
 21    // Metadata for filtering
 22    timestamp: string;            // ISO timestamp
 23    author_id: string;           // User ID
 24    has_attachments: boolean;    // For filtering
 25  
 26    // Indexing metadata
 27    indexed_at: string;          // When embedded
 28    model_version: string;       // Embedding model used
 29  }
 30  
 31  /**
 32   * Represents a note embedding record in the vector store
 33   */
 34  export interface NoteEmbedding {
 35    // Reference to source data
 36    note_id: string;             // Note ID (e.g., "N123")
 37    guild_id: string;           // Guild ID for filtering
 38  
 39    // Embedding data
 40    content: string;            // Note body (source field)
 41    vector: number[];           // Embedding vector
 42  
 43    // Metadata
 44    author_id: string;          // Creator ID
 45    linked_message_id?: string; // Optional message link
 46    created_at: string;         // ISO timestamp
 47    indexed_at: string;         // When embedded
 48  }
 49  
 50  /**
 51   * Options for vector similarity search
 52   */
 53  export interface SearchOptions {
 54    guildIds?: string[];        // Filter by guild IDs (from agent config)
 55    channelId?: string;         // Optional channel filter
 56    authorId?: string;          // Optional author filter
 57    limit?: number;             // Maximum results (default: 10)
 58    minScore?: number;          // Minimum similarity score
 59    after?: string;             // ISO timestamp filter
 60    hasAttachments?: boolean;   // Filter by attachment presence
 61  }
 62  
 63  /**
 64   * Vector search result with similarity score
 65   */
 66  export interface SearchResult<T = MessageEmbedding | NoteEmbedding> {
 67    data: T;                    // The embedding record
 68    score: number;              // Similarity score (0-1)
 69    distance: number;           // Vector distance
 70  }
 71  
 72  /**
 73   * Batch embedding request
 74   */
 75  export interface EmbeddingRequest {
 76    texts: string[];            // Texts to embed
 77    model?: string;             // Override embedding model
 78  }
 79  
 80  /**
 81   * Embedding response from service
 82   */
 83  export interface EmbeddingResponse {
 84    embeddings: number[][];     // Array of embedding vectors
 85    model: string;              // Model used
 86    usage?: {
 87      total_tokens?: number;    // Token usage if available
 88    };
 89  }
 90  
 91  /**
 92   * Discord message data for embedding
 93   * Based on the DiscordMessage schema from actors/utils/discord.ts
 94   */
 95  export interface DiscordMessageData {
 96    id: string;                 // Discord message ID
 97    channelId: string;          // Channel where message was posted
 98    guildId: string;           // Guild (server) ID
 99    authorId: string;          // User ID of message author
100    authorUsername: string;     // Username of author
101    content: string;           // Message text content
102    timestamp: string;         // ISO timestamp when message was created
103    messageType?: string;      // Discord message type (DEFAULT, REPLY, etc.)
104    isBot?: string;            // "true" if author is a bot
105    hasAttachments?: string;   // "true" if message has attachments
106    threadId?: string;         // Thread ID if message is in a thread
107    referencedMessageId?: string; // ID of message being replied to
108  }
109  
/**
 * Vector store configuration.
 */
export interface VectorStoreConfig {
  /** LanceDB file path. */
  dbPath: string;
  /** LM Studio embedding model. */
  embeddingModel: string;
  /** Batch size for processing. */
  batchSize: number;
  /** Cooldown between API calls, in milliseconds. */
  cooldownMs: number;
  /** Enables or disables vector operations. */
  enabled: boolean;
}

/**
 * Vector store initialization options.
 *
 * Every field is optional; omitted fields leave the corresponding default in place.
 */
export interface VectorStoreInitOptions {
  /** Overrides the default DB path. */
  dbPath?: string;
  /** Overrides the default embedding model. */
  embeddingModel?: string;
  /** Forces tables to be recreated. */
  forceRecreate?: boolean;
}

/**
 * Embedding service error types.
 *
 * Extends the built-in `Error` with a `type` tag that callers can switch on.
 */
export interface EmbeddingError extends Error {
  /** Category of the failure. */
  type: 'api_error' | 'connection_error' | 'timeout_error' | 'model_error';
  /** NOTE(review): presumably the HTTP status code of a failed API call — confirm. */
  statusCode?: number;
  /** NOTE(review): presumably the underlying error that was wrapped — confirm. */
  originalError?: Error;
}

/**
 * Vector store statistics.
 */
export interface VectorStoreStats {
  /** Total messages indexed. */
  messageCount: number;
  /** Total notes indexed. */
  noteCount: number;
  /** ISO timestamp of the last update. */
  lastUpdated: string;
  /** Database size in bytes. */
  dbSize: number;
  /** Current embedding model. */
  modelVersion: string;
}

/**
 * Indexing progress information.
 */
export interface IndexingProgress {
  /** Total items to process. */
  total: number;
  /** Items processed so far. */
  processed: number;
  /** Number of errors. */
  errors: number;
  /** ISO timestamp when indexing started. */
  startTime: string;
  /** Estimated completion time, when one is available. */
  estimatedCompletion?: string;
}