/ services / lsp / LSPDiagnosticRegistry.ts
LSPDiagnosticRegistry.ts
  1  import { randomUUID } from 'crypto'
  2  import { LRUCache } from 'lru-cache'
  3  import { logForDebugging } from '../../utils/debug.js'
  4  import { toError } from '../../utils/errors.js'
  5  import { logError } from '../../utils/log.js'
  6  import { jsonStringify } from '../../utils/slowOperations.js'
  7  import type { DiagnosticFile } from '../diagnosticTracking.js'
  8  
  9  /**
 10   * Pending LSP diagnostic notification
 11   */
 12  export type PendingLSPDiagnostic = {
 13    /** Server that sent the diagnostic */
 14    serverName: string
 15    /** Diagnostic files */
 16    files: DiagnosticFile[]
 17    /** When diagnostic was received */
 18    timestamp: number
 19    /** Whether attachment was already sent to conversation */
 20    attachmentSent: boolean
 21  }
 22  
 23  /**
 24   * LSP Diagnostic Registry
 25   *
 26   * Stores LSP diagnostics received asynchronously from LSP servers via
 27   * textDocument/publishDiagnostics notifications. Follows the same pattern
 28   * as AsyncHookRegistry for consistent async attachment delivery.
 29   *
 30   * Pattern:
 31   * 1. LSP server sends publishDiagnostics notification
 32   * 2. registerPendingLSPDiagnostic() stores diagnostic
 33   * 3. checkForLSPDiagnostics() retrieves pending diagnostics
 34   * 4. getLSPDiagnosticAttachments() converts to Attachment[]
 35   * 5. getAttachments() delivers to conversation automatically
 36   *
 * Similar to AsyncHookRegistry but simpler, since each notification is
 * stored as a complete batch (no need to accumulate output over time).
 39   */
 40  
 41  // Volume limiting constants
 42  const MAX_DIAGNOSTICS_PER_FILE = 10
 43  const MAX_TOTAL_DIAGNOSTICS = 30
 44  
 45  // Max files to track for deduplication - prevents unbounded memory growth
 46  const MAX_DELIVERED_FILES = 500
 47  
 48  // Global registry state
 49  const pendingDiagnostics = new Map<string, PendingLSPDiagnostic>()
 50  
 51  // Cross-turn deduplication: tracks diagnostics that have been delivered
 52  // Maps file URI to a set of diagnostic keys (hash of message+severity+range)
 53  // Using LRUCache to prevent unbounded growth in long sessions
 54  const deliveredDiagnostics = new LRUCache<string, Set<string>>({
 55    max: MAX_DELIVERED_FILES,
 56  })
 57  
 58  /**
 59   * Register LSP diagnostics received from a server.
 60   * These will be delivered as attachments in the next query.
 61   *
 62   * @param serverName - Name of LSP server that sent diagnostics
 63   * @param files - Diagnostic files to deliver
 64   */
 65  export function registerPendingLSPDiagnostic({
 66    serverName,
 67    files,
 68  }: {
 69    serverName: string
 70    files: DiagnosticFile[]
 71  }): void {
 72    // Use UUID for guaranteed uniqueness (handles rapid registrations)
 73    const diagnosticId = randomUUID()
 74  
 75    logForDebugging(
 76      `LSP Diagnostics: Registering ${files.length} diagnostic file(s) from ${serverName} (ID: ${diagnosticId})`,
 77    )
 78  
 79    pendingDiagnostics.set(diagnosticId, {
 80      serverName,
 81      files,
 82      timestamp: Date.now(),
 83      attachmentSent: false,
 84    })
 85  }
 86  
 87  /**
 88   * Maps severity string to numeric value for sorting.
 89   * Error=1, Warning=2, Info=3, Hint=4
 90   */
 91  function severityToNumber(severity: string | undefined): number {
 92    switch (severity) {
 93      case 'Error':
 94        return 1
 95      case 'Warning':
 96        return 2
 97      case 'Info':
 98        return 3
 99      case 'Hint':
100        return 4
101      default:
102        return 4
103    }
104  }
105  
/**
 * Builds the identity key of a diagnostic from its content.
 * Used for both within-batch and cross-turn deduplication: two diagnostics
 * with the same key are treated as the same diagnostic.
 *
 * @param diag - Diagnostic fields that take part in the identity
 * @returns JSON string of message, severity, range, source and code
 */
function createDiagnosticKey(diag: {
  message: string
  severity?: string
  range?: unknown
  source?: string
  code?: unknown
}): string {
  return jsonStringify({
    message: diag.message,
    severity: diag.severity,
    range: diag.range,
    // A missing and an empty source are both folded into null
    source: diag.source || null,
    // Only a missing code becomes null. `||` would also erase falsy but
    // valid codes such as 0, making them collide with "no code".
    code: diag.code ?? null,
  })
}
125  
/**
 * Deduplicates diagnostics by file URI and diagnostic content.
 * Also filters out diagnostics that were already delivered in previous turns.
 * Two diagnostics are considered duplicates if they have the same:
 * - File URI
 * - Range (start/end line and character)
 * - Message
 * - Severity
 * - Source and code (if present)
 *
 * Entries of `allFiles` that share a URI are merged into one output file.
 * Output files keep the order in which their URI first appears, and files
 * left without diagnostics are dropped. The input objects are not modified.
 *
 * @param allFiles - Diagnostic files collected from all pending registrations
 * @returns New file objects containing only not-yet-seen diagnostics
 */
function deduplicateDiagnosticFiles(
  allFiles: DiagnosticFile[],
): DiagnosticFile[] {
  // One accumulator per URI: the keys seen in this batch plus the output file.
  // A Map gives constant-time lookup per input file and iterates in insertion
  // order, which preserves the first-appearance order of the URIs.
  const byUri = new Map<string, { seen: Set<string>; file: DiagnosticFile }>()

  for (const file of allFiles) {
    let entry = byUri.get(file.uri)
    if (entry === undefined) {
      entry = { seen: new Set(), file: { uri: file.uri, diagnostics: [] } }
      byUri.set(file.uri, entry)
    }

    // Get previously delivered diagnostics for this file (for cross-turn dedup)
    const previouslyDelivered =
      deliveredDiagnostics.get(file.uri) ?? new Set<string>()

    for (const diag of file.diagnostics) {
      try {
        const key = createDiagnosticKey(diag)

        // Skip if already seen in this batch OR already delivered in previous turns
        if (entry.seen.has(key) || previouslyDelivered.has(key)) {
          continue
        }

        entry.seen.add(key)
        entry.file.diagnostics.push(diag)
      } catch (error: unknown) {
        const err = toError(error)
        const truncatedMessage =
          diag.message?.substring(0, 100) || '<no message>'
        logError(
          new Error(
            `Failed to deduplicate diagnostic in ${file.uri}: ${err.message}. ` +
              `Diagnostic message: ${truncatedMessage}`,
          ),
        )
        // Include the diagnostic anyway to avoid losing information
        entry.file.diagnostics.push(diag)
      }
    }
  }

  // Filter out files with no diagnostics after deduplication
  return Array.from(byUri.values(), e => e.file).filter(
    f => f.diagnostics.length > 0,
  )
}
185  
/**
 * Get all pending LSP diagnostics that haven't been delivered yet.
 * Deduplicates diagnostics to prevent sending the same diagnostic multiple times.
 * Marks diagnostics as sent to prevent duplicate delivery.
 *
 * Processing steps:
 * 1. Collect the files of every registration not yet marked as sent.
 * 2. Deduplicate within the batch and against earlier deliveries.
 * 3. Mark the collected registrations as sent and remove them from the
 *    registry. This also happens when they contained no files at all.
 * 4. Sort each file's diagnostics by severity, then cap them per file and in
 *    total. Diagnostics cut by a cap are not recorded as delivered.
 * 5. Record the delivered diagnostics for cross-turn deduplication.
 *
 * The file objects stored in the registry are never modified; sorting and
 * truncation work on copies.
 *
 * @returns Empty array when there is nothing new to deliver; otherwise a
 *   single element whose `serverName` is the contributing server names
 *   joined by ', ' and whose `files` are the deduplicated, limited files
 */
export function checkForLSPDiagnostics(): Array<{
  serverName: string
  files: DiagnosticFile[]
}> {
  logForDebugging(
    `LSP Diagnostics: Checking registry - ${pendingDiagnostics.size} pending`,
  )

  // Collect all diagnostic files from all pending notifications
  const allFiles: DiagnosticFile[] = []
  const serverNames = new Set<string>()
  const diagnosticsToMark: PendingLSPDiagnostic[] = []

  for (const diagnostic of pendingDiagnostics.values()) {
    if (!diagnostic.attachmentSent) {
      allFiles.push(...diagnostic.files)
      serverNames.add(diagnostic.serverName)
      diagnosticsToMark.push(diagnostic)
    }
  }

  // Marks the collected registrations as sent and deletes every sent entry.
  // Entries are tracked in deliveredDiagnostics LRU for dedup, so we don't
  // need to keep them in pendingDiagnostics after delivery.
  const releaseProcessed = (): void => {
    for (const diagnostic of diagnosticsToMark) {
      diagnostic.attachmentSent = true
    }
    for (const [id, diagnostic] of pendingDiagnostics) {
      if (diagnostic.attachmentSent) {
        pendingDiagnostics.delete(id)
      }
    }
  }

  if (allFiles.length === 0) {
    // Registrations with an empty file list have nothing to deliver. Release
    // them here, otherwise they would stay in the registry (and in the
    // pending count) until some later non-empty registration flushes them.
    releaseProcessed()
    return []
  }

  // Deduplicate diagnostics across all files
  let dedupedFiles: DiagnosticFile[]
  try {
    dedupedFiles = deduplicateDiagnosticFiles(allFiles)
  } catch (error: unknown) {
    const err = toError(error)
    logError(new Error(`Failed to deduplicate LSP diagnostics: ${err.message}`))
    // Fall back to undedup'd files to avoid losing diagnostics
    dedupedFiles = allFiles
  }

  // Only mark as sent AFTER the deduplication step, then delete from map.
  releaseProcessed()

  const originalCount = allFiles.reduce(
    (sum, f) => sum + f.diagnostics.length,
    0,
  )
  const dedupedCount = dedupedFiles.reduce(
    (sum, f) => sum + f.diagnostics.length,
    0,
  )

  if (originalCount > dedupedCount) {
    logForDebugging(
      `LSP Diagnostics: Deduplication removed ${originalCount - dedupedCount} duplicate diagnostic(s)`,
    )
  }

  // Apply volume limiting: cap per file and total
  let totalDiagnostics = 0
  let truncatedCount = 0
  const limitedFiles: DiagnosticFile[] = []
  for (const file of dedupedFiles) {
    // Sort by severity (Error=1 < Warning=2 < Info=3 < Hint=4) to prioritize
    // errors. Sorting a copy keeps the registered objects untouched, which
    // matters on the fallback path where dedupedFiles aliases the input.
    let kept = [...file.diagnostics].sort(
      (a, b) => severityToNumber(a.severity) - severityToNumber(b.severity),
    )

    // Cap per file
    if (kept.length > MAX_DIAGNOSTICS_PER_FILE) {
      truncatedCount += kept.length - MAX_DIAGNOSTICS_PER_FILE
      kept = kept.slice(0, MAX_DIAGNOSTICS_PER_FILE)
    }

    // Cap total
    const remainingCapacity = MAX_TOTAL_DIAGNOSTICS - totalDiagnostics
    if (kept.length > remainingCapacity) {
      truncatedCount += kept.length - remainingCapacity
      kept = kept.slice(0, remainingCapacity)
    }

    totalDiagnostics += kept.length

    // Drop files that ended up with no diagnostics after limiting
    if (kept.length > 0) {
      limitedFiles.push({ ...file, diagnostics: kept })
    }
  }

  if (truncatedCount > 0) {
    logForDebugging(
      `LSP Diagnostics: Volume limiting removed ${truncatedCount} diagnostic(s) (max ${MAX_DIAGNOSTICS_PER_FILE}/file, ${MAX_TOTAL_DIAGNOSTICS} total)`,
    )
  }

  // Track delivered diagnostics for cross-turn deduplication
  for (const file of limitedFiles) {
    let delivered = deliveredDiagnostics.get(file.uri)
    if (delivered === undefined) {
      delivered = new Set<string>()
      deliveredDiagnostics.set(file.uri, delivered)
    }
    for (const diag of file.diagnostics) {
      try {
        delivered.add(createDiagnosticKey(diag))
      } catch (error: unknown) {
        // Log but continue - failure to track shouldn't prevent delivery
        const err = toError(error)
        const truncatedMessage =
          diag.message?.substring(0, 100) || '<no message>'
        logError(
          new Error(
            `Failed to track delivered diagnostic in ${file.uri}: ${err.message}. ` +
              `Diagnostic message: ${truncatedMessage}`,
          ),
        )
      }
    }
  }

  // Every kept diagnostic was counted while limiting
  const finalCount = totalDiagnostics

  // Return empty if no diagnostics to deliver (all filtered by deduplication)
  if (finalCount === 0) {
    logForDebugging(
      `LSP Diagnostics: No new diagnostics to deliver (all filtered by deduplication)`,
    )
    return []
  }

  logForDebugging(
    `LSP Diagnostics: Delivering ${limitedFiles.length} file(s) with ${finalCount} diagnostic(s) from ${serverNames.size} server(s)`,
  )

  // Return single result with all deduplicated diagnostics
  return [
    {
      serverName: Array.from(serverNames).join(', '),
      files: limitedFiles,
    },
  ]
}
339  
/**
 * Drop every pending diagnostic registration.
 * Used during cleanup/shutdown or for testing.
 * Note: deliveredDiagnostics is left untouched - it backs cross-turn
 * deduplication and is only cleared when files are edited or on session reset.
 */
export function clearAllLSPDiagnostics(): void {
  const pendingCount = pendingDiagnostics.size
  logForDebugging(
    `LSP Diagnostics: Clearing ${pendingCount} pending diagnostic(s)`,
  )
  pendingDiagnostics.clear()
}
352  
/**
 * Wipe the whole diagnostic state: the pending registrations as well as the
 * cross-turn delivery tracking.
 * Used on session reset or for testing.
 */
export function resetAllLSPDiagnosticState(): void {
  const pendingCount = pendingDiagnostics.size
  const trackedFileCount = deliveredDiagnostics.size
  logForDebugging(
    `LSP Diagnostics: Resetting all state (${pendingCount} pending, ${trackedFileCount} files tracked)`,
  )
  pendingDiagnostics.clear()
  deliveredDiagnostics.clear()
}
364  
/**
 * Forget which diagnostics were already delivered for one file.
 * Should be called when a file is edited, so that diagnostics published for it
 * afterwards are shown even if they match previously delivered ones.
 * Does nothing when the file is not tracked.
 *
 * @param fileUri - URI of the file that was edited
 */
export function clearDeliveredDiagnosticsForFile(fileUri: string): void {
  const isTracked = deliveredDiagnostics.has(fileUri)
  if (!isTracked) {
    return
  }

  logForDebugging(
    `LSP Diagnostics: Clearing delivered diagnostics for ${fileUri}`,
  )
  deliveredDiagnostics.delete(fileUri)
}
380  
/**
 * Number of registrations currently held in the pending registry
 * (for monitoring).
 */
export function getPendingLSPDiagnosticCount(): number {
  const { size } = pendingDiagnostics
  return size
}