/ src / utils / transcriptSearch.ts
transcriptSearch.ts
  1  import type { RenderableMessage } from '../types/message.js'
  2  import {
  3    INTERRUPT_MESSAGE,
  4    INTERRUPT_MESSAGE_FOR_TOOL_USE,
  5  } from './messages.js'
  6  
  7  const SYSTEM_REMINDER_CLOSE = '</system-reminder>'
  8  
  9  // UserTextMessage.tsx:~84 replaces these with <InterruptedByUser />
 10  // (renders 'Interrupted · /issue...'). Raw text never appears on screen;
 11  // searching it yields phantom matches — /terr → in[terr]upted.
 12  const RENDERED_AS_SENTINEL = new Set([
 13    INTERRUPT_MESSAGE,
 14    INTERRUPT_MESSAGE_FOR_TOOL_USE,
 15  ])
 16  
 17  const searchTextCache = new WeakMap<RenderableMessage, string>()
 18  
 19  /** Flatten a RenderableMessage to lowercased searchable text. WeakMap-
 20   *  cached — messages are append-only and immutable so a hit is always
 21   *  valid. Lowercased at cache time: the only caller immediately
 22   *  .toLowerCase()d the result, re-lowering ~1.5MB on every keystroke
 23   *  (the backspace hang). Returns '' for non-searchable types. */
 24  export function renderableSearchText(msg: RenderableMessage): string {
 25    const cached = searchTextCache.get(msg)
 26    if (cached !== undefined) return cached
 27    const result = computeSearchText(msg).toLowerCase()
 28    searchTextCache.set(msg, result)
 29    return result
 30  }
 31  
 32  function computeSearchText(msg: RenderableMessage): string {
 33    let raw = ''
 34    switch (msg.type) {
 35      case 'user': {
 36        const c = msg.message.content
 37        if (typeof c === 'string') {
 38          raw = RENDERED_AS_SENTINEL.has(c) ? '' : c
 39        } else {
 40          const parts: string[] = []
 41          for (const b of c) {
 42            if (b.type === 'text') {
 43              if (!RENDERED_AS_SENTINEL.has(b.text)) parts.push(b.text)
 44            } else if (b.type === 'tool_result') {
 45              // b.content is the MODEL-facing serialization (from each tool's
 46              // mapToolResultToToolResultBlockParam) — adds system-reminders,
 47              // <persisted-output> wrappers, backgroundInfo strings,
 48              // CYBER_RISK_MITIGATION_REMINDER. The UI
 49              // renders msg.toolUseResult (the tool's native Out) via
 50              // renderToolResultMessage — DIFFERENT text. Indexing b.content
 51              // yields phantoms: /malware → matches the reminder, /background
 52              // → matches the model-only ID string, none render.
 53              //
 54              // Duck-type the native Out instead. Covers the common shapes:
 55              // Bash {stdout,stderr}, Grep {content,filenames}, Read
 56              // {file.content}. Unknown shapes index empty — under-count is
 57              // honest, phantom is a lie. Proper fix is per-tool
 58              // extractSearchText(Out) on the Tool interface (TODO).
 59              parts.push(toolResultSearchText(msg.toolUseResult))
 60            }
 61          }
 62          raw = parts.join('\n')
 63        }
 64        break
 65      }
 66      case 'assistant': {
 67        const c = msg.message.content
 68        if (Array.isArray(c)) {
 69          // text blocks + tool_use inputs. tool_use renders as "⏺ Bash(cmd)"
 70          // — the command/pattern/path is visible and searchable-expected.
 71          // Skip thinking (hidden by hidePastThinking in transcript mount).
 72          raw = c
 73            .flatMap(b => {
 74              if (b.type === 'text') return [b.text]
 75              if (b.type === 'tool_use') return [toolUseSearchText(b.input)]
 76              return []
 77            })
 78            .join('\n')
 79        }
 80        break
 81      }
 82      case 'attachment': {
 83        // relevant_memories renders full m.content in transcript mode
 84        // (AttachmentMessage.tsx <Ansi>{m.content}</Ansi>). Visible but
 85        // unsearchable without this — [ dump finds it, / doesn't.
 86        if (msg.attachment.type === 'relevant_memories') {
 87          raw = msg.attachment.memories.map(m => m.content).join('\n')
 88        } else if (
 89          // Mid-turn prompts — queued while an agent is running. Render via
 90          // UserTextMessage (AttachmentMessage.tsx:~348). stickyPromptText
 91          // (VirtualMessageList.tsx:~103) has the same guards — mirror here.
 92          msg.attachment.type === 'queued_command' &&
 93          msg.attachment.commandMode !== 'task-notification' &&
 94          !msg.attachment.isMeta
 95        ) {
 96          const p = msg.attachment.prompt
 97          raw =
 98            typeof p === 'string'
 99              ? p
100              : p.flatMap(b => (b.type === 'text' ? [b.text] : [])).join('\n')
101        }
102        break
103      }
104      case 'collapsed_read_search': {
105        // relevant_memories attachments are absorbed into collapse groups
106        // (collapseReadSearch.ts); their content is visible in transcript mode
107        // via CollapsedReadSearchContent, so mirror it here for / search.
108        if (msg.relevantMemories) {
109          raw = msg.relevantMemories.map(m => m.content).join('\n')
110        }
111        break
112      }
113      default:
114        // grouped_tool_use, system — no text content
115        break
116    }
117    // Strip <system-reminder> anywhere — Claude context, not user-visible.
118    // Mid-message on cc -c resumes (memory reminders between prompt lines).
119    let t = raw
120    let open = t.indexOf('<system-reminder>')
121    while (open >= 0) {
122      const close = t.indexOf(SYSTEM_REMINDER_CLOSE, open)
123      if (close < 0) break
124      t = t.slice(0, open) + t.slice(close + SYSTEM_REMINDER_CLOSE.length)
125      open = t.indexOf('<system-reminder>')
126    }
127    return t
128  }
129  
/** Searchable text for a tool invocation. renderToolUseMessage displays
 *  input fields such as command (Bash), pattern (Grep), file_path
 *  (Read/Edit) and prompt (Agent); this picks those out by name. It follows
 *  the same duck-typing approach as toolResultSearchText: only known field
 *  names are read, anything else contributes nothing (under-count beats
 *  phantom matches).
 *
 *  @param input - The tool_use block's input, of unknown shape.
 *  @returns Matching field values joined with '\n', string fields first and
 *    then array fields; '' when `input` is not an object. */
export function toolUseSearchText(input: unknown): string {
  if (typeof input !== 'object' || input === null) return ''
  const fields = input as Record<string, unknown>

  // Fields renderToolUseMessage typically shows as the primary argument
  // (usually one or two per tool). The tool name itself lives in the
  // "⏺ Bash(...)" chrome and is intentionally not indexed: the overlay
  // matches it but it is not counted here.
  const stringKeys = [
    'command',
    'pattern',
    'file_path',
    'path',
    'prompt',
    'description',
    'query',
    'url',
    'skill', // SkillTool
  ]
  // args[] (Tmux/TungstenTool) and files[] (SendUserFile): the tool-use
  // line shows the joined array as its primary display.
  const listKeys = ['args', 'files']

  const isStringList = (value: unknown): value is string[] =>
    Array.isArray(value) && value.every(item => typeof item === 'string')

  const strings = stringKeys
    .map(key => fields[key])
    .filter((value): value is string => typeof value === 'string')
  const lists = listKeys
    .map(key => fields[key])
    .filter(isStringList)
    .map(list => list.join(' '))

  return [...strings, ...lists].join('\n')
}
165  
/** Searchable text for a tool's native Out value, recognised by shape.
 *
 *  Checked in order, first match wins:
 *   1. a string — returned as is;
 *   2. {stdout, stderr} (Bash/Shell) — stdout, plus '\n' + stderr when
 *      stderr is a non-empty string;
 *   3. {file: {content}} (Read) — the file content;
 *   4. otherwise, an allowlist of string fields (content, output, result,
 *      text, message) followed by string-array fields (filenames, lines,
 *      results), all joined with '\n'.
 *
 *  Only allowlisted fields are read. Values matching none of these shapes
 *  yield '': under-count is preferable to phantom matches.
 *
 *  @param r - The tool result, of unknown shape.
 *  @returns The extracted text, or '' when nothing is recognised. */
export function toolResultSearchText(r: unknown): string {
  if (typeof r === 'string') return r
  if (typeof r !== 'object' || r === null) return ''
  const fields = r as Record<string, unknown>

  // Known shapes first (common tools).
  const stdout = fields.stdout
  if (typeof stdout === 'string') {
    const stderr = fields.stderr
    return typeof stderr === 'string' && stderr !== ''
      ? stdout + '\n' + stderr
      : stdout
  }

  const file = fields.file
  if (file && typeof file === 'object') {
    const fileContent = (file as { content?: unknown }).content
    if (typeof fileContent === 'string') return fileContent
  }

  // Allowlisted output fields only. Walking every property would index
  // metadata the UI never shows (rawOutputPath, backgroundTaskId, filePath,
  // durationMs-as-string). Tools that match nothing index as empty — extend
  // these lists as such tools are found.
  const isStringList = (value: unknown): value is string[] =>
    Array.isArray(value) && value.every(item => typeof item === 'string')

  const strings = ['content', 'output', 'result', 'text', 'message']
    .map(key => fields[key])
    .filter((value): value is string => typeof value === 'string')
  const lists = ['filenames', 'lines', 'results']
    .map(key => fields[key])
    .filter(isStringList)
    .map(list => list.join('\n'))

  return [...strings, ...lists].join('\n')
}