/ utils / sideQuestion.ts
sideQuestion.ts
  1  /**
  2   * Side Question ("/btw") feature - allows asking quick questions without
  3   * interrupting the main agent context.
  4   *
  5   * Uses runForkedAgent to leverage prompt caching from the parent context
  6   * while keeping the side question response separate from main conversation.
  7   */
  8  
  9  import { formatAPIError } from '../services/api/errorUtils.js'
 10  import type { NonNullableUsage } from '../services/api/logging.js'
 11  import type { Message, SystemAPIErrorMessage } from '../types/message.js'
 12  import { type CacheSafeParams, runForkedAgent } from './forkedAgent.js'
 13  import { createUserMessage, extractTextContent } from './messages.js'
 14  
 15  // Pattern to detect "/btw" at start of input (case-insensitive, word boundary)
 16  const BTW_PATTERN = /^\/btw\b/gi
 17  
 18  /**
 19   * Find positions of "/btw" keyword at the start of text for highlighting.
 20   * Similar to findThinkingTriggerPositions in thinking.ts.
 21   */
 22  export function findBtwTriggerPositions(text: string): Array<{
 23    word: string
 24    start: number
 25    end: number
 26  }> {
 27    const positions: Array<{ word: string; start: number; end: number }> = []
 28    const matches = text.matchAll(BTW_PATTERN)
 29  
 30    for (const match of matches) {
 31      if (match.index !== undefined) {
 32        positions.push({
 33          word: match[0],
 34          start: match.index,
 35          end: match.index + match[0].length,
 36        })
 37      }
 38    }
 39  
 40    return positions
 41  }
 42  
/**
 * Result returned by runSideQuestion.
 */
export type SideQuestionResult = {
  /**
   * Text to display for the side question: the model's answer, or a
   * parenthesized notice when the model attempted a tool call or an API error
   * was recorded. `null` when none of those could be derived from the forked
   * agent's messages.
   */
  response: string | null
  /** Total usage reported by the forked agent run (`totalUsage`). */
  usage: NonNullableUsage
}
 47  
 48  /**
 49   * Run a side question using a forked agent.
 50   * Shares the parent's prompt cache — no thinking override, no cache write.
 51   * All tools are blocked and we cap at 1 turn.
 52   */
 53  export async function runSideQuestion({
 54    question,
 55    cacheSafeParams,
 56  }: {
 57    question: string
 58    cacheSafeParams: CacheSafeParams
 59  }): Promise<SideQuestionResult> {
 60    // Wrap the question with instructions to answer without tools
 61    const wrappedQuestion = `<system-reminder>This is a side question from the user. You must answer this question directly in a single response.
 62  
 63  IMPORTANT CONTEXT:
 64  - You are a separate, lightweight agent spawned to answer this one question
 65  - The main agent is NOT interrupted - it continues working independently in the background
 66  - You share the conversation context but are a completely separate instance
 67  - Do NOT reference being interrupted or what you were "previously doing" - that framing is incorrect
 68  
 69  CRITICAL CONSTRAINTS:
 70  - You have NO tools available - you cannot read files, run commands, search, or take any actions
 71  - This is a one-off response - there will be no follow-up turns
 72  - You can ONLY provide information based on what you already know from the conversation context
 73  - NEVER say things like "Let me try...", "I'll now...", "Let me check...", or promise to take any action
 74  - If you don't know the answer, say so - do not offer to look it up or investigate
 75  
 76  Simply answer the question with the information you have.</system-reminder>
 77  
 78  ${question}`
 79  
 80    const agentResult = await runForkedAgent({
 81      promptMessages: [createUserMessage({ content: wrappedQuestion })],
 82      // Do NOT override thinkingConfig — thinking is part of the API cache key,
 83      // and diverging from the main thread's config busts the prompt cache.
 84      // Adaptive thinking on a quick Q&A has negligible overhead.
 85      cacheSafeParams,
 86      canUseTool: async () => ({
 87        behavior: 'deny' as const,
 88        message: 'Side questions cannot use tools',
 89        decisionReason: { type: 'other' as const, reason: 'side_question' },
 90      }),
 91      querySource: 'side_question',
 92      forkLabel: 'side_question',
 93      maxTurns: 1, // Single turn only - no tool use loops
 94      // No future request shares this suffix; skip writing cache entries.
 95      skipCacheWrite: true,
 96    })
 97  
 98    return {
 99      response: extractSideQuestionResponse(agentResult.messages),
100      usage: agentResult.totalUsage,
101    }
102  }
103  
/**
 * Turn the forked agent's messages into a single display string.
 *
 * IMPORTANT: claude.ts yields one AssistantMessage PER CONTENT BLOCK rather
 * than one per API response. With adaptive thinking enabled (inherited from
 * the main thread to preserve the cache key), a thinking response arrives as:
 *   messages[0] = assistant { content: [thinking_block] }
 *   messages[1] = assistant { content: [text_block] }
 *
 * Inspecting only the first assistant message (as an earlier version did via
 * `.find(m => m.type === 'assistant')`) therefore picks the thinking-only
 * message, finds no text, and yields null → "No response received". Repos with
 * large context (many skills, big CLAUDE.md) trigger thinking more often, so
 * the failure showed up there first. All assistant blocks are merged here
 * before looking for text.
 *
 * Other situations that previously surfaced as "No response received":
 *   - The model attempts tool_use → content = [thinking, tool_use], no text.
 *     Rare, since the system-reminder usually prevents it, but reported here.
 *   - An API error exhausts retries → query yields system api_error + user
 *     interruption and no assistant message at all.
 *
 * @param messages - Messages produced by the forked agent run.
 * @returns The trimmed answer text; otherwise a parenthesized notice about an
 *   attempted tool call or the first API error; otherwise null.
 */
function extractSideQuestionResponse(messages: Message[]): string | null {
  // Merge the content blocks of every assistant message into one flat list.
  const blocks = messages.flatMap(msg => {
    if (msg.type !== 'assistant') return []
    return msg.message.content
  })

  // Preferred outcome: the joined text blocks (normally at most one exists).
  const answer =
    blocks.length === 0 ? '' : extractTextContent(blocks, '\n\n').trim()
  if (answer) return answer

  // No text: report a tool call the model attempted despite the instructions.
  const attemptedTool = blocks.find(block => block.type === 'tool_use')
  if (attemptedTool) {
    const toolName = 'name' in attemptedTool ? attemptedTool.name : 'a tool'
    return `(The model tried to call ${toolName} instead of answering directly. Try rephrasing or ask in the main conversation.)`
  }

  // Nothing usable from the assistant, most likely because an API error
  // exhausted retries. Surface the first system api_error so the user can see
  // what happened.
  const firstApiError = messages.find(
    (msg): msg is SystemAPIErrorMessage =>
      msg.type === 'system' && 'subtype' in msg && msg.subtype === 'api_error',
  )
  return firstApiError
    ? `(API error: ${formatAPIError(firstApiError.error)})`
    : null
}