/ utils / shell / prefix.ts
prefix.ts
  1  /**
  2   * Shared command prefix extraction using Haiku LLM
  3   *
  4   * This module provides a factory for creating command prefix extractors
  5   * that can be used by different shell tools. The core logic
  6   * (Haiku query, response validation) is shared, while tool-specific
  7   * aspects (examples, pre-checks) are configurable.
  8   */
  9  
 10  import chalk from 'chalk'
 11  import type { QuerySource } from '../../constants/querySource.js'
 12  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
 13  import {
 14    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 15    logEvent,
 16  } from '../../services/analytics/index.js'
 17  import { queryHaiku } from '../../services/api/claude.js'
 18  import { startsWithApiErrorPrefix } from '../../services/api/errors.js'
 19  import { memoizeWithLRU } from '../memoize.js'
 20  import { jsonStringify } from '../slowOperations.js'
 21  import { asSystemPrompt } from '../systemPromptType.js'
 22  
 23  /**
 24   * Shell executables that must never be accepted as bare prefixes.
 25   * Allowing e.g. "bash:*" would let any command through, defeating
 26   * the permission system. Includes Unix shells and Windows equivalents.
 27   */
 28  const DANGEROUS_SHELL_PREFIXES = new Set([
 29    'sh',
 30    'bash',
 31    'zsh',
 32    'fish',
 33    'csh',
 34    'tcsh',
 35    'ksh',
 36    'dash',
 37    'cmd',
 38    'cmd.exe',
 39    'powershell',
 40    'powershell.exe',
 41    'pwsh',
 42    'pwsh.exe',
 43    'bash.exe',
 44  ])
 45  
 46  /**
 47   * Result of command prefix extraction
 48   */
 49  export type CommandPrefixResult = {
 50    /** The detected command prefix, or null if no prefix could be determined */
 51    commandPrefix: string | null
 52  }
 53  
 54  /**
 55   * Result including subcommand prefixes for compound commands
 56   */
 57  export type CommandSubcommandPrefixResult = CommandPrefixResult & {
 58    subcommandPrefixes: Map<string, CommandPrefixResult>
 59  }
 60  
 61  /**
 62   * Configuration for creating a command prefix extractor
 63   */
 64  export type PrefixExtractorConfig = {
 65    /** Tool name for logging and warning messages */
 66    toolName: string
 67  
 68    /** The policy spec containing examples for Haiku */
 69    policySpec: string
 70    /** Analytics event name for logging */
 71    eventName: string
 72  
 73    /** Query source identifier for the API call */
 74    querySource: QuerySource
 75  
 76    /** Optional pre-check function that can short-circuit the Haiku call */
 77    preCheck?: (command: string) => CommandPrefixResult | null
 78  }
 79  
 80  /**
 81   * Creates a memoized command prefix extractor function.
 82   *
 83   * Uses two-layer memoization: the outer memoized function creates the promise
 84   * and attaches a .catch handler that evicts the cache entry on rejection.
 85   * This prevents aborted or failed Haiku calls from poisoning future lookups.
 86   *
 87   * Bounded to 200 entries via LRU to prevent unbounded growth in heavy sessions.
 88   *
 89   * @param config - Configuration for the extractor
 90   * @returns A memoized async function that extracts command prefixes
 91   */
 92  export function createCommandPrefixExtractor(config: PrefixExtractorConfig) {
 93    const { toolName, policySpec, eventName, querySource, preCheck } = config
 94  
 95    const memoized = memoizeWithLRU(
 96      (
 97        command: string,
 98        abortSignal: AbortSignal,
 99        isNonInteractiveSession: boolean,
100      ): Promise<CommandPrefixResult | null> => {
101        const promise = getCommandPrefixImpl(
102          command,
103          abortSignal,
104          isNonInteractiveSession,
105          toolName,
106          policySpec,
107          eventName,
108          querySource,
109          preCheck,
110        )
111        // Evict on rejection so aborted calls don't poison future turns.
112        // Identity guard: after LRU eviction, a newer promise may occupy
113        // this key; a stale rejection must not delete it.
114        promise.catch(() => {
115          if (memoized.cache.get(command) === promise) {
116            memoized.cache.delete(command)
117          }
118        })
119        return promise
120      },
121      command => command, // memoize by command only
122      200,
123    )
124  
125    return memoized
126  }
127  
/**
 * Builds a memoized extractor that resolves the prefix of a compound command
 * together with the prefixes of its subcommands.
 *
 * Caching mirrors createCommandPrefixExtractor: results are keyed by the
 * command string in an LRU cache capped at 200 entries, and a rejected
 * lookup removes its own entry so the failure is not served from the cache.
 *
 * @param getPrefix - The single-command prefix extractor (from createCommandPrefixExtractor)
 * @param splitCommand - Splits a compound command into its subcommands
 * @returns A memoized async function yielding the main prefix and all subcommand prefixes
 */
export function createSubcommandPrefixExtractor(
  getPrefix: ReturnType<typeof createCommandPrefixExtractor>,
  splitCommand: (command: string) => string[] | Promise<string[]>,
) {
  const extractor = memoizeWithLRU(
    (
      command: string,
      abortSignal: AbortSignal,
      isNonInteractiveSession: boolean,
    ): Promise<CommandSubcommandPrefixResult | null> => {
      const pending = getCommandSubcommandPrefixImpl(
        command,
        abortSignal,
        isNonInteractiveSession,
        getPrefix,
        splitCommand,
      )

      // Remove the entry on rejection, but only while this exact promise is
      // the one cached: after an LRU eviction a newer promise may occupy the
      // key, and a stale rejection must leave it alone.
      const evictIfStillCurrent = (): void => {
        if (extractor.cache.get(command) === pending) {
          extractor.cache.delete(command)
        }
      }
      pending.then(undefined, evictIfStillCurrent)

      return pending
    },
    cmd => cmd, // cache key is the command text alone
    200,
  )

  return extractor
}
171  
/**
 * Asks Haiku for the prefix of a single command and validates the reply.
 *
 * No query is made when `NODE_ENV` is `'test'` (resolves to `null`) or when
 * `preCheck` returns a non-null result (that result is returned as-is).
 *
 * The reply text is trimmed before validation and an empty reply is treated
 * as `'none'`. Without this, an empty reply would pass the `startsWith`
 * check as a prefix of every command, and a reply such as `'bash '` would
 * slip past the exact-match checks for bare `git` and shell executables.
 *
 * @param command - The command whose prefix is wanted
 * @param abortSignal - Signal forwarded to the Haiku request
 * @param isNonInteractiveSession - Selects JSON-on-stderr vs. console output
 *   for the slow-request warning; also forwarded to the request options
 * @param toolName - Tool name used in the prompt and the warning message
 * @param policySpec - Policy spec text sent with the command
 * @param eventName - Analytics event name used for every outcome
 * @param querySource - Query source forwarded to the request options
 * @param preCheck - Optional short-circuit evaluated before the query
 * @returns `null` when the reply is an API error (or under test);
 *   `{ commandPrefix: null }` when no acceptable prefix was produced;
 *   otherwise the validated prefix
 * @throws Whatever `queryHaiku` rejects with; the warning timer is cleared first
 */
async function getCommandPrefixImpl(
  command: string,
  abortSignal: AbortSignal,
  isNonInteractiveSession: boolean,
  toolName: string,
  policySpec: string,
  eventName: string,
  querySource: QuerySource,
  preCheck?: (command: string) => CommandPrefixResult | null,
): Promise<CommandPrefixResult | null> {
  if (process.env.NODE_ENV === 'test') {
    return null
  }

  // Run pre-check if provided (e.g., isHelpCommand for Bash)
  if (preCheck) {
    const preCheckResult = preCheck(command)
    if (preCheckResult !== null) {
      return preCheckResult
    }
  }

  let preflightCheckTimeoutId: NodeJS.Timeout | undefined
  const startTime = Date.now()

  try {
    // Log a warning if the pre-flight check takes too long
    preflightCheckTimeoutId = setTimeout(
      (tn, nonInteractive) => {
        const message = `[${tn}Tool] Pre-flight check is taking longer than expected. Run with ANTHROPIC_LOG=debug to check for failed or slow API requests.`
        if (nonInteractive) {
          process.stderr.write(jsonStringify({ level: 'warn', message }) + '\n')
        } else {
          // biome-ignore lint/suspicious/noConsole: intentional warning
          console.warn(chalk.yellow(`⚠️  ${message}`))
        }
      },
      10000, // 10 seconds
      toolName,
      isNonInteractiveSession,
    )

    // When the flag is on, the policy spec moves into the (cacheable) system
    // prompt and the user prompt carries only the command.
    const useSystemPromptPolicySpec = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_cork_m4q',
      false,
    )

    const response = await queryHaiku({
      systemPrompt: asSystemPrompt(
        useSystemPromptPolicySpec
          ? [
              `Your task is to process ${toolName} commands that an AI coding agent wants to run.\n\n${policySpec}`,
            ]
          : [
              `Your task is to process ${toolName} commands that an AI coding agent wants to run.\n\nThis policy spec defines how to determine the prefix of a ${toolName} command:`,
            ],
      ),
      userPrompt: useSystemPromptPolicySpec
        ? `Command: ${command}`
        : `${policySpec}\n\nCommand: ${command}`,
      signal: abortSignal,
      options: {
        enablePromptCaching: useSystemPromptPolicySpec,
        querySource,
        agents: [],
        isNonInteractiveSession,
        hasAppendSystemPrompt: false,
        mcpTools: [],
      },
    })

    const durationMs = Date.now() - startTime

    const content = response.message.content
    const rawText =
      typeof content === 'string'
        ? content
        : Array.isArray(content)
          ? (content.find(_ => _.type === 'text')?.text ?? 'none')
          : 'none'

    // Normalize before validating: surrounding whitespace must not let a
    // reply dodge the exact-match checks below, and an empty reply must not
    // be accepted as a prefix that matches every command.
    const prefix = rawText.trim() || 'none'

    if (startsWithApiErrorPrefix(prefix)) {
      logEvent(eventName, {
        success: false,
        error:
          'API error' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        durationMs,
      })
      return null
    }

    if (prefix === 'command_injection_detected') {
      // Haiku detected something suspicious - treat as no prefix available
      logEvent(eventName, {
        success: false,
        error:
          'command_injection_detected' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        durationMs,
      })
      return { commandPrefix: null }
    }

    if (prefix === 'git' || DANGEROUS_SHELL_PREFIXES.has(prefix.toLowerCase())) {
      // Never accept bare `git` or shell executables as a prefix
      logEvent(eventName, {
        success: false,
        error:
          'dangerous_shell_prefix' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        durationMs,
      })
      return { commandPrefix: null }
    }

    if (prefix === 'none') {
      // No prefix detected
      logEvent(eventName, {
        success: false,
        error:
          'prefix "none"' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        durationMs,
      })
      return { commandPrefix: null }
    }

    if (!command.startsWith(prefix)) {
      // Prefix isn't actually a prefix of the command
      logEvent(eventName, {
        success: false,
        error:
          'command did not start with prefix' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
        durationMs,
      })
      return { commandPrefix: null }
    }

    logEvent(eventName, {
      success: true,
      durationMs,
    })
    return { commandPrefix: prefix }
  } finally {
    // Single cleanup point: runs on success, on every early return, and
    // when the query rejects.
    clearTimeout(preflightCheckTimeoutId)
  }
}
331  
/**
 * Resolves the prefix of a compound command and of each of its subcommands.
 *
 * The command is split first; the lookup for the full command and the
 * lookups for all subcommands are then started together and awaited as a
 * group, so a rejection from any of them rejects the whole call.
 *
 * @param command - The full (possibly compound) command
 * @param abortSignal - Signal forwarded to every prefix lookup
 * @param isNonInteractiveSession - Forwarded to every prefix lookup
 * @param getPrefix - Single-command prefix extractor
 * @param splitCommandFn - Splits the command into subcommands (sync or async)
 * @returns `null` when the full command has no result; otherwise the full
 *   command's result plus a map holding each subcommand that yielded one
 */
async function getCommandSubcommandPrefixImpl(
  command: string,
  abortSignal: AbortSignal,
  isNonInteractiveSession: boolean,
  getPrefix: ReturnType<typeof createCommandPrefixExtractor>,
  splitCommandFn: (command: string) => string[] | Promise<string[]>,
): Promise<CommandSubcommandPrefixResult | null> {
  const parts = await splitCommandFn(command)

  const lookup = (text: string) =>
    getPrefix(text, abortSignal, isNonInteractiveSession)

  // Index i of partResults corresponds to parts[i].
  const [wholeResult, ...partResults] = await Promise.all([
    lookup(command),
    ...parts.map(async part => lookup(part)),
  ])

  if (!wholeResult) {
    return null
  }

  // Subcommands without a result are simply left out of the map.
  const subcommandPrefixes = new Map<string, CommandPrefixResult>()
  parts.forEach((part, index) => {
    const partResult = partResults[index]
    if (partResult) {
      subcommandPrefixes.set(part, partResult)
    }
  })

  return { ...wholeResult, subcommandPrefixes }
}