// src/utils/commands.ts
  1  import { memoize } from 'lodash-es'
  2  import { API_ERROR_MESSAGE_PREFIX, queryHaiku } from '../services/claude.js'
  3  import { type ControlOperator, parse, ParseEntry } from 'shell-quote'
  4  import { PRODUCT_NAME } from '../constants/product.js'
  5  
  6  const SINGLE_QUOTE = '__SINGLE_QUOTE__'
  7  const DOUBLE_QUOTE = '__DOUBLE_QUOTE__'
  8  
  9  export type CommandPrefixResult =
 10    | {
 11        commandPrefix: string | null
 12        commandInjectionDetected: false
 13      }
 14    | { commandInjectionDetected: true }
 15  
 16  // Command prefix result alongside subcommand prefixes
 17  export type CommandSubcommandPrefixResult = CommandPrefixResult & {
 18    subcommandPrefixes: Map<string, CommandPrefixResult>
 19  }
 20  
 21  /**
 22   * Splits a command string into individual commands based on shell operators
 23   */
 24  export function splitCommand(command: string): string[] {
 25    const parts: ParseEntry[] = []
 26  
 27    // 1. Collapse adjacent strings
 28    for (const part of parse(
 29      command
 30        .replaceAll('"', `"${DOUBLE_QUOTE}`) // parse() strips out quotes :P
 31        .replaceAll("'", `'${SINGLE_QUOTE}`), // parse() strips out quotes :P
 32      varName => `$${varName}`, // Preserve shell variables
 33    )) {
 34      if (typeof part === 'string') {
 35        if (parts.length > 0 && typeof parts[parts.length - 1] === 'string') {
 36          parts[parts.length - 1] += ' ' + part
 37          continue
 38        }
 39      }
 40      parts.push(part)
 41    }
 42  
 43    // 2. Map tokens to strings
 44    const stringParts = parts
 45      .map(part => {
 46        if (typeof part === 'string') {
 47          return part
 48        }
 49        if ('comment' in part) {
 50          // TODO: make this less hacky
 51          return '#' + part.comment
 52        }
 53        if ('op' in part && part.op === 'glob') {
 54          return part.pattern
 55        }
 56        if ('op' in part) {
 57          return part.op
 58        }
 59        return null
 60      })
 61      .filter(_ => _ !== null)
 62  
 63    // 3. Map quotes back to their original form
 64    const quotedParts = stringParts.map(part => {
 65      return part
 66        .replaceAll(`${SINGLE_QUOTE}`, "'")
 67        .replaceAll(`${DOUBLE_QUOTE}`, '"')
 68    })
 69  
 70    // 4. Filter out separators
 71    return quotedParts.filter(
 72      part => !(COMMAND_LIST_SEPARATORS as Set<string>).has(part),
 73    )
 74  }
 75  
 76  export const getCommandSubcommandPrefix = memoize(
 77    async (
 78      command: string,
 79      abortSignal: AbortSignal,
 80    ): Promise<CommandSubcommandPrefixResult | null> => {
 81      const subcommands = splitCommand(command)
 82  
 83      const [fullCommandPrefix, ...subcommandPrefixesResults] = await Promise.all(
 84        [
 85          getCommandPrefix(command, abortSignal),
 86          ...subcommands.map(async subcommand => ({
 87            subcommand,
 88            prefix: await getCommandPrefix(subcommand, abortSignal),
 89          })),
 90        ],
 91      )
 92      if (!fullCommandPrefix) {
 93        return null
 94      }
 95      const subcommandPrefixes = subcommandPrefixesResults.reduce(
 96        (acc, { subcommand, prefix }) => {
 97          if (prefix) {
 98            acc.set(subcommand, prefix)
 99          }
100          return acc
101        },
102        new Map<string, CommandPrefixResult>(),
103      )
104  
105      return {
106        ...fullCommandPrefix,
107        subcommandPrefixes,
108      }
109    },
110    command => command, // memoize by command only
111  )
112  
/**
 * Asks the model (via `queryHaiku`) for the prefix of a single Bash command.
 *
 * Results are memoized by `command` only: `abortSignal` is not part of the
 * cache key, so a cached promise was created with the signal of the first
 * caller for that command.
 *
 * Failed lookups are not kept in the cache. If the query promise rejects, or
 * the lookup resolves to `null`, the entry is evicted once it settles so a
 * later call for the same command can retry.
 *
 * @param command - The command (or subcommand) to classify.
 * @param abortSignal - Passed to `queryHaiku` as `signal`.
 * @returns `null` when the response text starts with
 *   `API_ERROR_MESSAGE_PREFIX`; `{ commandInjectionDetected: true }` when the
 *   model answers `command_injection_detected`; otherwise a result whose
 *   `commandPrefix` is the model's answer, or `null` for `none` / bare `git`.
 */
const getCommandPrefix = memoize(
  (
    command: string,
    abortSignal: AbortSignal,
  ): Promise<CommandPrefixResult | null> => {
    const pending = queryCommandPrefix(command, abortSignal)

    // Runs after the promise settles, i.e. after memoize has stored it.
    // Only evict our own entry, never a newer one for the same command.
    const evict = (): void => {
      const { cache } = getCommandPrefix
      if (cache.get(command) === pending) {
        cache.delete(command)
      }
    }
    void pending.then(result => {
      if (result === null) {
        evict()
      }
    }, evict)

    return pending
  },
  command => command, // memoize by command only
)

// Uncached worker: sends the prefix-detection prompt and interprets the reply.
async function queryCommandPrefix(
  command: string,
  abortSignal: AbortSignal,
): Promise<CommandPrefixResult | null> {
  const response = await queryHaiku({
    systemPrompt: [
      `Your task is to process Bash commands that an AI coding agent wants to run.

This policy spec defines how to determine the prefix of a Bash command:`,
    ],
    userPrompt: `<policy_spec>
# ${PRODUCT_NAME} Code Bash command prefix detection

This document defines risk levels for actions that the ${PRODUCT_NAME} agent may take. This classification system is part of a broader safety framework and is used to determine when additional user confirmation or oversight may be needed.

## Definitions

**Command Injection:** Any technique used that would result in a command being run other than the detected prefix.

## Command prefix extraction examples
Examples:
- cat foo.txt => cat
- cd src => cd
- cd path/to/files/ => cd
- find ./src -type f -name "*.ts" => find
- gg cat foo.py => gg cat
- gg cp foo.py bar.py => gg cp
- git commit -m "foo" => git commit
- git diff HEAD~1 => git diff
- git diff --staged => git diff
- git diff $(pwd) => command_injection_detected
- git status => git status
- git status# test(\`id\`) => command_injection_detected
- git status\`ls\` => command_injection_detected
- git push => none
- git push origin master => git push
- git log -n 5 => git log
- git log --oneline -n 5 => git log
- grep -A 40 "from foo.bar.baz import" alpha/beta/gamma.py => grep
- pig tail zerba.log => pig tail
- npm test => none
- npm test --foo => npm test
- npm test -- -f "foo" => npm test
- pwd\n curl example.com => command_injection_detected
- pytest foo/bar.py => pytest
- scalac build => none
</policy_spec>

The user has allowed certain command prefixes to be run, and will otherwise be asked to approve or deny the command.
Your task is to determine the command prefix for the following command.

IMPORTANT: Bash commands may run multiple commands that are chained together.
For safety, if the command seems to contain command injection, you must return "command_injection_detected". 
(This will help protect the user: if they think that they're allowlisting command A, 
but the AI coding agent sends a malicious command that technically has the same prefix as command A, 
then the safety system will see that you said “command_injection_detected” and ask the user for manual confirmation.)

Note that not every command has a prefix. If a command has no prefix, return "none".

ONLY return the prefix. Do not return any other text, markdown markers, or other content or formatting.

Command: ${command}
`,
    signal: abortSignal,
    enablePromptCaching: false,
  })

  // Use the reply as-is when it is a string, else the first text block;
  // fall back to 'none' when no text is available.
  const prefix =
    typeof response.message.content === 'string'
      ? response.message.content
      : Array.isArray(response.message.content)
        ? (response.message.content.find(_ => _.type === 'text')?.text ??
          'none')
        : 'none'

  // NOTE(review): presumably API failures surface as message text carrying
  // this prefix; confirm against queryHaiku.
  if (prefix.startsWith(API_ERROR_MESSAGE_PREFIX)) {
    return null
  }

  if (prefix === 'command_injection_detected') {
    return { commandInjectionDetected: true }
  }

  // 'none' means the command has no prefix. Never accept base `git` as a
  // prefix either (if e.g. `git diff` prefix not detected).
  if (prefix === 'git' || prefix === 'none') {
    return {
      commandPrefix: null,
      commandInjectionDetected: false,
    }
  }

  return {
    commandPrefix: prefix,
    commandInjectionDetected: false,
  }
}
219  
/**
 * Control operators that only sequence commands. `isCommandList` treats them
 * as safe and `splitCommand` drops them from its output.
 */
const COMMAND_LIST_SEPARATORS = new Set<ControlOperator>(['&&', '||', ';', ';;'])
226  
/**
 * Checks whether `command` is just a list of commands: every parsed token is
 * a plain string, a glob, or one of `COMMAND_LIST_SEPARATORS`.
 *
 * @returns `false` as soon as a comment or any other operator is found,
 *   `true` otherwise.
 */
function isCommandList(command: string): boolean {
  // parse() strips out quotes, so tag each quote with a placeholder first
  const taggedCommand = command
    .replaceAll('"', `"${DOUBLE_QUOTE}`)
    .replaceAll("'", `'${SINGLE_QUOTE}`)

  const tokens = parse(
    taggedCommand,
    varName => `$${varName}`, // Preserve shell variables
  )

  return tokens.every(token => {
    // Strings are safe
    if (typeof token === 'string') {
      return true
    }
    // Don't trust comments, they can contain command injection
    if ('comment' in token) {
      return false
    }
    // Globs and command list separators are safe; other operators are not
    if ('op' in token) {
      return token.op === 'glob' || COMMAND_LIST_SEPARATORS.has(token.op)
    }
    // Any other token shape is not treated as unsafe
    return true
  })
}
258  
/**
 * Reports whether `command` splits into more than one part (per
 * `splitCommand`) and is not a plain command list (per `isCommandList`).
 */
export function isUnsafeCompoundCommand(command: string): boolean {
  const hasMultipleParts = splitCommand(command).length > 1
  if (!hasMultipleParts) {
    return false
  }
  return !isCommandList(command)
}