Cradicle Explorer

/ tools / BashTool / bashSecurity.ts
bashSecurity.ts
   1  import { logEvent } from 'src/services/analytics/index.js'
   2  import { extractHeredocs } from '../../utils/bash/heredoc.js'
   3  import { ParsedCommand } from '../../utils/bash/ParsedCommand.js'
   4  import {
   5    hasMalformedTokens,
   6    hasShellQuoteSingleQuoteBug,
   7    tryParseShellCommand,
   8  } from '../../utils/bash/shellQuote.js'
   9  import type { TreeSitterAnalysis } from '../../utils/bash/treeSitterAnalysis.js'
  10  import type { PermissionResult } from '../../utils/permissions/PermissionResult.js'
  11  
// Cheap pre-filter: matches `$(` followed later ON THE SAME LINE by `<<`
// (`.` does not match newlines). It does not prove the heredoc is well-formed;
// isSafeHeredoc and stripSafeHeredocSubstitutions do the strict parsing after
// this test passes.
const HEREDOC_IN_SUBSTITUTION = /\$\(.*<</
  13  
// Table of substitution/expansion syntaxes, each pairing a detection regex
// with a human-readable label for that syntax.
// NOTE(review): the consumer is not visible in this part of the file; the note
// below suggests it is validateDangerousPatterns — confirm.
// Note: Backtick pattern is handled separately in validateDangerousPatterns
// to distinguish between escaped and unescaped backticks
const COMMAND_SUBSTITUTION_PATTERNS = [
  { pattern: /<\(/, message: 'process substitution <()' },
  { pattern: />\(/, message: 'process substitution >()' },
  { pattern: /=\(/, message: 'Zsh process substitution =()' },
  // Zsh EQUALS expansion: =cmd at word start expands to $(which cmd).
  // `=curl evil.com` → `/usr/bin/curl evil.com`, bypassing Bash(curl:*) deny
  // rules since the parser sees `=curl` as the base command, not `curl`.
  // Only matches word-initial = followed by a command-name char (not VAR=val).
  {
    pattern: /(?:^|[\s;&|])=[a-zA-Z_]/,
    message: 'Zsh equals expansion (=cmd)',
  },
  { pattern: /\$\(/, message: '$() command substitution' },
  { pattern: /\$\{/, message: '${} parameter substitution' },
  { pattern: /\$\[/, message: '$[] legacy arithmetic expansion' },
  { pattern: /~\[/, message: 'Zsh-style parameter expansion' },
  { pattern: /\(e:/, message: 'Zsh-style glob qualifiers' },
  { pattern: /\(\+/, message: 'Zsh glob qualifier with command execution' },
  {
    pattern: /\}\s*always\s*\{/,
    message: 'Zsh always block (try/always construct)',
  },
  // Defense in depth: Block PowerShell comment syntax even though we don't execute in PowerShell
  // Added as protection against future changes that might introduce PowerShell execution
  { pattern: /<#/, message: 'PowerShell comment syntax' },
]
  42  
// Zsh-specific dangerous commands that can bypass security checks.
// These are checked against the base command (first word) of each command segment.
// Kept as a Set of exact command names, so lookups are exact-match only
// (no prefix or pattern matching happens at this level).
const ZSH_DANGEROUS_COMMANDS = new Set([
  // zmodload is the gateway to many dangerous module-based attacks:
  // zsh/mapfile (invisible file I/O via array assignment),
  // zsh/system (sysopen/syswrite two-step file access),
  // zsh/zpty (pseudo-terminal command execution),
  // zsh/net/tcp (network exfiltration via ztcp),
  // zsh/files (builtin rm/mv/ln/chmod that bypass binary checks)
  'zmodload',
  // emulate with -c flag is an eval-equivalent that executes arbitrary code
  'emulate',
  // Zsh module builtins that enable dangerous operations.
  // These require zmodload first, but we block them as defense-in-depth
  // in case zmodload is somehow bypassed or the module is pre-loaded.
  'sysopen', // Opens files with fine-grained control (zsh/system)
  'sysread', // Reads from file descriptors (zsh/system)
  'syswrite', // Writes to file descriptors (zsh/system)
  'sysseek', // Seeks on file descriptors (zsh/system)
  'zpty', // Executes commands on pseudo-terminals (zsh/zpty)
  'ztcp', // Creates TCP connections for exfiltration (zsh/net/tcp)
  'zsocket', // Creates Unix/TCP sockets (zsh/net/socket)
  'mapfile', // Not actually a command, but the associative array is set via zmodload
  'zf_rm', // Builtin rm from zsh/files
  'zf_mv', // Builtin mv from zsh/files
  'zf_ln', // Builtin ln from zsh/files
  'zf_chmod', // Builtin chmod from zsh/files
  'zf_chown', // Builtin chown from zsh/files
  'zf_mkdir', // Builtin mkdir from zsh/files
  'zf_rmdir', // Builtin rmdir from zsh/files
  'zf_chgrp', // Builtin chgrp from zsh/files
])
  75  
// Numeric identifiers for bash security checks (to avoid logging strings)
// Validators pass these as `checkId` (together with a per-check `subId`) to
// logEvent('tengu_bash_security_check_triggered', ...).
// NOTE(review): since the numbers end up in analytics, they presumably must
// stay stable once assigned — append new ids rather than renumbering; confirm.
const BASH_SECURITY_CHECK_IDS = {
  INCOMPLETE_COMMANDS: 1,
  JQ_SYSTEM_FUNCTION: 2,
  JQ_FILE_ARGUMENTS: 3,
  OBFUSCATED_FLAGS: 4,
  SHELL_METACHARACTERS: 5,
  DANGEROUS_VARIABLES: 6,
  NEWLINES: 7,
  DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION: 8,
  DANGEROUS_PATTERNS_INPUT_REDIRECTION: 9,
  DANGEROUS_PATTERNS_OUTPUT_REDIRECTION: 10,
  IFS_INJECTION: 11,
  GIT_COMMIT_SUBSTITUTION: 12,
  PROC_ENVIRON_ACCESS: 13,
  MALFORMED_TOKEN_INJECTION: 14,
  BACKSLASH_ESCAPED_WHITESPACE: 15,
  BRACE_EXPANSION: 16,
  CONTROL_CHARACTERS: 17,
  UNICODE_WHITESPACE: 18,
  MID_WORD_HASH: 19,
  ZSH_DANGEROUS_COMMANDS: 20,
  BACKSLASH_ESCAPED_OPERATORS: 21,
  COMMENT_QUOTE_DESYNC: 22,
  QUOTED_NEWLINE: 23,
} as const
 102  
/**
 * Input handed to each validate* function in this file. Every field is a
 * different view of the same command.
 */
type ValidationContext = {
  /** The command string as received; validators such as validateEmpty and
   * validateIncompleteCommands test it directly. */
  originalCommand: string
  /** Compared against literal command names (e.g. 'git' in validateGitCommit).
   * NOTE(review): presumably the first word of the command — confirm where
   * the context is built. */
  baseCommand: string
  /** NOTE(review): presumably the `withDoubleQuotes` view produced by
   * extractQuotedContent — confirm where the context is built. */
  unquotedContent: string
  /** NOTE(review): presumably the `fullyUnquoted` view after
   * stripSafeRedirections (the next field is documented as the pre-strip
   * variant) — confirm where the context is built. */
  fullyUnquotedContent: string
  /** fullyUnquoted before stripSafeRedirections — used by validateBraceExpansion
   * to avoid false negatives from redirection stripping creating backslash adjacencies */
  fullyUnquotedPreStrip: string
  /** Like fullyUnquotedPreStrip but preserves quote characters ('/"): e.g.,
   * echo 'x'# → echo ''# (the quote chars remain, revealing adjacency to #) */
  unquotedKeepQuoteChars: string
  /** Tree-sitter analysis data, if available. Validators can use this for
   * more accurate analysis when present, falling back to regex otherwise. */
  treeSitter?: TreeSitterAnalysis | null
}
 118  
/** The three views of a command returned by extractQuotedContent. */
type QuoteExtraction = {
  /** Everything outside single quotes: single-quoted content is dropped,
   * double-quoted content is kept. The quote characters themselves are
   * omitted, except that `"` is kept when extractQuotedContent runs with
   * isJq = true. */
  withDoubleQuotes: string
  /** Only the text outside both single and double quotes. */
  fullyUnquoted: string
  /** Like fullyUnquoted but preserves quote characters ('/"): strips quoted
   * content while keeping the delimiters. Used by validateMidWordHash to detect
   * quote-adjacent # (e.g., 'x'# where quote stripping would hide adjacency). */
  unquotedKeepQuoteChars: string
}
 127  
 128  function extractQuotedContent(command: string, isJq = false): QuoteExtraction {
 129    let withDoubleQuotes = ''
 130    let fullyUnquoted = ''
 131    let unquotedKeepQuoteChars = ''
 132    let inSingleQuote = false
 133    let inDoubleQuote = false
 134    let escaped = false
 135  
 136    for (let i = 0; i < command.length; i++) {
 137      const char = command[i]
 138  
 139      if (escaped) {
 140        escaped = false
 141        if (!inSingleQuote) withDoubleQuotes += char
 142        if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
 143        if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
 144        continue
 145      }
 146  
 147      if (char === '\\' && !inSingleQuote) {
 148        escaped = true
 149        if (!inSingleQuote) withDoubleQuotes += char
 150        if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
 151        if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
 152        continue
 153      }
 154  
 155      if (char === "'" && !inDoubleQuote) {
 156        inSingleQuote = !inSingleQuote
 157        unquotedKeepQuoteChars += char
 158        continue
 159      }
 160  
 161      if (char === '"' && !inSingleQuote) {
 162        inDoubleQuote = !inDoubleQuote
 163        unquotedKeepQuoteChars += char
 164        // For jq, include quotes in extraction to ensure content is properly analyzed
 165        if (!isJq) continue
 166      }
 167  
 168      if (!inSingleQuote) withDoubleQuotes += char
 169      if (!inSingleQuote && !inDoubleQuote) fullyUnquoted += char
 170      if (!inSingleQuote && !inDoubleQuote) unquotedKeepQuoteChars += char
 171    }
 172  
 173    return { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars }
 174  }
 175  
 176  function stripSafeRedirections(content: string): string {
 177    // SECURITY: All three patterns MUST have a trailing boundary (?=\s|$).
 178    // Without it, `> /dev/nullo` matches `/dev/null` as a PREFIX, strips
 179    // `> /dev/null` leaving `o`, so `echo hi > /dev/nullo` becomes `echo hi o`.
 180    // validateRedirections then sees no `>` and passes. The file write to
 181    // /dev/nullo is auto-allowed via the read-only path (checkReadOnlyConstraints).
 182    // Main bashPermissions flow is protected (checkPathConstraints validates the
 183    // original command), but speculation.ts uses checkReadOnlyConstraints alone.
 184    return content
 185      .replace(/\s+2\s*>&\s*1(?=\s|$)/g, '')
 186      .replace(/[012]?\s*>\s*\/dev\/null(?=\s|$)/g, '')
 187      .replace(/\s*<\s*\/dev\/null(?=\s|$)/g, '')
 188  }
 189  
 190  /**
 191   * Checks if content contains an unescaped occurrence of a single character.
 192   * Handles bash escape sequences correctly where a backslash escapes the following character.
 193   *
 194   * IMPORTANT: This function only handles single characters, not strings. If you need to extend
 195   * this to handle multi-character strings, be EXTREMELY CAREFUL about shell ANSI-C quoting
 196   * (e.g., $'\n', $'\x41', $'\u0041') which can encode arbitrary characters and strings in ways
 197   * that are very difficult to parse correctly. Incorrect handling could introduce security
 198   * vulnerabilities by allowing attackers to bypass security checks.
 199   *
 200   * @param content - The string to search (typically from extractQuotedContent)
 201   * @param char - Single character to search for (e.g., '`')
 202   * @returns true if unescaped occurrence found, false otherwise
 203   *
 204   * Examples:
 205   *   hasUnescapedChar("test \`safe\`", '`') → false (escaped backticks)
 206   *   hasUnescapedChar("test `dangerous`", '`') → true (unescaped backticks)
 207   *   hasUnescapedChar("test\\`date`", '`') → true (escaped backslash + unescaped backtick)
 208   */
 209  function hasUnescapedChar(content: string, char: string): boolean {
 210    if (char.length !== 1) {
 211      throw new Error('hasUnescapedChar only works with single characters')
 212    }
 213  
 214    let i = 0
 215    while (i < content.length) {
 216      // If we see a backslash, skip it and the next character (they form an escape sequence)
 217      if (content[i] === '\\' && i + 1 < content.length) {
 218        i += 2 // Skip backslash and escaped character
 219        continue
 220      }
 221  
 222      // Check if current character matches
 223      if (content[i] === char) {
 224        return true // Found unescaped occurrence
 225      }
 226  
 227      i++
 228    }
 229  
 230    return false // No unescaped occurrences found
 231  }
 232  
 233  function validateEmpty(context: ValidationContext): PermissionResult {
 234    if (!context.originalCommand.trim()) {
 235      return {
 236        behavior: 'allow',
 237        updatedInput: { command: context.originalCommand },
 238        decisionReason: { type: 'other', reason: 'Empty command is safe' },
 239      }
 240    }
 241    return { behavior: 'passthrough', message: 'Command is not empty' }
 242  }
 243  
 244  function validateIncompleteCommands(
 245    context: ValidationContext,
 246  ): PermissionResult {
 247    const { originalCommand } = context
 248    const trimmed = originalCommand.trim()
 249  
 250    if (/^\s*\t/.test(originalCommand)) {
 251      logEvent('tengu_bash_security_check_triggered', {
 252        checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS,
 253        subId: 1,
 254      })
 255      return {
 256        behavior: 'ask',
 257        message: 'Command appears to be an incomplete fragment (starts with tab)',
 258      }
 259    }
 260  
 261    if (trimmed.startsWith('-')) {
 262      logEvent('tengu_bash_security_check_triggered', {
 263        checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS,
 264        subId: 2,
 265      })
 266      return {
 267        behavior: 'ask',
 268        message:
 269          'Command appears to be an incomplete fragment (starts with flags)',
 270      }
 271    }
 272  
 273    if (/^\s*(&&|\|\||;|>>?|<)/.test(originalCommand)) {
 274      logEvent('tengu_bash_security_check_triggered', {
 275        checkId: BASH_SECURITY_CHECK_IDS.INCOMPLETE_COMMANDS,
 276        subId: 3,
 277      })
 278      return {
 279        behavior: 'ask',
 280        message:
 281          'Command appears to be a continuation line (starts with operator)',
 282      }
 283    }
 284  
 285    return { behavior: 'passthrough', message: 'Command appears complete' }
 286  }
 287  
/**
 * Checks if a command is a "safe" heredoc-in-substitution pattern that can
 * bypass the generic $() validator.
 *
 * This is an EARLY-ALLOW path: returning `true` causes bashCommandIsSafe to
 * return `passthrough`, bypassing ALL subsequent validators. Given this
 * authority, the check must be PROVABLY safe, not "probably safe".
 *
 * The only pattern we allow is:
 *   [prefix] $(cat <<'DELIM'\n
 *   [body lines]\n
 *   DELIM\n
 *   ) [suffix]
 *
 * Where:
 * - The delimiter must be single-quoted ('DELIM') or escaped (\DELIM) so the
 *   body is literal text with no expansion
 * - The closing delimiter must be on a line BY ITSELF (or with only trailing
 *   whitespace + `)` for the $(cat <<'EOF'\n...\nEOF)` inline form)
 * - The closing delimiter must be the FIRST such line — matching bash's
 *   behavior exactly (no skipping past early delimiters to find EOF))
 * - There must be non-whitespace text BEFORE the $( (i.e., the substitution
 *   is used in argument position, not as a command name). Otherwise the
 *   heredoc body becomes an arbitrary command name with [suffix] as args.
 * - The remaining text (with the heredoc stripped) must pass all validators
 *
 * This implementation uses LINE-BASED matching, not regex [\s\S]*?, to
 * precisely replicate bash's heredoc-closing behavior.
 *
 * Processing order: (1) collect every `$(cat <<'DELIM'` / `$(cat <<\DELIM`
 * opener, (2) verify each one closes as described above, (3) reject nested
 * ranges, (4) strip the verified ranges, (5) reject command-name position,
 * (6) require the remainder to match a character allowlist, (7) require the
 * remainder to get `passthrough` from bashCommandIsSafe_DEPRECATED.
 *
 * @param command - The full original command text.
 * @returns true only if every step above succeeds; false on ANY doubt
 *   (including a single opener that fails verification).
 */
function isSafeHeredoc(command: string): boolean {
  if (!HEREDOC_IN_SUBSTITUTION.test(command)) return false

  // SECURITY: Use [ \t] (not \s) between << and the delimiter. \s matches
  // newlines, but bash requires the delimiter word on the same line as <<.
  // Matching across newlines could accept malformed syntax that bash rejects.
  // Handle quote variations: 'EOF', ''EOF'' (splitCommand may mangle quotes).
  // Capture groups: [1] optional `-` (the <<- form), [2] single-quoted
  // delimiter, [3] backslash-escaped delimiter.
  const heredocPattern =
    /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g
  let match
  type HeredocMatch = {
    start: number
    operatorEnd: number
    delimiter: string
    isDash: boolean
  }
  const safeHeredocs: HeredocMatch[] = []

  while ((match = heredocPattern.exec(command)) !== null) {
    const delimiter = match[2] || match[3]
    if (delimiter) {
      safeHeredocs.push({
        start: match.index,
        operatorEnd: match.index + match[0].length,
        delimiter,
        isDash: match[1] === '-',
      })
    }
  }

  // If no safe heredoc patterns found, it's not safe
  if (safeHeredocs.length === 0) return false

  // SECURITY: For each heredoc, find the closing delimiter using LINE-BASED
  // matching that exactly replicates bash's behavior. Bash closes a heredoc
  // at the FIRST line that exactly matches the delimiter. Any subsequent
  // occurrence of the delimiter is just content (or a new command). Regex
  // [\s\S]*? can skip past the first delimiter to find a later `DELIM)`
  // pattern, hiding injected commands between the two delimiters.
  // `start` is the index of `$(`; `end` is one past the closing `)`.
  type VerifiedHeredoc = { start: number; end: number }
  const verified: VerifiedHeredoc[] = []

  for (const { start, operatorEnd, delimiter, isDash } of safeHeredocs) {
    // The opening line must end immediately after the delimiter (only
    // horizontal whitespace allowed before the newline). If there's other
    // content (like `; rm -rf /`), this is not a simple safe heredoc.
    const afterOperator = command.slice(operatorEnd)
    const openLineEnd = afterOperator.indexOf('\n')
    if (openLineEnd === -1) return false // No content at all
    const openLineTail = afterOperator.slice(0, openLineEnd)
    if (!/^[ \t]*$/.test(openLineTail)) return false // Extra content on open line

    // Body starts after the newline
    const bodyStart = operatorEnd + openLineEnd + 1
    const body = command.slice(bodyStart)
    const bodyLines = body.split('\n')

    // Find the FIRST line that closes the heredoc. There are two valid forms:
    //   1. `DELIM` alone on a line (bash-standard), followed by `)` on the
    //      next line (with only whitespace before it)
    //   2. `DELIM)` on a line (the inline $(cat <<'EOF'\n...\nEOF) form,
    //      where bash's PST_EOFTOKEN closes both heredoc and substitution)
    // For <<-, leading tabs are stripped before matching.
    let closingLineIdx = -1
    let closeParenLineIdx = -1 // Line index where `)` appears
    let closeParenColIdx = -1 // Column index of `)` on that line

    for (let i = 0; i < bodyLines.length; i++) {
      const rawLine = bodyLines[i]!
      const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine

      // Form 1: delimiter alone on a line
      if (line === delimiter) {
        closingLineIdx = i
        // The `)` must be on the NEXT line with only whitespace before it
        const nextLine = bodyLines[i + 1]
        if (nextLine === undefined) return false // No closing `)`
        const parenMatch = nextLine.match(/^([ \t]*)\)/)
        if (!parenMatch) return false // `)` not at start of next line
        closeParenLineIdx = i + 1
        closeParenColIdx = parenMatch[1]!.length // Position of `)`
        break
      }

      // Form 2: delimiter immediately followed by `)` (PST_EOFTOKEN form)
      // Only whitespace allowed between delimiter and `)`.
      if (line.startsWith(delimiter)) {
        const afterDelim = line.slice(delimiter.length)
        const parenMatch = afterDelim.match(/^([ \t]*)\)/)
        if (parenMatch) {
          closingLineIdx = i
          closeParenLineIdx = i
          // Column is in rawLine (pre-tab-strip), so recompute
          const tabPrefix = isDash ? (rawLine.match(/^\t*/)?.[0] ?? '') : ''
          closeParenColIdx =
            tabPrefix.length + delimiter.length + parenMatch[1]!.length
          break
        }
        // Line starts with delimiter but has other trailing content —
        // this is NOT the closing line (bash requires exact match or EOF`)`).
        // But it's also a red flag: if this were inside $(), bash might
        // close early via PST_EOFTOKEN with other shell metacharacters.
        // We already handle that case in extractHeredocs — here we just
        // reject it as not matching our safe pattern.
        if (/^[)}`|&;(<>]/.test(afterDelim)) {
          return false // Ambiguous early-closure pattern
        }
      }
    }

    if (closingLineIdx === -1) return false // No closing delimiter found

    // Compute the absolute end position (one past the `)` character)
    // by summing the lengths of all body lines before the `)` line.
    let endPos = bodyStart
    for (let i = 0; i < closeParenLineIdx; i++) {
      endPos += bodyLines[i]!.length + 1 // +1 for newline
    }
    endPos += closeParenColIdx + 1 // +1 to include the `)` itself

    verified.push({ start, end: endPos })
  }

  // SECURITY: Reject nested matches. The regex finds $(cat <<'X' patterns
  // in RAW TEXT without understanding quoted-heredoc semantics. When the
  // outer heredoc has a quoted delimiter (<<'A'), its body is LITERAL text
  // in bash — any inner $(cat <<'B' is just characters, not a real heredoc.
  // But our regex matches both, producing NESTED ranges. Stripping nested
  // ranges corrupts indices: after stripping the inner range, the outer
  // range's `end` is stale (points past the shrunken string), causing
  // `remaining.slice(end)` to return '' and silently drop any suffix
  // (e.g., `; rm -rf /`). Since all our matched heredocs have quoted/escaped
  // delimiters, a nested match inside the body is ALWAYS literal text —
  // no legitimate user writes this pattern. Bail to safe fallback.
  for (const outer of verified) {
    for (const inner of verified) {
      if (inner === outer) continue
      if (inner.start > outer.start && inner.start < outer.end) {
        return false
      }
    }
  }

  // Strip all verified heredocs from the command, building `remaining`.
  // Process in reverse order so earlier indices stay valid.
  const sortedVerified = [...verified].sort((a, b) => b.start - a.start)
  let remaining = command
  for (const { start, end } of sortedVerified) {
    remaining = remaining.slice(0, start) + remaining.slice(end)
  }

  // SECURITY: The remaining text must NOT start with only whitespace before
  // the (now-stripped) heredoc position IF there's non-whitespace after it.
  // If the $() is in COMMAND-NAME position (no prefix), its output becomes
  // the command to execute, with any suffix text as arguments:
  //   $(cat <<'EOF'\nchmod\nEOF\n) 777 /etc/shadow
  //   → runs `chmod 777 /etc/shadow`
  // We only allow the substitution in ARGUMENT position: there must be a
  // command word before the $(.
  // After stripping, `remaining` should look like `cmd args... [more args]`.
  // If remaining starts with only whitespace (or is empty), the $() WAS the
  // command — that's only safe if there are no trailing arguments.
  const trimmedRemaining = remaining.trim()
  if (trimmedRemaining.length > 0) {
    // There's a prefix command — good. But verify the original command
    // also had a non-whitespace prefix before the FIRST $( (the heredoc
    // could be one of several; we need the first one's prefix).
    const firstHeredocStart = Math.min(...verified.map(v => v.start))
    const prefix = command.slice(0, firstHeredocStart)
    if (prefix.trim().length === 0) {
      // $() is in command-name position but there's trailing text — UNSAFE.
      // The heredoc body becomes the command name, trailing text becomes args.
      return false
    }
  }

  // Check that remaining text contains only safe characters.
  // After stripping safe heredocs, the remaining text should only be command
  // names, arguments, quotes, and whitespace. Reject ANY shell metacharacter
  // to prevent operators (|, &, &&, ||, ;) or expansions ($, `, {, <, >) from
  // being used to chain dangerous commands after a safe heredoc.
  // SECURITY: Use explicit ASCII space/tab only — \s matches unicode whitespace
  // like \u00A0 which can be used to hide content. Newlines are also blocked
  // (they would indicate multi-line commands outside the heredoc body).
  // Backslash is not in the allowlist either, so an escaped `\$(cat <<...`
  // opener leaves a `\` behind in `remaining` and is rejected here.
  if (!/^[a-zA-Z0-9 \t"'.\-/_@=,:+~]*$/.test(remaining)) return false

  // SECURITY: The remaining text (command with heredocs stripped) must also
  // pass all security validators. Without this, appending a safe heredoc to a
  // dangerous command (e.g., `zmodload zsh/system $(cat <<'EOF'\nx\nEOF\n)`)
  // causes this early-allow path to return passthrough, bypassing
  // validateZshDangerousCommands, validateProcEnvironAccess, and any other
  // main validator that checks allowlist-safe character patterns.
  // No recursion risk: `remaining` has no `$(... <<` pattern, so the recursive
  // call's validateSafeCommandSubstitution returns passthrough immediately.
  if (bashCommandIsSafe_DEPRECATED(remaining).behavior !== 'passthrough')
    return false

  return true
}
 515  
 516  /**
 517   * Detects well-formed $(cat <<'DELIM'...DELIM) heredoc substitution patterns.
 518   * Returns the command with matched heredocs stripped, or null if none found.
 519   * Used by the pre-split gate to strip safe heredocs and re-check the remainder.
 520   */
 521  export function stripSafeHeredocSubstitutions(command: string): string | null {
 522    if (!HEREDOC_IN_SUBSTITUTION.test(command)) return null
 523  
 524    const heredocPattern =
 525      /\$\(cat[ \t]*<<(-?)[ \t]*(?:'+([A-Za-z_]\w*)'+|\\([A-Za-z_]\w*))/g
 526    let result = command
 527    let found = false
 528    let match
 529    const ranges: Array<{ start: number; end: number }> = []
 530    while ((match = heredocPattern.exec(command)) !== null) {
 531      if (match.index > 0 && command[match.index - 1] === '\\') continue
 532      const delimiter = match[2] || match[3]
 533      if (!delimiter) continue
 534      const isDash = match[1] === '-'
 535      const operatorEnd = match.index + match[0].length
 536  
 537      const afterOperator = command.slice(operatorEnd)
 538      const openLineEnd = afterOperator.indexOf('\n')
 539      if (openLineEnd === -1) continue
 540      if (!/^[ \t]*$/.test(afterOperator.slice(0, openLineEnd))) continue
 541  
 542      const bodyStart = operatorEnd + openLineEnd + 1
 543      const bodyLines = command.slice(bodyStart).split('\n')
 544      for (let i = 0; i < bodyLines.length; i++) {
 545        const rawLine = bodyLines[i]!
 546        const line = isDash ? rawLine.replace(/^\t*/, '') : rawLine
 547        if (line.startsWith(delimiter)) {
 548          const after = line.slice(delimiter.length)
 549          let closePos = -1
 550          if (/^[ \t]*\)/.test(after)) {
 551            const lineStart =
 552              bodyStart +
 553              bodyLines.slice(0, i).join('\n').length +
 554              (i > 0 ? 1 : 0)
 555            closePos = command.indexOf(')', lineStart)
 556          } else if (after === '') {
 557            const nextLine = bodyLines[i + 1]
 558            if (nextLine !== undefined && /^[ \t]*\)/.test(nextLine)) {
 559              const nextLineStart =
 560                bodyStart + bodyLines.slice(0, i + 1).join('\n').length + 1
 561              closePos = command.indexOf(')', nextLineStart)
 562            }
 563          }
 564          if (closePos !== -1) {
 565            ranges.push({ start: match.index, end: closePos + 1 })
 566            found = true
 567          }
 568          break
 569        }
 570      }
 571    }
 572    if (!found) return null
 573    for (let i = ranges.length - 1; i >= 0; i--) {
 574      const r = ranges[i]!
 575      result = result.slice(0, r.start) + result.slice(r.end)
 576    }
 577    return result
 578  }
 579  
 580  /** Detection-only check: does the command contain a safe heredoc substitution? */
 581  export function hasSafeHeredocSubstitution(command: string): boolean {
 582    return stripSafeHeredocSubstitutions(command) !== null
 583  }
 584  
 585  function validateSafeCommandSubstitution(
 586    context: ValidationContext,
 587  ): PermissionResult {
 588    const { originalCommand } = context
 589  
 590    if (!HEREDOC_IN_SUBSTITUTION.test(originalCommand)) {
 591      return { behavior: 'passthrough', message: 'No heredoc in substitution' }
 592    }
 593  
 594    if (isSafeHeredoc(originalCommand)) {
 595      return {
 596        behavior: 'allow',
 597        updatedInput: { command: originalCommand },
 598        decisionReason: {
 599          type: 'other',
 600          reason:
 601            'Safe command substitution: cat with quoted/escaped heredoc delimiter',
 602        },
 603      }
 604    }
 605  
 606    return {
 607      behavior: 'passthrough',
 608      message: 'Command substitution needs validation',
 609    }
 610  }
 611  
 612  function validateGitCommit(context: ValidationContext): PermissionResult {
 613    const { originalCommand, baseCommand } = context
 614  
 615    if (baseCommand !== 'git' || !/^git\s+commit\s+/.test(originalCommand)) {
 616      return { behavior: 'passthrough', message: 'Not a git commit' }
 617    }
 618  
 619    // SECURITY: Backslashes can cause our regex to mis-identify quote boundaries
 620    // (e.g., `git commit -m "test\"msg" && evil`). Legitimate commit messages
 621    // virtually never contain backslashes, so bail to the full validator chain.
 622    if (originalCommand.includes('\\')) {
 623      return {
 624        behavior: 'passthrough',
 625        message: 'Git commit contains backslash, needs full validation',
 626      }
 627    }
 628  
 629    // SECURITY: The `.*?` before `-m` must NOT match shell operators. Previously
 630    // `.*?` matched anything except `\n`, including `;`, `&`, `|`, `` ` ``, `$(`.
 631    // For `git commit ; curl evil.com -m 'x'`, `.*?` swallowed `; curl evil.com `
 632    // leaving remainder=`` (falsy → remainder check skipped) → returned `allow`
 633    // for a compound command. Early-allow skips ALL main validators (line ~1908),
 634    // nullifying validateQuotedNewline, validateBackslashEscapedOperators, etc.
 635    // While splitCommand currently catches this downstream, early-allow is a
 636    // POSITIVE ASSERTION that the FULL command is safe — which it is NOT.
 637    //
 638    // Also: `\s+` between `git` and `commit` must NOT match `\n`/`\r` (command
 639    // separators in bash). Use `[ \t]+` for horizontal-only whitespace.
 640    //
 641    // The `[^;&|`$<>()\n\r]*?` class excludes shell metacharacters. We also
 642    // exclude `<` and `>` here (redirects) — they're allowed in the REMAINDER
 643    // for `--author="Name <email>"` but must not appear BEFORE `-m`.
 644    const messageMatch = originalCommand.match(
 645      /^git[ \t]+commit[ \t]+[^;&|`$<>()\n\r]*?-m[ \t]+(["'])([\s\S]*?)\1(.*)$/,
 646    )
 647  
 648    if (messageMatch) {
 649      const [, quote, messageContent, remainder] = messageMatch
 650  
 651      if (quote === '"' && messageContent && /\$\(|`|\$\{/.test(messageContent)) {
 652        logEvent('tengu_bash_security_check_triggered', {
 653          checkId: BASH_SECURITY_CHECK_IDS.GIT_COMMIT_SUBSTITUTION,
 654          subId: 1,
 655        })
 656        return {
 657          behavior: 'ask',
 658          message: 'Git commit message contains command substitution patterns',
 659        }
 660      }
 661  
 662      // SECURITY: Check remainder for shell operators that could chain commands
 663      // or redirect output. The `.*` before `-m` in the regex can swallow flags
 664      // like `--amend`, leaving `&& evil` or `> ~/.bashrc` in the remainder.
 665      // Previously we only checked for $() / `` / ${} here, missing operators
 666      // like ; | & && || < >.
 667      //
 668      // `<` and `>` can legitimately appear INSIDE quotes in --author values
 669      // like `--author="Name <email>"`. An UNQUOTED `>` is a shell redirect
 670      // operator. Because validateGitCommit is an EARLY validator, returning
 671      // `allow` here short-circuits bashCommandIsSafe and SKIPS
 672      // validateRedirections. So we must bail to passthrough on unquoted `<>`
 673      // to let the main validators handle it.
 674      //
 675      // Attack: `git commit --allow-empty -m 'payload' > ~/.bashrc`
 676      //   validateGitCommit returns allow → bashCommandIsSafe short-circuits →
 677      //   validateRedirections NEVER runs → ~/.bashrc overwritten with git
 678      //   stdout containing `payload` → RCE on next shell login.
 679      if (remainder && /[;|&()`]|\$\(|\$\{/.test(remainder)) {
 680        return {
 681          behavior: 'passthrough',
 682          message: 'Git commit remainder contains shell metacharacters',
 683        }
 684      }
 685      if (remainder) {
 686        // Strip quoted content, then check for `<` or `>`. Quoted `<>` (email
 687        // brackets in --author) are safe; unquoted `<>` are shell redirects.
 688        // NOTE: This simple quote tracker has NO backslash handling. `\'`/`\"`
 689        // outside quotes would desync it (bash: \' = literal ', tracker: toggles
 690        // SQ). BUT line 584 already bailed on ANY backslash in originalCommand,
 691        // so we never reach here with backslashes. For backslash-free input,
 692        // simple quote toggling is correct (no way to escape quotes without \\).
 693        let unquoted = ''
 694        let inSQ = false
 695        let inDQ = false
 696        for (let i = 0; i < remainder.length; i++) {
 697          const c = remainder[i]
 698          if (c === "'" && !inDQ) {
 699            inSQ = !inSQ
 700            continue
 701          }
 702          if (c === '"' && !inSQ) {
 703            inDQ = !inDQ
 704            continue
 705          }
 706          if (!inSQ && !inDQ) unquoted += c
 707        }
 708        if (/[<>]/.test(unquoted)) {
 709          return {
 710            behavior: 'passthrough',
 711            message: 'Git commit remainder contains unquoted redirect operator',
 712          }
 713        }
 714      }
 715  
 716      // Security hardening: block messages starting with dash
 717      // This catches potential obfuscation patterns like git commit -m "---"
 718      if (messageContent && messageContent.startsWith('-')) {
 719        logEvent('tengu_bash_security_check_triggered', {
 720          checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
 721          subId: 5,
 722        })
 723        return {
 724          behavior: 'ask',
 725          message: 'Command contains quoted characters in flag names',
 726        }
 727      }
 728  
 729      return {
 730        behavior: 'allow',
 731        updatedInput: { command: originalCommand },
 732        decisionReason: {
 733          type: 'other',
 734          reason: 'Git commit with simple quoted message is allowed',
 735        },
 736      }
 737    }
 738  
 739    return { behavior: 'passthrough', message: 'Git commit needs validation' }
 740  }
 741  
 742  function validateJqCommand(context: ValidationContext): PermissionResult {
 743    const { originalCommand, baseCommand } = context
 744  
 745    if (baseCommand !== 'jq') {
 746      return { behavior: 'passthrough', message: 'Not jq' }
 747    }
 748  
 749    if (/\bsystem\s*\(/.test(originalCommand)) {
 750      logEvent('tengu_bash_security_check_triggered', {
 751        checkId: BASH_SECURITY_CHECK_IDS.JQ_SYSTEM_FUNCTION,
 752        subId: 1,
 753      })
 754      return {
 755        behavior: 'ask',
 756        message:
 757          'jq command contains system() function which executes arbitrary commands',
 758      }
 759    }
 760  
 761    // File arguments are now allowed - they will be validated by path validation in readOnlyValidation.ts
 762    // Only block dangerous flags that could read files into jq variables
 763    const afterJq = originalCommand.substring(3).trim()
 764    if (
 765      /(?:^|\s)(?:-f\b|--from-file|--rawfile|--slurpfile|-L\b|--library-path)/.test(
 766        afterJq,
 767      )
 768    ) {
 769      logEvent('tengu_bash_security_check_triggered', {
 770        checkId: BASH_SECURITY_CHECK_IDS.JQ_FILE_ARGUMENTS,
 771        subId: 1,
 772      })
 773      return {
 774        behavior: 'ask',
 775        message:
 776          'jq command contains dangerous flags that could execute code or read arbitrary files',
 777      }
 778    }
 779  
 780    return { behavior: 'passthrough', message: 'jq command is safe' }
 781  }
 782  
 783  function validateShellMetacharacters(
 784    context: ValidationContext,
 785  ): PermissionResult {
 786    const { unquotedContent } = context
 787    const message =
 788      'Command contains shell metacharacters (;, |, or &) in arguments'
 789  
 790    if (/(?:^|\s)["'][^"']*[;&][^"']*["'](?:\s|$)/.test(unquotedContent)) {
 791      logEvent('tengu_bash_security_check_triggered', {
 792        checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
 793        subId: 1,
 794      })
 795      return { behavior: 'ask', message }
 796    }
 797  
 798    const globPatterns = [
 799      /-name\s+["'][^"']*[;|&][^"']*["']/,
 800      /-path\s+["'][^"']*[;|&][^"']*["']/,
 801      /-iname\s+["'][^"']*[;|&][^"']*["']/,
 802    ]
 803  
 804    if (globPatterns.some(p => p.test(unquotedContent))) {
 805      logEvent('tengu_bash_security_check_triggered', {
 806        checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
 807        subId: 2,
 808      })
 809      return { behavior: 'ask', message }
 810    }
 811  
 812    if (/-regex\s+["'][^"']*[;&][^"']*["']/.test(unquotedContent)) {
 813      logEvent('tengu_bash_security_check_triggered', {
 814        checkId: BASH_SECURITY_CHECK_IDS.SHELL_METACHARACTERS,
 815        subId: 3,
 816      })
 817      return { behavior: 'ask', message }
 818    }
 819  
 820    return { behavior: 'passthrough', message: 'No metacharacters' }
 821  }
 822  
 823  function validateDangerousVariables(
 824    context: ValidationContext,
 825  ): PermissionResult {
 826    const { fullyUnquotedContent } = context
 827  
 828    if (
 829      /[<>|]\s*\$[A-Za-z_]/.test(fullyUnquotedContent) ||
 830      /\$[A-Za-z_][A-Za-z0-9_]*\s*[|<>]/.test(fullyUnquotedContent)
 831    ) {
 832      logEvent('tengu_bash_security_check_triggered', {
 833        checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_VARIABLES,
 834        subId: 1,
 835      })
 836      return {
 837        behavior: 'ask',
 838        message:
 839          'Command contains variables in dangerous contexts (redirections or pipes)',
 840      }
 841    }
 842  
 843    return { behavior: 'passthrough', message: 'No dangerous variables' }
 844  }
 845  
 846  function validateDangerousPatterns(
 847    context: ValidationContext,
 848  ): PermissionResult {
 849    const { unquotedContent } = context
 850  
 851    // Special handling for backticks - check for UNESCAPED backticks only
 852    // Escaped backticks (e.g., \`) are safe and commonly used in SQL commands
 853    if (hasUnescapedChar(unquotedContent, '`')) {
 854      return {
 855        behavior: 'ask',
 856        message: 'Command contains backticks (`) for command substitution',
 857      }
 858    }
 859  
 860    // Other command substitution checks (include double-quoted content)
 861    for (const { pattern, message } of COMMAND_SUBSTITUTION_PATTERNS) {
 862      if (pattern.test(unquotedContent)) {
 863        logEvent('tengu_bash_security_check_triggered', {
 864          checkId:
 865            BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_COMMAND_SUBSTITUTION,
 866          subId: 1,
 867        })
 868        return { behavior: 'ask', message: `Command contains ${message}` }
 869      }
 870    }
 871  
 872    return { behavior: 'passthrough', message: 'No dangerous patterns' }
 873  }
 874  
 875  function validateRedirections(context: ValidationContext): PermissionResult {
 876    const { fullyUnquotedContent } = context
 877  
 878    if (/</.test(fullyUnquotedContent)) {
 879      logEvent('tengu_bash_security_check_triggered', {
 880        checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_INPUT_REDIRECTION,
 881        subId: 1,
 882      })
 883      return {
 884        behavior: 'ask',
 885        message:
 886          'Command contains input redirection (<) which could read sensitive files',
 887      }
 888    }
 889  
 890    if (/>/.test(fullyUnquotedContent)) {
 891      logEvent('tengu_bash_security_check_triggered', {
 892        checkId: BASH_SECURITY_CHECK_IDS.DANGEROUS_PATTERNS_OUTPUT_REDIRECTION,
 893        subId: 1,
 894      })
 895      return {
 896        behavior: 'ask',
 897        message:
 898          'Command contains output redirection (>) which could write to arbitrary files',
 899      }
 900    }
 901  
 902    return { behavior: 'passthrough', message: 'No redirections' }
 903  }
 904  
 905  function validateNewlines(context: ValidationContext): PermissionResult {
 906    // Use fullyUnquotedPreStrip (before stripSafeRedirections) to prevent bypasses
 907    // where stripping `>/dev/null` creates a phantom backslash-newline continuation.
 908    // E.g., `cmd \>/dev/null\nwhoami` → after stripping becomes `cmd \\nwhoami`
 909    // which looks like a safe continuation but actually hides a second command.
 910    const { fullyUnquotedPreStrip } = context
 911  
 912    // Check for newlines in unquoted content
 913    if (!/[\n\r]/.test(fullyUnquotedPreStrip)) {
 914      return { behavior: 'passthrough', message: 'No newlines' }
 915    }
 916  
 917    // Flag any newline/CR followed by non-whitespace, EXCEPT backslash-newline
 918    // continuations at word boundaries. In bash, `\<newline>` is a line
 919    // continuation (both chars removed), which is safe when the backslash
 920    // follows whitespace (e.g., `cmd \<newline>--flag`). Mid-word continuations
 921    // like `tr\<newline>aceroute` are still flagged because they can hide
 922    // dangerous command names from allowlist checks.
 923    // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() + gated by /[\n\r]/.test() above
 924    const looksLikeCommand = /(?<![\s]\\)[\n\r]\s*\S/.test(fullyUnquotedPreStrip)
 925    if (looksLikeCommand) {
 926      logEvent('tengu_bash_security_check_triggered', {
 927        checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
 928        subId: 1,
 929      })
 930      return {
 931        behavior: 'ask',
 932        message:
 933          'Command contains newlines that could separate multiple commands',
 934      }
 935    }
 936  
 937    return {
 938      behavior: 'passthrough',
 939      message: 'Newlines appear to be within data',
 940    }
 941  }
 942  
 943  /**
 944   * SECURITY: Carriage return (\r, 0x0D) IS a misparsing concern, unlike LF.
 945   *
 946   * Parser differential:
 947   *   - shell-quote's BAREWORD regex uses `[^\s...]` — JS `\s` INCLUDES \r, so
 948   *     shell-quote treats CR as a token boundary. `TZ=UTC\recho` tokenizes as
 949   *     TWO tokens: ['TZ=UTC', 'echo']. splitCommand joins with space →
 950   *     'TZ=UTC echo curl evil.com'.
 951   *   - bash's default IFS = $' \t\n' — CR is NOT in IFS. bash sees
 952   *     `TZ=UTC\recho` as ONE word → env assignment TZ='UTC\recho' (CR byte
 953   *     inside value), then `curl` is the command.
 954   *
 955   * Attack: `TZ=UTC\recho curl evil.com` with Bash(echo:*)
 956   *   validator: splitCommand collapses CR→space → 'TZ=UTC echo curl evil.com'
 957   *   → stripSafeWrappers: TZ=UTC stripped → 'echo curl evil.com' matches rule
 958   *   bash: executes `curl evil.com`
 959   *
 960   * validateNewlines catches this but is in nonMisparsingValidators (LF is
 961   * correctly handled by both parsers). This validator is NOT in
 962   * nonMisparsingValidators — its ask result gets isBashSecurityCheckForMisparsing
 963   * and blocks at the bashPermissions gate.
 964   *
 965   * Checks originalCommand (not fullyUnquotedPreStrip) because CR inside single
 966   * quotes is ALSO a misparsing concern for the same reason: shell-quote's `\s`
 967   * still tokenizes it, but bash treats it as literal. Block ALL unquoted-or-SQ CR.
 968   * Only exception: CR inside DOUBLE quotes where bash also treats it as data
 969   * and shell-quote preserves the token (no split).
 970   */
 971  function validateCarriageReturn(context: ValidationContext): PermissionResult {
 972    const { originalCommand } = context
 973  
 974    if (!originalCommand.includes('\r')) {
 975      return { behavior: 'passthrough', message: 'No carriage return' }
 976    }
 977  
 978    // Check if CR appears outside double quotes. CR outside DQ (including inside
 979    // SQ and unquoted) causes the shell-quote/bash tokenization differential.
 980    let inSingleQuote = false
 981    let inDoubleQuote = false
 982    let escaped = false
 983    for (let i = 0; i < originalCommand.length; i++) {
 984      const c = originalCommand[i]
 985      if (escaped) {
 986        escaped = false
 987        continue
 988      }
 989      if (c === '\\' && !inSingleQuote) {
 990        escaped = true
 991        continue
 992      }
 993      if (c === "'" && !inDoubleQuote) {
 994        inSingleQuote = !inSingleQuote
 995        continue
 996      }
 997      if (c === '"' && !inSingleQuote) {
 998        inDoubleQuote = !inDoubleQuote
 999        continue
1000      }
1001      if (c === '\r' && !inDoubleQuote) {
1002        logEvent('tengu_bash_security_check_triggered', {
1003          checkId: BASH_SECURITY_CHECK_IDS.NEWLINES,
1004          subId: 2,
1005        })
1006        return {
1007          behavior: 'ask',
1008          message:
1009            'Command contains carriage return (\\r) which shell-quote and bash tokenize differently',
1010        }
1011      }
1012    }
1013  
1014    return { behavior: 'passthrough', message: 'CR only inside double quotes' }
1015  }
1016  
/**
 * Asks for approval on any use of the IFS variable in `originalCommand`,
 * since it could be used to bypass regex validation.
 *
 * Matches a literal `$IFS` as well as any `${...IFS` form, which covers
 * parameter expansions such as `${IFS:0:1}` and `${#IFS}`.
 *
 * @param context Validation context; only `originalCommand` is read.
 * @returns `ask` when IFS is referenced, otherwise `passthrough`.
 */
function validateIFSInjection(context: ValidationContext): PermissionResult {
  // `\$\{[^}]*IFS` catches every parameter-expansion variation that names IFS
  // before the closing brace.
  const referencesIFS = /\$IFS|\$\{[^}]*IFS/.test(context.originalCommand)

  if (!referencesIFS) {
    return { behavior: 'passthrough', message: 'No IFS injection detected' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.IFS_INJECTION,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains IFS variable usage which could bypass security validation',
  }
}
1037  
/**
 * Defense-in-depth against reading environment variables through the /proc
 * filesystem. Path validation typically blocks /proc access already; this
 * check adds a second barrier because environment files under /proc can
 * expose sensitive data like API keys and secrets.
 *
 * Any `/proc/<anything>/environ` substring of `originalCommand` triggers,
 * e.g. `/proc/self/environ`, `/proc/1/environ`, `/proc/<pid>/environ`.
 *
 * @param context Validation context; only `originalCommand` is read.
 * @returns `ask` when such a path is present, otherwise `passthrough`.
 */
function validateProcEnvironAccess(
  context: ValidationContext,
): PermissionResult {
  const touchesProcEnviron = /\/proc\/.*\/environ/.test(
    context.originalCommand,
  )

  if (touchesProcEnviron) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.PROC_ENVIRON_ACCESS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message:
        'Command accesses /proc/*/environ which could expose sensitive environment variables',
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No /proc/environ access detected',
  }
}
1068  
/**
 * Detects commands that combine malformed tokens (unbalanced delimiters) with
 * command separators, a shape where ambiguous shell syntax could be exploited.
 *
 * Security: this catches the eval bypass discovered in HackerOne review. When
 * shell-quote parses ambiguous patterns like `echo {"hi":"hi;evil"}`, it may
 * produce unbalanced tokens (e.g., `{hi:"hi`). Combined with command
 * separators, that can lead to unintended command execution via eval
 * re-parsing. Forcing user approval ensures the user sees exactly what will be
 * executed before approving.
 *
 * @param context Validation context; only `originalCommand` is read.
 * @returns `ask` when the parsed command contains a `;`, `&&` or `||`
 *   operator AND `hasMalformedTokens` reports a problem; `passthrough` in every
 *   other case, including when parsing fails.
 */
function validateMalformedTokenInjection(
  context: ValidationContext,
): PermissionResult {
  const command = context.originalCommand

  const outcome = tryParseShellCommand(command)
  if (!outcome.success) {
    // Parse failed - this is handled elsewhere (bashToolHasPermission checks this)
    return {
      behavior: 'passthrough',
      message: 'Parse failed, handled elsewhere',
    }
  }

  const tokens = outcome.tokens

  // Look for the first command separator operator (;, &&, ||)
  let sawSeparator = false
  for (const token of tokens) {
    if (typeof token !== 'object' || token === null || !('op' in token)) {
      continue
    }
    if (token.op === ';' || token.op === '&&' || token.op === '||') {
      sawSeparator = true
      break
    }
  }

  if (!sawSeparator) {
    return { behavior: 'passthrough', message: 'No command separators' }
  }

  // Separators alone are fine; only unbalanced delimiters alongside them are suspicious.
  if (!hasMalformedTokens(command, tokens)) {
    return {
      behavior: 'passthrough',
      message: 'No malformed token injection detected',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.MALFORMED_TOKEN_INJECTION,
    subId: 1,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains ambiguous syntax with command separators that could be misinterpreted',
  }
}
1129  
1130  function validateObfuscatedFlags(context: ValidationContext): PermissionResult {
1131    // Block shell quoting bypass patterns used to circumvent negative lookaheads we use in our regexes to block known dangerous flags
1132  
1133    const { originalCommand, baseCommand } = context
1134  
1135    // Echo is safe for obfuscated flags, BUT only for simple echo commands.
1136    // For compound commands (with |, &, ;), we need to check the whole command
1137    // because the dangerous ANSI-C quoting might be after the operator.
1138    const hasShellOperators = /[|&;]/.test(originalCommand)
1139    if (baseCommand === 'echo' && !hasShellOperators) {
1140      return {
1141        behavior: 'passthrough',
1142        message: 'echo command is safe and has no dangerous flags',
1143      }
1144    }
1145  
1146    // COMPREHENSIVE OBFUSCATION DETECTION
1147    // These checks catch various ways to hide flags using shell quoting
1148  
1149    // 1. Block ANSI-C quoting ($'...') - can encode any character via escape sequences
1150    // Simple pattern that matches $'...' anywhere. This correctly handles:
1151    // - grep '$' file => no match ($ is regex anchor inside quotes, no $'...' structure)
1152    // - 'test'$'-exec' => match (quote concatenation with ANSI-C)
1153    // - Zero-width space and other invisible chars => match
1154    // The pattern requires $' followed by content (can be empty) followed by closing '
1155    if (/\$'[^']*'/.test(originalCommand)) {
1156      logEvent('tengu_bash_security_check_triggered', {
1157        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1158        subId: 5,
1159      })
1160      return {
1161        behavior: 'ask',
1162        message: 'Command contains ANSI-C quoting which can hide characters',
1163      }
1164    }
1165  
1166    // 2. Block locale quoting ($"...")  - can also use escape sequences
1167    // Same simple pattern as ANSI-C quoting above
1168    if (/\$"[^"]*"/.test(originalCommand)) {
1169      logEvent('tengu_bash_security_check_triggered', {
1170        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1171        subId: 6,
1172      })
1173      return {
1174        behavior: 'ask',
1175        message: 'Command contains locale quoting which can hide characters',
1176      }
1177    }
1178  
1179    // 3. Block empty ANSI-C or locale quotes followed by dash
1180    // $''-exec or $""-exec
1181    if (/\$['"]{2}\s*-/.test(originalCommand)) {
1182      logEvent('tengu_bash_security_check_triggered', {
1183        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1184        subId: 9,
1185      })
1186      return {
1187        behavior: 'ask',
1188        message:
1189          'Command contains empty special quotes before dash (potential bypass)',
1190      }
1191    }
1192  
1193    // 4. Block ANY sequence of empty quotes followed by dash
1194    // This catches: ''-  ""-  ''""-  ""''-  ''""''-  etc.
1195    // The pattern looks for one or more empty quote pairs followed by optional whitespace and dash
1196    if (/(?:^|\s)(?:''|"")+\s*-/.test(originalCommand)) {
1197      logEvent('tengu_bash_security_check_triggered', {
1198        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1199        subId: 7,
1200      })
1201      return {
1202        behavior: 'ask',
1203        message: 'Command contains empty quotes before dash (potential bypass)',
1204      }
1205    }
1206  
1207    // 4b. SECURITY: Block homogeneous empty quote pair(s) immediately adjacent
1208    // to a quoted dash. Patterns like `"""-f"` (empty `""` + quoted `"-f"`)
1209    // concatenate in bash to `-f` but slip past all the above checks:
1210    //   - Regex (4) above: `(?:''|"")+\s*-` matches `""` pair, then expects
1211    //     optional space and dash — but finds a third `"` instead. No match.
1212    //   - Quote-content scanner (below): Sees the first `""` pair with empty
1213    //     content (doesn't start with dash). The third `"` opens a new quoted
1214    //     region handled by the main quote-state tracker.
1215    //   - Quote-state tracker: `""` toggles inDoubleQuote on/off; third `"`
1216    //     opens it again. The `-` inside `"-f"` is INSIDE quotes → skipped.
1217    //   - Flag scanner: Looks for `\s` before `-`. The `-` is preceded by `"`.
1218    //   - fullyUnquotedContent: Both `""` and `"-f"` get stripped.
1219    //
1220    // In bash, `"""-f"` = empty string + string "-f" = `-f`. This bypass works
1221    // for ANY dangerous-flag check (jq -f, find -exec, fc -e) with a matching
1222    // prefix permission (Bash(jq:*), Bash(find:*)).
1223    //
1224    // The regex `(?:""|'')+['"]-` matches:
1225    //   - One or more HOMOGENEOUS empty pairs (`""` or `''`) — the concatenation
1226    //     point where bash joins the empty string to the flag.
1227    //   - Immediately followed by ANY quote char — opens the flag-quoted region.
1228    //   - Immediately followed by `-` — the obfuscated flag.
1229    //
1230    // POSITION-AGNOSTIC: We do NOT require word-start (`(?:^|\s)`) because
1231    // prefixes like `$x"""-f"` (unset/empty variable) concatenate the same way.
1232    // The homogeneous-empty-pair requirement filters out the `'"'"'` idiom
1233    // (no homogeneous empty pair — it's close, double-quoted-content, open).
1234    //
1235    // FALSE POSITIVE: Matches `echo '"""-f" text'` (pattern inside single-quoted
1236    // string). Extremely rare (requires echoing the literal attack). Acceptable.
1237    if (/(?:""|'')+['"]-/.test(originalCommand)) {
1238      logEvent('tengu_bash_security_check_triggered', {
1239        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1240        subId: 10,
1241      })
1242      return {
1243        behavior: 'ask',
1244        message:
1245          'Command contains empty quote pair adjacent to quoted dash (potential flag obfuscation)',
1246      }
1247    }
1248  
1249    // 4c. SECURITY: Also block 3+ consecutive quotes at word start even without
1250    // an immediate dash. Broader safety net for multi-quote obfuscation patterns
1251    // not enumerated above (e.g., `"""x"-f` where content between quotes shifts
1252    // the dash position). Legitimate commands never need `"""x"` when `"x"` works.
1253    if (/(?:^|\s)['"]{3,}/.test(originalCommand)) {
1254      logEvent('tengu_bash_security_check_triggered', {
1255        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1256        subId: 11,
1257      })
1258      return {
1259        behavior: 'ask',
1260        message:
1261          'Command contains consecutive quote characters at word start (potential obfuscation)',
1262      }
1263    }
1264  
1265    // Track quote state to avoid false positives for flags inside quoted strings
1266    let inSingleQuote = false
1267    let inDoubleQuote = false
1268    let escaped = false
1269  
1270    for (let i = 0; i < originalCommand.length - 1; i++) {
1271      const currentChar = originalCommand[i]
1272      const nextChar = originalCommand[i + 1]
1273  
1274      // Update quote state
1275      if (escaped) {
1276        escaped = false
1277        continue
1278      }
1279  
1280      // SECURITY: Only treat backslash as escape OUTSIDE single quotes. In bash,
1281      // `\` inside `'...'` is LITERAL. Without this guard, `'\'` desyncs the
1282      // quote tracker: `\` sets escaped=true, closing `'` is consumed by the
1283      // escaped-skip above instead of toggling inSingleQuote. Parser stays in
1284      // single-quote mode, and the `if (inSingleQuote || inDoubleQuote) continue`
1285      // at line ~1121 skips ALL subsequent flag detection for the rest of the
1286      // command. Example: `jq '\' "-f" evil` — bash gets `-f` arg, but desynced
1287      // parser thinks ` "-f" evil` is inside quotes → flag detection bypassed.
1288      // Defense-in-depth: hasShellQuoteSingleQuoteBug catches `'\'` patterns at
1289      // line ~1856 before this runs. But we fix the tracker for consistency with
1290      // the CORRECT implementations elsewhere in this file (hasBackslashEscaped*,
1291      // extractQuotedContent) which all guard with `!inSingleQuote`.
1292      if (currentChar === '\\' && !inSingleQuote) {
1293        escaped = true
1294        continue
1295      }
1296  
1297      if (currentChar === "'" && !inDoubleQuote) {
1298        inSingleQuote = !inSingleQuote
1299        continue
1300      }
1301  
1302      if (currentChar === '"' && !inSingleQuote) {
1303        inDoubleQuote = !inDoubleQuote
1304        continue
1305      }
1306  
1307      // Only look for flags when not inside quoted strings
1308      // This prevents false positives like: make test TEST="file.py -v"
1309      if (inSingleQuote || inDoubleQuote) {
1310        continue
1311      }
1312  
1313      // Look for whitespace followed by quote that contains a dash (potential flag obfuscation)
1314      // SECURITY: Block ANY quoted content starting with dash - err on side of safety
1315      // Catches: "-"exec, "-file", "--flag", '-'output, etc.
1316      // Users can approve manually if legitimate (e.g., find . -name "-file")
1317      if (
1318        currentChar &&
1319        nextChar &&
1320        /\s/.test(currentChar) &&
1321        /['"`]/.test(nextChar)
1322      ) {
1323        const quoteChar = nextChar
1324        let j = i + 2 // Start after the opening quote
1325        let insideQuote = ''
1326  
1327        // Collect content inside the quote
1328        while (j < originalCommand.length && originalCommand[j] !== quoteChar) {
1329          insideQuote += originalCommand[j]!
1330          j++
1331        }
1332  
1333        // If we found a closing quote and the content looks like an obfuscated flag, block it.
1334        // Three attack patterns to catch:
1335        //   1. Flag name inside quotes: "--flag", "-exec", "-X" (dashes + letters inside)
1336        //   2. Split-quote flag: "-"exec, "--"output (dashes inside, letters continue after quote)
1337        //   3. Chained quotes: "-""exec" (dashes in first quote, second quote contains letters)
1338        // Pure-dash strings like "---" or "--" followed by whitespace/separator are separators,
1339        // not flags, and should not trigger this check.
1340        const charAfterQuote = originalCommand[j + 1]
1341        // Inside double quotes, $VAR and `cmd` expand at runtime, so "-$VAR" can
1342        // become -exec. Blocking $ and ` here over-blocks single-quoted literals
1343        // like grep '-$' (where $ is literal), but main's startsWith('-') already
1344        // blocked those — this restores status quo, not a new false positive.
1345        // Brace expansion ({) does NOT happen inside quotes, so { is not needed here.
1346        const hasFlagCharsInside = /^-+[a-zA-Z0-9$`]/.test(insideQuote)
1347        // Characters that can continue a flag after a closing quote. This catches:
1348        //   a-zA-Z0-9: "-"exec → -exec (direct concatenation)
1349        //   \\:        "-"\exec → -exec (backslash escape is stripped)
1350        //   -:         "-"-output → --output (extra dashes)
1351        //   {:         "-"{exec,delete} → -exec -delete (brace expansion)
1352        //   $:         "-"$VAR → -exec when VAR=exec (variable expansion)
1353        //   `:         "-"`echo exec` → -exec (command substitution)
1354        // Note: glob chars (*?[) are omitted — they require attacker-controlled
1355        // filenames in CWD to exploit, and blocking them would break patterns
1356        // like `ls -- "-"*` for listing files that start with dash.
1357        const FLAG_CONTINUATION_CHARS = /[a-zA-Z0-9\\${`-]/
1358        const hasFlagCharsContinuing =
1359          /^-+$/.test(insideQuote) &&
1360          charAfterQuote !== undefined &&
1361          FLAG_CONTINUATION_CHARS.test(charAfterQuote)
1362        // Handle adjacent quote chaining: "-""exec" or "-""-"exec or """-"exec concatenates
1363        // to -exec in shell. Follow the chain of adjacent quoted segments until
1364        // we find one containing an alphanumeric char or hit a non-quote boundary.
1365        // Also handles empty prefix quotes: """-"exec where "" is followed by "-"exec
1366        // The combined segments form a flag if they contain dash(es) followed by alphanumerics.
1367        const hasFlagCharsInNextQuote =
1368          // Trigger when: first segment is only dashes OR empty (could be prefix for flag)
1369          (insideQuote === '' || /^-+$/.test(insideQuote)) &&
1370          charAfterQuote !== undefined &&
1371          /['"`]/.test(charAfterQuote) &&
1372          (() => {
1373            let pos = j + 1 // Start at charAfterQuote (an opening quote)
1374            let combinedContent = insideQuote // Track what the shell will see
1375            while (
1376              pos < originalCommand.length &&
1377              /['"`]/.test(originalCommand[pos]!)
1378            ) {
1379              const segQuote = originalCommand[pos]!
1380              let end = pos + 1
1381              while (
1382                end < originalCommand.length &&
1383                originalCommand[end] !== segQuote
1384              ) {
1385                end++
1386              }
1387              const segment = originalCommand.slice(pos + 1, end)
1388              combinedContent += segment
1389  
1390              // Check if combined content so far forms a flag pattern.
1391              // Include $ and ` for in-quote expansion: "-""$VAR" → -exec
1392              if (/^-+[a-zA-Z0-9$`]/.test(combinedContent)) return true
1393  
1394              // If this segment has alphanumeric/expansion and we already have dashes,
1395              // it's a flag. Catches "-""$*" where segment='$*' has no alnum but
1396              // expands to positional params at runtime.
1397              // Guard against segment.length === 0: slice(0, -0) → slice(0, 0) → ''.
1398              const priorContent =
1399                segment.length > 0
1400                  ? combinedContent.slice(0, -segment.length)
1401                  : combinedContent
1402              if (/^-+$/.test(priorContent)) {
1403                if (/[a-zA-Z0-9$`]/.test(segment)) return true
1404              }
1405  
1406              if (end >= originalCommand.length) break // Unclosed quote
1407              pos = end + 1 // Move past closing quote to check next segment
1408            }
1409            // Also check the unquoted char at the end of the chain
1410            if (
1411              pos < originalCommand.length &&
1412              FLAG_CONTINUATION_CHARS.test(originalCommand[pos]!)
1413            ) {
1414              // If we have dashes in combined content, the trailing char completes a flag
1415              if (/^-+$/.test(combinedContent) || combinedContent === '') {
1416                // Check if we're about to form a flag with the following content
1417                const nextChar = originalCommand[pos]!
1418                if (nextChar === '-') {
1419                  // More dashes, could still form a flag
1420                  return true
1421                }
1422                if (/[a-zA-Z0-9\\${`]/.test(nextChar) && combinedContent !== '') {
1423                  // We have dashes and now alphanumeric/expansion follows
1424                  return true
1425                }
1426              }
1427              // Original check for dashes followed by alphanumeric
1428              if (/^-/.test(combinedContent)) {
1429                return true
1430              }
1431            }
1432            return false
1433          })()
1434        if (
1435          j < originalCommand.length &&
1436          originalCommand[j] === quoteChar &&
1437          (hasFlagCharsInside ||
1438            hasFlagCharsContinuing ||
1439            hasFlagCharsInNextQuote)
1440        ) {
1441          logEvent('tengu_bash_security_check_triggered', {
1442            checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1443            subId: 4,
1444          })
1445          return {
1446            behavior: 'ask',
1447            message: 'Command contains quoted characters in flag names',
1448          }
1449        }
1450      }
1451  
1452      // Look for whitespace followed by dash - this starts a flag
1453      if (currentChar && nextChar && /\s/.test(currentChar) && nextChar === '-') {
1454        let j = i + 1 // Start at the dash
1455        let flagContent = ''
1456  
1457        // Collect flag content
1458        while (j < originalCommand.length) {
1459          const flagChar = originalCommand[j]
1460          if (!flagChar) break
1461  
1462          // End flag content once we hit whitespace or an equals sign
1463          if (/[\s=]/.test(flagChar)) {
1464            break
1465          }
1466          // End flag collection if we hit quote followed by non-flag character. This is needed to handle cases like -d"," which should be parsed as just -d
1467          if (/['"`]/.test(flagChar)) {
1468            // Special case for cut -d flag: the delimiter value can be quoted
1469            // Example: cut -d'"' should parse as flag name: -d, value: '"'
1470            // Note: We only apply this exception to cut -d specifically to avoid bypasses.
1471            // Without this restriction, a command like `find -e"xec"` could be parsed as
1472            // flag name: -e, bypassing our blocklist for -exec. By restricting to cut -d,
1473            // we allow the legitimate use case while preventing obfuscation attacks on other
1474            // commands where quoted flag values could hide dangerous flag names.
1475            if (
1476              baseCommand === 'cut' &&
1477              flagContent === '-d' &&
1478              /['"`]/.test(flagChar)
1479            ) {
1480              // This is cut -d followed by a quoted delimiter - flagContent is already '-d'
1481              break
1482            }
1483  
1484            // Look ahead to see what follows the quote
1485            if (j + 1 < originalCommand.length) {
1486              const nextFlagChar = originalCommand[j + 1]
1487              if (nextFlagChar && !/[a-zA-Z0-9_'"-]/.test(nextFlagChar)) {
1488                // Quote followed by something that is clearly not part of a flag, end the parsing
1489                break
1490              }
1491            }
1492          }
1493          flagContent += flagChar
1494          j++
1495        }
1496  
1497        if (flagContent.includes('"') || flagContent.includes("'")) {
1498          logEvent('tengu_bash_security_check_triggered', {
1499            checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1500            subId: 1,
1501          })
1502          return {
1503            behavior: 'ask',
1504            message: 'Command contains quoted characters in flag names',
1505          }
1506        }
1507      }
1508    }
1509  
1510    // Also handle flags that start with quotes: "--"output, '-'-output, etc.
1511    // Use fullyUnquotedContent to avoid false positives from legitimate quoted content like echo "---"
1512    if (/\s['"`]-/.test(context.fullyUnquotedContent)) {
1513      logEvent('tengu_bash_security_check_triggered', {
1514        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1515        subId: 2,
1516      })
1517      return {
1518        behavior: 'ask',
1519        message: 'Command contains quoted characters in flag names',
1520      }
1521    }
1522  
1523    // Also handles cases like ""--output
1524    // Use fullyUnquotedContent to avoid false positives from legitimate quoted content
1525    if (/['"`]{2}-/.test(context.fullyUnquotedContent)) {
1526      logEvent('tengu_bash_security_check_triggered', {
1527        checkId: BASH_SECURITY_CHECK_IDS.OBFUSCATED_FLAGS,
1528        subId: 3,
1529      })
1530      return {
1531        behavior: 'ask',
1532        message: 'Command contains quoted characters in flag names',
1533      }
1534    }
1535  
1536    return { behavior: 'passthrough', message: 'No obfuscated flags detected' }
1537  }
1538  
/**
 * Reports whether `command` contains a backslash immediately followed by a
 * space or tab while outside of any quotes.
 *
 * Why this matters: bash treats `echo\ test` as ONE token (a command literally
 * named "echo test"), whereas shell-quote decodes the escape and yields two
 * tokens (`echo`, `test`). That mismatch enables path-traversal tricks such as
 *   echo\ test/../../../usr/bin/touch /tmp/file
 * which the parser reads as an `echo` invocation but bash resolves to
 * `/usr/bin/touch /tmp/file` (through a directory named "echo test").
 *
 * @param command Raw command text to scan.
 * @returns `true` at the first unquoted `\<space>` or `\<tab>`; `false` if
 *   none is found.
 */
function hasBackslashEscapedWhitespace(command: string): boolean {
  // The two quote kinds are mutually exclusive, so one three-valued state is
  // enough to describe where the scanner currently is.
  let quoting: 'none' | 'single' | 'double' = 'none'
  let idx = 0

  while (idx < command.length) {
    const ch = command[idx]

    // A backslash is an escape everywhere except inside single quotes, where
    // it is a literal character.
    if (ch === '\\' && quoting !== 'single') {
      if (quoting === 'none') {
        const escapedChar = command[idx + 1]
        if (escapedChar === ' ' || escapedChar === '\t') {
          return true
        }
      }
      // Step over the backslash AND the character it escapes. This also
      // applies inside double quotes (\\, \", \$, \` are escapes there), so an
      // escaped `"` never toggles the quote state.
      idx += 2
      continue
    }

    if (ch === '"' && quoting !== 'single') {
      quoting = quoting === 'double' ? 'none' : 'double'
    } else if (ch === "'" && quoting !== 'double') {
      quoting = quoting === 'single' ? 'none' : 'single'
    }
    idx++
  }

  return false
}
1582  
/**
 * Validator wrapper around hasBackslashEscapedWhitespace.
 *
 * Scans `context.originalCommand`; when an unquoted backslash-escaped space or
 * tab is present it records a security-check analytics event and returns an
 * `ask` result, otherwise it returns `passthrough`.
 */
function validateBackslashEscapedWhitespace(
  context: ValidationContext,
): PermissionResult {
  const flagged = hasBackslashEscapedWhitespace(context.originalCommand)

  if (!flagged) {
    return {
      behavior: 'passthrough',
      message: 'No backslash-escaped whitespace',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_WHITESPACE,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains backslash-escaped whitespace that could alter command parsing',
  }
}
1602  
/**
 * Operators whose backslash-escaped form (`\;`, `\|`, `\&`, `\<`, `\>`) is
 * flagged by hasBackslashEscapedOperator.
 *
 * SECURITY background: splitCommand normalizes `\;` to a bare `;` in its
 * output string. When downstream code (checkReadOnlyConstraints,
 * checkPathConstraints, etc.) re-parses that normalized string, the bare `;`
 * is taken for an operator and produces a false split. Example:
 *
 *   cat safe.txt \; echo ~/.ssh/id_rsa
 *
 * In bash this is ONE cat command whose file arguments are safe.txt, `;`,
 * `echo` and ~/.ssh/id_rsa. After normalization the text reads
 * "cat safe.txt ; echo ~/.ssh/id_rsa"; the nested re-parse yields
 * ["cat safe.txt", "echo ~/.ssh/id_rsa"], both segments pass
 * isCommandReadOnly, and the sensitive path hidden in the echo segment is
 * never checked by path constraints — auto-allowed, private key leaked.
 *
 * `(` and `)` are deliberately NOT in this set: splitCommand preserves `\(`
 * and `\)` in its output (round-trip safe), so they do not trigger the
 * double-parse bug. This keeps `find . \( -name x -o -name y \)` free of
 * false positives.
 */
const SHELL_OPERATORS = new Set([';', '|', '&', '<', '>'])

/**
 * Reports whether `command` contains an unpaired backslash directly in front
 * of a shell operator (see SHELL_OPERATORS) outside of quotes.
 *
 * Backslash parity: backslashes are consumed in pairs, so only an operator
 * preceded by an ODD run of backslashes (`\;`, `\\\;`, ...) is flagged. An
 * even run (`\\;`) is not flagged: bash reads `\\` as a literal backslash and
 * `;` as a real separator, which splitCommand handles normally (no
 * double-parse bug).
 *
 * Known false positive: `find . -exec cmd {} \;` — users are prompted once.
 *
 * @param command Raw command text to scan.
 * @returns `true` at the first unquoted `\<operator>`; `false` otherwise.
 */
function hasBackslashEscapedOperator(command: string): boolean {
  let insideSingle = false
  let insideDouble = false

  for (let idx = 0; idx < command.length; idx++) {
    switch (command[idx]) {
      case '\\': {
        // Inside '...' a backslash is literal, so it escapes nothing.
        if (insideSingle) break

        // Operators are already inert inside "...", so `\;` only matters
        // when we are outside double quotes as well.
        if (!insideDouble) {
          const escapedChar = command[idx + 1]
          if (escapedChar !== undefined && SHELL_OPERATORS.has(escapedChar)) {
            return true
          }
        }

        // SECURITY: always step over the escaped character, including inside
        // double quotes. Two desync exploits depend on getting this wrong:
        //   - `tac "x\"y" \; echo ~/.ssh/id_rsa`: if `\"` were allowed to
        //     toggle the double-quote state, the tracker would believe the
        //     trailing `\;` is quoted and miss it.
        //   - `cat "x\\" \; echo /etc/passwd`: if the second `\` of `\\` were
        //     treated as a fresh escape, it would swallow the closing `"` and
        //     leave the tracker stuck inside quotes.
        // This is the same structure hasBackslashEscapedWhitespace uses.
        idx++
        break
      }
      case "'":
        // Reached only for unescaped quotes: escaped ones were skipped above.
        if (!insideDouble) insideSingle = !insideSingle
        break
      case '"':
        if (!insideSingle) insideDouble = !insideDouble
        break
    }
  }

  return false
}
1695  
/**
 * Validator wrapper around hasBackslashEscapedOperator.
 *
 * Returns `ask` (after recording a security-check analytics event) when
 * `context.originalCommand` contains an unquoted backslash-escaped shell
 * operator, and `passthrough` otherwise.
 */
function validateBackslashEscapedOperators(
  context: ValidationContext,
): PermissionResult {
  // Tree-sitter shortcut: when an AST is available and it reports no actual
  // operator nodes, any `\;` is merely an escaped character inside a word
  // argument (e.g. `find . -exec cmd {} \;`), so the scan is skipped.
  const astRulesOutOperators =
    context.treeSitter && !context.treeSitter.hasActualOperatorNodes
  if (astRulesOutOperators) {
    return { behavior: 'passthrough', message: 'No operator nodes in AST' }
  }

  if (!hasBackslashEscapedOperator(context.originalCommand)) {
    return {
      behavior: 'passthrough',
      message: 'No backslash-escaped operators',
    }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.BACKSLASH_ESCAPED_OPERATORS,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains a backslash before a shell operator (;, |, &, <, >) which can hide command structure',
  }
}
1722  
/**
 * Tells whether the character at index `pos` of `content` is backslash-escaped.
 *
 * The run of consecutive backslashes ending just before `pos` is measured; an
 * odd-length run means the last backslash is unpaired and escapes the
 * character, an even-length run (including zero) means it does not.
 *
 * @param content Text being inspected.
 * @param pos Index of the character in question.
 * @returns `true` when the preceding backslash run has odd length.
 */
function isEscapedAtPosition(content: string, pos: number): boolean {
  // Walk left from `pos` until the character before the cursor is no longer a
  // backslash (or the start of the string is reached).
  let runStart = pos
  while (runStart > 0 && content[runStart - 1] === '\\') {
    runStart--
  }
  const runLength = pos - runStart
  return runLength % 2 === 1
}
1736  
/**
 * Flags unquoted brace expansion, which Bash expands but shell-quote and
 * tree-sitter treat as a literal string. That discrepancy permits permission
 * bypasses such as
 *   git ls-remote {--upload-pack="touch /tmp/test",test}
 * where the parser sees one literal argument while Bash expands it to
 * `--upload-pack="touch /tmp/test" test`.
 *
 * The two expansion forms looked for are:
 *   1. comma lists:  {a,b,c} → a b c
 *   2. sequences:    {1..5}  → 1 2 3 4 5
 *
 * Single and double quotes both suppress brace expansion in Bash, so the scan
 * works on content with both quote kinds stripped. Backslash-escaped braces
 * (`\{`, `\}`) suppress expansion too and are ignored throughout.
 *
 * @returns `ask` (after logging an analytics event with a distinguishing
 *   `subId`) when a suspicious pattern is found, else `passthrough`.
 */
function validateBraceExpansion(context: ValidationContext): PermissionResult {
  // The pre-strip variant is used on purpose: stripSafeRedirections can create
  // new backslash adjacencies (e.g. `\>/dev/null{a,b}` → `\{a,b}`), which
  // would make the brace look escaped and cause a false negative.
  const content = context.fullyUnquotedPreStrip

  // True when `content[at]` is the given brace and is not backslash-escaped.
  const isLiveBrace = (at: number, brace: '{' | '}'): boolean =>
    content[at] === brace && !isEscapedAtPosition(content, at)

  // Records the analytics event for this check, then builds the `ask` result.
  const ask = (subId: number, message: string): PermissionResult => {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.BRACE_EXPANSION,
      subId,
    })
    return { behavior: 'ask', message }
  }

  // Tally unescaped braces (escaped ones are literal in bash).
  let liveOpens = 0
  let liveCloses = 0
  for (let at = 0; at < content.length; at++) {
    if (isLiveBrace(at, '{')) {
      liveOpens++
    } else if (isLiveBrace(at, '}')) {
      liveCloses++
    }
  }

  // Without any unescaped `{` none of the checks below can fire.
  if (liveOpens === 0) {
    return {
      behavior: 'passthrough',
      message: 'No brace expansion detected',
    }
  }

  // SECURITY: more `}` than `{` is the signature of a quoted `{` having been
  // removed by quote stripping. The depth matcher below assumes balanced
  // braces; on unbalanced input it closes at the WRONG position and misses
  // commas that bash would find.
  //
  // Exploit: `git diff {@'{'0},--output=/tmp/pwned}`
  //   - original text: 2 `{`, 2 `}` (the quoted `'{'` is content to bash)
  //   - stripped text: `git diff {@0},--output=/tmp/pwned}` — 1 `{`, 2 `}`
  //   - depth matcher: closes at the first `}`, inner text `@0`, no comma
  //   - bash: keeps scanning past the first `}`, finds the comma, and expands
  //     to `@{0} --output=/tmp/pwned` → git writes the diff to /tmp/pwned.
  //
  // The opposite imbalance (more `{` than `}`) is usually a legitimate
  // unclosed or escaped brace such as `{foo` or `{a,b\}`, which bash does not
  // expand, so only the excess-close direction is blocked.
  if (liveCloses > liveOpens) {
    return ask(
      2,
      'Command has excess closing braces after quote stripping, indicating possible brace expansion obfuscation',
    )
  }

  // SECURITY (defense-in-depth): a lone brace wrapped in quotes — '{', '}',
  // "{" or "}" — anywhere in the ORIGINAL command, combined with an unquoted
  // `{`, is treated as the attack primitive even when the stripped braces
  // happen to balance. Legitimate uses such as `awk '{print $1}'` quote the
  // whole brace group, so they do not match this single-brace pattern.
  if (/['"][{}]['"]/.test(context.originalCommand)) {
    return ask(
      3,
      'Command contains quoted brace character inside brace context (potential brace expansion obfuscation)',
    )
  }

  // Examine every unescaped `{`. A manual scan is used instead of a regex
  // lookbehind because lookbehinds cannot cope with doubled backslashes
  // (`\\{` is an UNescaped brace).
  for (let open = 0; open < content.length; open++) {
    if (!isLiveBrace(open, '{')) continue

    // Locate the matching unescaped `}` by tracking nesting depth, so nested
    // groups such as `{--upload-pack="evil",{test}}` are paired correctly.
    let close = -1
    let depth = 1
    for (let at = open + 1; at < content.length; at++) {
      if (isLiveBrace(at, '{')) {
        depth++
        continue
      }
      if (!isLiveBrace(at, '}')) continue
      depth--
      if (depth === 0) {
        close = at
        break
      }
    }

    // Unclosed brace: nothing to expand from this position.
    if (close === -1) continue

    // Look for `,` or `..` directly inside this pair (nesting level zero);
    // bash splits brace expansion only at outer-level commas/sequences.
    let nesting = 0
    for (let at = open + 1; at < close; at++) {
      if (isLiveBrace(at, '{')) {
        nesting++
        continue
      }
      if (isLiveBrace(at, '}')) {
        nesting--
        continue
      }
      if (nesting !== 0) continue

      const ch = content[at]
      const startsSequence =
        ch === '.' && at + 1 < close && content[at + 1] === '.'
      if (ch === ',' || startsSequence) {
        return ask(
          1,
          'Command contains brace expansion that could alter command parsing',
        )
      }
    }
    // Nothing at this level. The outer loop is NOT advanced past `close`, so
    // inner pairs get their own turn in later iterations.
  }

  return {
    behavior: 'passthrough',
    message: 'No brace expansion detected',
  }
}
1893  
// Unicode whitespace code points that shell-quote splits words on but bash
// keeps as ordinary word content. The differential leans in the defender's
// favor (shell-quote over-splits), yet these characters are flagged up front
// to rule out future edge cases.
// eslint-disable-next-line no-misleading-character-class
const UNICODE_WS_RE =
  /[\u00A0\u1680\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\uFEFF]/

/**
 * Returns `ask` (after recording a security-check analytics event) when
 * `context.originalCommand` contains any character matched by UNICODE_WS_RE,
 * and `passthrough` otherwise.
 */
function validateUnicodeWhitespace(
  context: ValidationContext,
): PermissionResult {
  const hasUnicodeWhitespace = UNICODE_WS_RE.test(context.originalCommand)

  if (!hasUnicodeWhitespace) {
    return { behavior: 'passthrough', message: 'No Unicode whitespace' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.UNICODE_WHITESPACE,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains Unicode whitespace characters that could cause parsing inconsistencies',
  }
}
1918  
/**
 * Flags a `#` that directly follows a non-whitespace character (a "mid-word"
 * hash). shell-quote treats such a `#` as the start of a comment while bash
 * treats it as a literal character, which is a parser differential.
 *
 * The scan runs on `unquotedKeepQuoteChars` (quoted content removed, quote
 * delimiters kept) so that a quote-adjacent hash like `'x'#` is still seen as
 * mid-word; stripping the delimiters too would leave a bare word-initial `#`.
 *
 * `${#` is exempt because it is bash string-length syntax (e.g. `${#var}`).
 *
 * @returns `ask` (after recording a security-check analytics event) when a
 *   mid-word hash is found, else `passthrough`.
 */
function validateMidWordHash(context: ValidationContext): PermissionResult {
  const raw = context.unquotedKeepQuoteChars

  // SECURITY: also examine the text with line continuations joined. For
  // `foo\<NL>#bar` the `#` follows a newline before joining (no match) but
  // follows `o` afterwards (match). shell-quote works on the joined text
  // (splitCommand joins continuations), so that is where the differential
  // shows up. Not directly exploitable — the `#...` fragment still prompts as
  // its own subcommand — but shell-quote would drop everything after `#` from
  // path extraction, so the gap is closed as defense-in-depth.
  //
  // Only an odd-length backslash run before the newline is a continuation:
  // its final backslash and the newline are removed. An even-length run is
  // all escaped backslashes, so the text is left as it is.
  const joined = raw.replace(/\\+\n/g, run => {
    const slashes = run.length - 1
    return slashes % 2 === 0 ? run : run.slice(0, slashes - 1)
  })

  // The lookbehind sits immediately before `#` (not before `\S`) so that it
  // inspects exactly the two characters preceding the hash.
  // eslint-disable-next-line custom-rules/no-lookbehind-regex -- .test() with atom search: fast when # absent
  const midWordHash = /\S(?<!\$\{)#/

  if (!midWordHash.test(raw) && !midWordHash.test(joined)) {
    return { behavior: 'passthrough', message: 'No mid-word hash' }
  }

  logEvent('tengu_bash_security_check_triggered', {
    checkId: BASH_SECURITY_CHECK_IDS.MID_WORD_HASH,
  })
  return {
    behavior: 'ask',
    message:
      'Command contains mid-word # which is parsed differently by shell-quote vs bash',
  }
}
1963  
/**
 * Flags `#` comments that contain quote characters, because those quotes can
 * desync downstream quote trackers (such as extractQuotedContent).
 *
 * In bash, everything after an unquoted `#` on a line is comment text, so any
 * `'` or `"` in it is inert. The quote-tracking helpers in this file do not
 * understand comments, though, and would toggle their state on those
 * characters. A crafted `# ' "` sequence can therefore make content on the
 * following lines look quoted when bash actually runs it unquoted.
 *
 * Example attack:
 *   echo "it's" # ' " <<'MARKER'\n
 *   rm -rf /\n
 *   MARKER
 * Bash: the `#` starts a comment and `rm -rf /` executes on line 2.
 * extractQuotedContent: the `'` after `#` opens a single quote that the `'`
 * before MARKER closes; the `'` after MARKER then opens ANOTHER one, which
 * swallows the newline and `rm -rf /`, so validateNewlines sees no unquoted
 * newline.
 *
 * Defense: an unquoted `#` followed by any quote character on the same line
 * is treated as a misparsing concern. Legitimate comments rarely contain
 * quotes, and the user can still approve manually.
 *
 * @returns `ask` (after recording a security-check analytics event) when such
 *   a comment is found, else `passthrough`.
 */
function validateCommentQuoteDesync(
  context: ValidationContext,
): PermissionResult {
  // When tree-sitter data is present, quote context comes from the AST, which
  // identifies comment nodes and quoted content correctly. The desync only
  // affects regex/character-based quote tracking, so there is nothing to check.
  if (context.treeSitter) {
    return {
      behavior: 'passthrough',
      message: 'Tree-sitter quote context is authoritative',
    }
  }

  const command = context.originalCommand

  // Character-level state machine using the same quote rules as
  // extractQuotedContent: a single quote inside double quotes is literal, and
  // a backslash escapes the next character everywhere except inside '...'.
  let mode: 'plain' | 'single' | 'double' = 'plain'
  let at = 0

  while (at < command.length) {
    const ch = command[at]

    if (mode === 'single') {
      // Only the closing quote is special inside single quotes.
      if (ch === "'") mode = 'plain'
      at++
      continue
    }

    if (ch === '\\') {
      // Skip the backslash together with the character it escapes.
      at += 2
      continue
    }

    if (mode === 'double') {
      if (ch === '"') mode = 'plain'
      at++
      continue
    }

    switch (ch) {
      case "'":
        mode = 'single'
        break
      case '"':
        mode = 'double'
        break
      case '#': {
        // Unquoted `#`: inspect the remainder of this line for quote
        // characters that would confuse the other trackers.
        const newlineAt = command.indexOf('\n', at)
        const commentEnd = newlineAt === -1 ? command.length : newlineAt
        if (/['"]/.test(command.slice(at + 1, commentEnd))) {
          logEvent('tengu_bash_security_check_triggered', {
            checkId: BASH_SECURITY_CHECK_IDS.COMMENT_QUOTE_DESYNC,
          })
          return {
            behavior: 'ask',
            message:
              'Command contains quote characters inside a # comment which can desync quote tracking',
          }
        }
        // The rest of the line is comment text: jump to its end. With no
        // newline left this lands on the end of the string and stops the scan;
        // otherwise the shared increment below steps past the newline.
        at = commentEnd
        break
      }
    }
    at++
  }

  return { behavior: 'passthrough', message: 'No comment quote desync' }
}
2075  
/**
 * Detects a newline inside a quoted string where the NEXT line would be
 * stripped by stripCommentLines (trimmed line starts with `#`).
 *
 * In bash, `\n` inside quotes is a literal character and part of the argument.
 * But stripCommentLines (called by stripSafeWrappers in bashPermissions before
 * path validation and rule matching) processes commands LINE-BY-LINE via
 * `command.split('\n')` without tracking quote state. A quoted newline lets an
 * attacker position the next line to start with `#` (after trim), causing
 * stripCommentLines to drop that line entirely — hiding sensitive paths or
 * arguments from path validation and permission rule matching.
 *
 * Example attack (auto-allowed in acceptEdits mode without any Bash rules):
 *   mv ./decoy '<\n>#' ~/.ssh/id_rsa ./exfil_dir
 * Bash: moves ./decoy AND ~/.ssh/id_rsa into ./exfil_dir/ (errors on `\n#`).
 * stripSafeWrappers: line 2 starts with `#` → stripped → "mv ./decoy '".
 * shell-quote: drops unbalanced trailing quote → ["mv", "./decoy"].
 * checkPathConstraints: only sees ./decoy (in cwd) → passthrough.
 * acceptEdits mode: mv with all-cwd paths → ALLOW. Zero clicks, no warning.
 *
 * Also works with cp (exfil), rm/rm -rf (delete arbitrary files/dirs).
 *
 * The same differential exists for a BACKSLASH-newline inside double quotes:
 *   mv ./decoy "x\<\n>#" ~/.ssh/id_rsa ./exfil_dir
 * Bash treats `\<newline>` as a line continuation (both characters are
 * removed) and stays inside the quoted string, but a `split('\n')` still sees
 * a line boundary there. An escaped newline inside double quotes is therefore
 * checked exactly like an unescaped one.
 *
 * Defense: block ONLY the specific stripCommentLines trigger — a newline inside
 * quotes where the next line starts with `#` after trim. This is the minimal
 * check that catches the parser differential while preserving legitimate
 * multi-line quoted arguments (echo 'line1\nline2', grep patterns, etc.).
 * Safe heredocs ($(cat <<'EOF'...)) and git commit -m "..." are handled by
 * early validators and never reach this check.
 *
 * This validator is NOT in nonMisparsingValidators — its ask result gets
 * isBashSecurityCheckForMisparsing: true, causing an early block in the
 * permission flow at bashPermissions.ts before any line-based processing runs.
 *
 * @param context - validation context; only `originalCommand` is read.
 * @returns `ask` when a quoted newline is followed by a line whose trimmed
 *   text starts with `#`, otherwise `passthrough`.
 */
function validateQuotedNewline(context: ValidationContext): PermissionResult {
  const { originalCommand } = context

  // Fast path: must have both a newline byte AND a # character somewhere.
  // stripCommentLines only strips lines where trim().startsWith('#'), so
  // no # means no possible trigger.
  if (!originalCommand.includes('\n') || !originalCommand.includes('#')) {
    return { behavior: 'passthrough', message: 'No newline or no hash' }
  }

  // Track quote state. Mirrors extractQuotedContent / validateCommentQuoteDesync:
  // - single quotes don't toggle inside double quotes
  // - backslash escapes the next char (but not inside single quotes)
  // stripCommentLines splits on '\n' (not \r), so we only treat \n as a line
  // separator. \r inside a line is removed by trim() and doesn't change the
  // trimmed-starts-with-# check.
  let inSingleQuote = false
  let inDoubleQuote = false
  let escaped = false

  for (let i = 0; i < originalCommand.length; i++) {
    const char = originalCommand[i]

    if (escaped) {
      escaped = false
      // `\<newline>` inside double quotes is removed by bash, but the quoted
      // string continues on the next physical line — which line-based
      // processing would still treat as a separate line. Do NOT skip it; let
      // it reach the newline check below. (`escaped` is never set inside
      // single quotes, so only the double-quote case can occur here.)
      const isContinuationInsideDoubleQuotes = char === '\n' && inDoubleQuote
      if (!isContinuationInsideDoubleQuotes) {
        continue
      }
    }

    if (char === '\\' && !inSingleQuote) {
      escaped = true
      continue
    }

    if (char === "'" && !inDoubleQuote) {
      inSingleQuote = !inSingleQuote
      continue
    }

    if (char === '"' && !inSingleQuote) {
      inDoubleQuote = !inDoubleQuote
      continue
    }

    // A newline inside quotes: the NEXT line (from bash's perspective) starts
    // inside a quoted string. Check if that line would be stripped by
    // stripCommentLines — i.e., after trim(), does it start with `#`?
    // This exactly mirrors: lines.filter(l => !l.trim().startsWith('#'))
    if (char === '\n' && (inSingleQuote || inDoubleQuote)) {
      const lineStart = i + 1
      const nextNewline = originalCommand.indexOf('\n', lineStart)
      const lineEnd = nextNewline === -1 ? originalCommand.length : nextNewline
      const nextLine = originalCommand.slice(lineStart, lineEnd)
      if (nextLine.trim().startsWith('#')) {
        logEvent('tengu_bash_security_check_triggered', {
          checkId: BASH_SECURITY_CHECK_IDS.QUOTED_NEWLINE,
        })
        return {
          behavior: 'ask',
          message:
            'Command contains a quoted newline followed by a #-prefixed line, which can hide arguments from line-based permission checks',
        }
      }
    }
  }

  return { behavior: 'passthrough', message: 'No quoted newline-hash pattern' }
}
2176  
/**
 * Flags commands whose effective base command is one of the Zsh-specific
 * commands listed in ZSH_DANGEROUS_COMMANDS. Those commands provide
 * capabilities like loading kernel modules, raw file I/O, network access, and
 * pseudo-terminal execution that circumvent normal permission checks.
 *
 * Additionally flags `fc` when any dash-flag containing `e` is present:
 * `fc -e` runs an arbitrary editor on command history, which is effectively
 * an eval. (`emulate`, which with `-c` is an eval-equivalent, is covered by
 * the set lookup.)
 *
 * @param context - validation context; only `originalCommand` is read.
 * @returns `ask` when a dangerous Zsh command or `fc -e` is detected,
 *   otherwise `passthrough`.
 */
function validateZshDangerousCommands(
  context: ValidationContext,
): PermissionResult {
  const command = context.originalCommand.trim()

  // Words that may precede the real command without changing which command
  // runs: Zsh precommand modifiers and leading VAR=value assignments.
  // e.g., "FOO=bar command builtin zmodload" -> "zmodload"
  const precommandModifiers = new Set([
    'command',
    'builtin',
    'noglob',
    'nocorrect',
  ])
  const isEnvAssignment = (word: string): boolean =>
    /^[A-Za-z_]\w*=/.test(word)

  // The base command is the first whitespace-separated word that is neither
  // an assignment nor a modifier; empty string when every word is skipped.
  const baseCmd =
    command
      .split(/\s+/)
      .find(word => !isEnvAssignment(word) && !precommandModifiers.has(word)) ??
    ''

  if (ZSH_DANGEROUS_COMMANDS.has(baseCmd)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,
      subId: 1,
    })
    return {
      behavior: 'ask',
      message: `Command uses Zsh-specific '${baseCmd}' which can bypass security checks`,
    }
  }

  // fc without -e is safe (just lists history); with -e it launches an editor
  // on the command, so treat any dash-flag containing `e` as the dangerous
  // form.
  const usesFcEditorFlag = baseCmd === 'fc' && /\s-\S*e/.test(command)
  if (usesFcEditorFlag) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.ZSH_DANGEROUS_COMMANDS,
      subId: 2,
    })
    return {
      behavior: 'ask',
      message:
        "Command uses 'fc -e' which can execute arbitrary commands via editor",
    }
  }

  return {
    behavior: 'passthrough',
    message: 'No Zsh dangerous commands',
  }
}
2243  
// Matches non-printable control characters that have no legitimate use in shell
// commands: 0x00-0x08, 0x0B-0x0C, 0x0E-0x1F, 0x7F. Excludes tab (0x09),
// newline (0x0A), and carriage return (0x0D) which are handled by other
// validators. Bash silently drops null bytes and ignores most control chars,
// so an attacker can use them to slip metacharacters past our checks while
// bash still executes them (e.g., "echo safe\x00; rm -rf /").
// The pattern has no `g`/`y` flag, so `.test()` keeps no `lastIndex` state and
// this single instance can be shared by every caller.
// eslint-disable-next-line no-control-regex
const CONTROL_CHAR_RE = /[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]/
2252  
/**
 * Checks that must run on the RAW command string, before heredoc stripping,
 * quote extraction, or any shell-quote parsing. Shared by the sync and async
 * entry points so both paths reject exactly the same inputs.
 *
 * @param command - the raw command string as submitted.
 * @returns an `ask` result flagged `isBashSecurityCheckForMisparsing`, or
 *   `null` when neither hazard is present.
 */
function detectPreParseHazard(command: string): PermissionResult | null {
  // SECURITY: Block control characters before any other processing. Null bytes
  // and other non-printable chars are silently dropped by bash but confuse our
  // validators, allowing metacharacters adjacent to them to slip through.
  if (CONTROL_CHAR_RE.test(command)) {
    logEvent('tengu_bash_security_check_triggered', {
      checkId: BASH_SECURITY_CHECK_IDS.CONTROL_CHARACTERS,
    })
    return {
      behavior: 'ask',
      message:
        'Command contains non-printable control characters that could be used to bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  // SECURITY: Detect '\' patterns that exploit shell-quote's incorrect handling
  // of backslashes inside single quotes. Must run before shell-quote parsing.
  if (hasShellQuoteSingleQuoteBug(command)) {
    return {
      behavior: 'ask',
      message:
        'Command contains single-quoted backslash pattern that could bypass security checks',
      isBashSecurityCheckForMisparsing: true,
    }
  }

  return null
}

/**
 * Runs the early validators and then the main validator list against an
 * already-built context. This is the single source of truth for validator
 * membership and ordering; both entry points delegate here so the regex path
 * and the tree-sitter path cannot drift apart.
 *
 * @param context - fully populated validation context.
 * @returns the first misparsing `ask`, else the first deferred non-misparsing
 *   `ask`, else `passthrough`. An early-validator `allow` is reported as
 *   `passthrough` carrying the validator's reason.
 */
function runLegacyValidators(context: ValidationContext): PermissionResult {
  const earlyValidators = [
    validateEmpty,
    validateIncompleteCommands,
    validateSafeCommandSubstitution,
    validateGitCommit,
  ]

  for (const validator of earlyValidators) {
    const result = validator(context)
    if (result.behavior === 'allow') {
      return {
        behavior: 'passthrough',
        message:
          result.decisionReason?.type === 'other' ||
          result.decisionReason?.type === 'safetyCheck'
            ? result.decisionReason.reason
            : 'Command allowed',
      }
    }
    if (result.behavior !== 'passthrough') {
      return result.behavior === 'ask'
        ? { ...result, isBashSecurityCheckForMisparsing: true as const }
        : result
    }
  }

  // Validators that don't set isBashSecurityCheckForMisparsing — their ask
  // results go through the standard permission flow rather than being blocked
  // early. LF newlines and redirections are normal patterns that splitCommand
  // handles correctly, not misparsing concerns.
  //
  // NOTE: validateCarriageReturn is NOT here — CR IS a misparsing concern.
  // shell-quote's `[^\s]` treats CR as a word separator (JS `\s` ⊃ \r), but
  // bash IFS does NOT include CR. splitCommand collapses CR→space, which IS
  // misparsing. See validateCarriageReturn for the full attack trace.
  const nonMisparsingValidators = new Set([
    validateNewlines,
    validateRedirections,
  ])

  const validators = [
    validateJqCommand,
    validateObfuscatedFlags,
    validateShellMetacharacters,
    validateDangerousVariables,
    // Run comment-quote-desync BEFORE validateNewlines: it detects cases where
    // the quote tracker would miss newlines due to # comment desync.
    validateCommentQuoteDesync,
    // Run quoted-newline BEFORE validateNewlines: it detects the INVERSE case
    // (newlines INSIDE quotes, which validateNewlines ignores by design). Quoted
    // newlines let attackers split commands across lines so that line-based
    // processing (stripCommentLines) drops sensitive content.
    validateQuotedNewline,
    // CR check runs BEFORE validateNewlines — CR is a MISPARSING concern
    // (shell-quote/bash tokenization differential), LF is not.
    validateCarriageReturn,
    validateNewlines,
    validateIFSInjection,
    validateProcEnvironAccess,
    validateDangerousPatterns,
    validateRedirections,
    validateBackslashEscapedWhitespace,
    validateBackslashEscapedOperators,
    validateUnicodeWhitespace,
    validateMidWordHash,
    validateBraceExpansion,
    validateZshDangerousCommands,
    // Run malformed token check last - other validators should catch specific patterns first
    // (e.g., $() substitution, backticks, etc.) since they have more precise error messages
    validateMalformedTokenInjection,
  ]

  // SECURITY: We must NOT short-circuit when a non-misparsing validator
  // returns 'ask' if there are still misparsing validators later in the list.
  // Non-misparsing ask results are discarded by the gate in bashPermissions.ts
  // (the gate only blocks when isBashSecurityCheckForMisparsing is set). If
  // validateRedirections (non-misparsing) fires first on `>`, it returns
  // ask-without-flag — but validateBackslashEscapedOperators (misparsing,
  // later in the list) would have caught `\;` WITH the flag. Short-circuiting
  // lets a payload like `cat safe.txt \; echo /etc/passwd > ./out` slip
  // through.
  //
  // Fix: defer non-misparsing ask results. Continue running validators; if any
  // misparsing validator fires, return THAT (with the flag). Only if we reach
  // the end without a misparsing ask, return the deferred non-misparsing ask.
  let deferredNonMisparsingResult: PermissionResult | null = null
  for (const validator of validators) {
    const result = validator(context)
    if (result.behavior === 'ask') {
      if (nonMisparsingValidators.has(validator)) {
        if (deferredNonMisparsingResult === null) {
          deferredNonMisparsingResult = result
        }
        continue
      }
      return { ...result, isBashSecurityCheckForMisparsing: true as const }
    }
  }
  if (deferredNonMisparsingResult !== null) {
    return deferredNonMisparsingResult
  }

  return {
    behavior: 'passthrough',
    message: 'Command passed all security checks',
  }
}

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Builds the validation context from regex-based quote extraction and runs
 * the shared validator pipeline.
 *
 * @param command - the raw command string to vet.
 * @returns `ask` (possibly flagged as misparsing) or `passthrough`.
 */
export function bashCommandIsSafe_DEPRECATED(
  command: string,
): PermissionResult {
  const hazard = detectPreParseHazard(command)
  if (hazard !== null) {
    return hazard
  }

  // SECURITY: Strip heredoc bodies before running security validators.
  // Only strip bodies for quoted/escaped delimiters (<<'EOF', <<\EOF) where
  // the body is literal text — $(), backticks, and ${} are NOT expanded.
  // Unquoted heredocs (<<EOF) undergo full shell expansion, so their bodies
  // may contain executable command substitutions that validators must see.
  // When extractHeredocs bails out (can't parse safely), the raw command
  // goes through all validators — which is the safe direction.
  const { processedCommand } = extractHeredocs(command, { quotedOnly: true })

  const baseCommand = command.split(' ')[0] || ''
  const { withDoubleQuotes, fullyUnquoted, unquotedKeepQuoteChars } =
    extractQuotedContent(processedCommand, baseCommand === 'jq')

  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),
    fullyUnquotedPreStrip: fullyUnquoted,
    unquotedKeepQuoteChars,
  }

  return runLegacyValidators(context)
}

/**
 * @deprecated Legacy regex/shell-quote path. Only used when tree-sitter is
 * unavailable. The primary gate is parseForSecurity (ast.ts).
 *
 * Async version of bashCommandIsSafe_DEPRECATED that uses tree-sitter when
 * available for more accurate quote analysis. Falls back to the sync regex
 * version when tree-sitter is not available.
 *
 * This should be used by async callers (bashPermissions.ts, bashCommandHelpers.ts).
 * Sync callers (readOnlyValidation.ts) should continue using
 * bashCommandIsSafe_DEPRECATED().
 *
 * @param command - the raw command string to vet.
 * @param onDivergence - optional callback invoked (instead of logging an
 *   event) when tree-sitter and regex quote extraction disagree.
 * @returns `ask` (possibly flagged as misparsing) or `passthrough`.
 */
export async function bashCommandIsSafeAsync_DEPRECATED(
  command: string,
  onDivergence?: () => void,
): Promise<PermissionResult> {
  // Try to get tree-sitter analysis
  const parsed = await ParsedCommand.parse(command)
  const tsAnalysis = parsed?.getTreeSitterAnalysis() ?? null

  // If no tree-sitter, fall back to sync version
  if (!tsAnalysis) {
    return bashCommandIsSafe_DEPRECATED(command)
  }

  // Run the same security checks but with tree-sitter enriched context.
  // The early checks (control chars, shell-quote bug) don't benefit from
  // tree-sitter, so we run them identically.
  const hazard = detectPreParseHazard(command)
  if (hazard !== null) {
    return hazard
  }

  const { processedCommand } = extractHeredocs(command, { quotedOnly: true })

  const baseCommand = command.split(' ')[0] || ''

  // Use tree-sitter quote context as primary, but keep the regex extraction
  // as a reference for divergence logging.
  const tsQuote = tsAnalysis.quoteContext
  const regexQuote = extractQuotedContent(
    processedCommand,
    baseCommand === 'jq',
  )

  const withDoubleQuotes = tsQuote.withDoubleQuotes
  const fullyUnquoted = tsQuote.fullyUnquoted
  const unquotedKeepQuoteChars = tsQuote.unquotedKeepQuoteChars

  const context: ValidationContext = {
    originalCommand: command,
    baseCommand,
    unquotedContent: withDoubleQuotes,
    fullyUnquotedContent: stripSafeRedirections(fullyUnquoted),
    fullyUnquotedPreStrip: fullyUnquoted,
    unquotedKeepQuoteChars,
    treeSitter: tsAnalysis,
  }

  // Log divergence between tree-sitter and regex quote extraction.
  // Skip for heredoc commands: tree-sitter strips (quoted) heredoc bodies
  // to nothing while the regex path replaces them with placeholder strings
  // (via extractHeredocs), so the two outputs can never match. Logging
  // divergence for every heredoc command would poison the signal.
  //
  // onDivergence callback: when called in a fanout loop (bashPermissions.ts
  // Promise.all over subcommands), the caller batches divergences into a
  // single logEvent instead of N separate calls. Each logEvent triggers
  // getEventMetadata() → buildProcessMetrics() → process.memoryUsage() →
  // /proc/self/stat read; with memoized metadata these resolve as microtasks
  // and starve the event loop (CC-643). Single-command callers omit the
  // callback and get the original per-call logEvent behavior.
  if (!tsAnalysis.dangerousPatterns.hasHeredoc) {
    const hasDivergence =
      tsQuote.fullyUnquoted !== regexQuote.fullyUnquoted ||
      tsQuote.withDoubleQuotes !== regexQuote.withDoubleQuotes
    if (hasDivergence) {
      if (onDivergence) {
        onDivergence()
      } else {
        logEvent('tengu_tree_sitter_security_divergence', {
          quoteContextDivergence: true,
        })
      }
    }
  }

  return runLegacyValidators(context)
}