/ utils / powershell / staticPrefix.ts
staticPrefix.ts
  1  /**
  2   * PowerShell static command prefix extraction.
  3   *
  4   * Mirrors bash's getCommandPrefixStatic / getCompoundCommandPrefixesStatic
  5   * (src/utils/bash/prefix.ts) but uses the PowerShell AST parser instead of
  6   * tree-sitter. The AST gives us cmd.name and cmd.args already split; for
  7   * external commands we feed those into the same fig-spec walker bash uses
  8   * (src/utils/shell/specPrefix.ts) — git/npm/kubectl CLIs are shell-agnostic.
  9   *
 10   * Feeds the "Yes, and don't ask again for: ___" editable input in the
 11   * permission dialog — static extractor provides a best-guess prefix, user
 12   * edits it down if needed.
 13   */
 14  
 15  import { getCommandSpec } from '../bash/registry.js'
 16  import { buildPrefix, DEPTH_RULES } from '../shell/specPrefix.js'
 17  import { countCharInString } from '../stringUtils.js'
 18  import { NEVER_SUGGEST } from './dangerousCmdlets.js'
 19  import {
 20    getAllCommands,
 21    type ParsedCommandElement,
 22    parsePowerShellCommand,
 23  } from './parser.js'
 24  
/**
 * Extract a static prefix from a single parsed command element.
 * Returns null for commands we won't suggest (shells, eval cmdlets, path-like
 * invocations) or can't extract a meaningful prefix from.
 *
 * Decision order, as implemented below:
 *   1. `nameType === 'application'`, empty name, or a NEVER_SUGGEST hit → null.
 *   2. `nameType === 'cmdlet'` → the bare cmdlet name.
 *   3. Otherwise (external command): require a literal command name and
 *      literal/parameter args, ask buildPrefix for a prefix, then verify the
 *      prefix word-by-word against the argv and reject bare roots of
 *      subcommand-aware CLIs.
 *
 * @param cmd - Parsed command element; this function reads its `nameType`,
 *   `name`, `args` and `elementTypes` fields.
 * @returns The cmdlet name, the verified buildPrefix result, or null when no
 *   suggestion should be offered.
 */
async function extractPrefixFromElement(
  cmd: ParsedCommandElement,
): Promise<string | null> {
  // nameType === 'application' means the raw name had path chars (./x, x\y,
  // x.exe) — PowerShell will run a file, not a named cmdlet. Don't suggest.
  // Same reasoning as the permission engine's nameType gate (PR #20096).
  if (cmd.nameType === 'application') {
    return null
  }

  // An empty/missing name gives nothing to build a prefix from.
  const name = cmd.name
  if (!name) {
    return null
  }

  // NEVER_SUGGEST is consulted with the lowercased name, so the lookup is
  // case-insensitive with respect to how the user typed the command.
  if (NEVER_SUGGEST.has(name.toLowerCase())) {
    return null
  }

  // Cmdlets (Verb-Noun): the name alone is the right prefix granularity.
  // Get-Process -Name pwsh → Get-Process. There's no subcommand concept.
  if (cmd.nameType === 'cmdlet') {
    return name
  }

  // External command. Guard the argv before feeding it to buildPrefix.
  //
  // elementTypes[0] (command name) must be a literal. `& $cmd status` has
  // elementTypes[0]='Variable', name='$cmd' — classifies as 'unknown' (no path
  // chars), passes NEVER_SUGGEST, getCommandSpec('$cmd')=null → returns bare
  // '$cmd' → dead rule. Cheap to gate here.
  //
  // elementTypes[1..] (args) must all be StringConstant or Parameter. Anything
  // dynamic (Variable/SubExpression/ScriptBlock/ExpandableString) would embed
  // `$foo`/`$(...)` in the prefix → dead rule.
  //
  // A missing elementTypes array also fails this first check (optional chain
  // yields undefined), so the loop below can index it directly.
  if (cmd.elementTypes?.[0] !== 'StringConstant') {
    return null
  }
  for (let i = 0; i < cmd.args.length; i++) {
    // Offset by one: elementTypes[0] is the command name, so args[i] is
    // described by elementTypes[i + 1]. A missing entry (undefined) is
    // rejected just like a dynamic element.
    const t = cmd.elementTypes[i + 1]
    if (t !== 'StringConstant' && t !== 'Parameter') {
      return null
    }
  }

  // Consult the fig spec — same oracle bash uses. If git's spec says -C takes
  // a value, buildPrefix skips -C /repo and finds `status` as a subcommand.
  // Lowercase for lookup: fig specs are filesystem paths (git.js), case-
  // sensitive on Linux. PowerShell is case-insensitive (Git === git) so `Git`
  // must resolve to the git spec. macOS hides this bug (case-insensitive fs).
  // Call buildPrefix unconditionally — calculateDepth consults DEPTH_RULES
  // before its own `if (!spec) return 2` fallback, so gcloud/aws/kubectl/az
  // get depth-aware prefixes even without a loaded spec. The old
  // `if (!spec) return name` short-circuit produced bare `gcloud:*` which
  // auto-allows every gcloud subcommand.
  //
  // Note the asymmetry: the spec is looked up by `nameLower`, but buildPrefix
  // receives `name` with the user's original casing.
  const nameLower = name.toLowerCase()
  const spec = await getCommandSpec(nameLower)
  const prefix = await buildPrefix(name, cmd.args, spec)

  // Post-buildPrefix word integrity: buildPrefix space-joins consumed args
  // into the prefix string. parser.ts:685 stores .value (quote-stripped) for
  // single-quoted literals: git 'push origin' → args=['push origin']. If
  // that arg is consumed, buildPrefix emits 'git push origin' — silently
  // promoting 1 argv element to 3 prefix words. Rule PowerShell(git push
  // origin:*) then matches `git push origin --force` (3-element argv) — not
  // what the user approved.
  //
  // The old set-membership check (`!cmd.args.includes(word)`) was defeated
  // by decoy args: `git 'push origin' push origin` → args=['push origin',
  // 'push', 'origin'], prefix='git push origin'. Each word ∈ args (decoys at
  // indices 1,2 satisfy .includes()) → passed. Now POSITIONAL: walk args in
  // order; each prefix word must exactly match the next non-flag arg. A
  // positional that doesn't match means buildPrefix split it. Flags and
  // their values are skipped (buildPrefix skips them too) so
  // `git -C '/my repo' status` and `git commit -m 'fix typo'` still pass.
  // Backslash (C:\repo) rejected: dead over-specific rule.
  //
  // argIdx is a single cursor shared across all prefix words: it only moves
  // forward, so each arg can satisfy at most one prefix word.
  let argIdx = 0
  // slice(1) drops the first prefix word (the command name itself); only the
  // words that came from args need to be checked against args.
  for (const word of prefix.split(' ').slice(1)) {
    if (word.includes('\\')) return null
    while (argIdx < cmd.args.length) {
      const a = cmd.args[argIdx]!
      // Exact match: leave argIdx pointing at the matched arg; it is consumed
      // by the argIdx++ after the while loop.
      if (a === word) break
      if (a.startsWith('-')) {
        // Step past the flag itself.
        argIdx++
        // Only skip the flag's value if the spec says this flag takes a
        // value argument. Without spec info, treat as a switch (no value)
        // — fail-safe avoids over-skipping positional args. (bug #16)
        //
        // The candidate value is never skipped when it equals the word we
        // are looking for or when it is itself a flag.
        if (
          spec?.options &&
          argIdx < cmd.args.length &&
          cmd.args[argIdx] !== word &&
          !cmd.args[argIdx]!.startsWith('-')
        ) {
          // NOTE(review): the flag is lowercased before being compared with
          // the spec's option names. If a spec distinguishes options by case
          // (e.g. `-C` vs `-c`), this consults the lowercase entry — confirm
          // that is the intended lookup.
          const flagLower = a.toLowerCase()
          const opt = spec.options.find(o =>
            Array.isArray(o.name)
              ? o.name.includes(flagLower)
              : o.name === flagLower,
          )
          if (opt?.args) {
            argIdx++
          }
        }
        continue
      }
      // Positional arg that isn't the expected word → arg was split.
      return null
    }
    // The while loop ended by exhausting args rather than by `break`: the
    // prefix word was never found as a whole argv element.
    if (argIdx >= cmd.args.length) return null
    // Consume the matched arg so the next prefix word starts after it.
    argIdx++
  }

  // Bare-root guard: buildPrefix returns 'git' for `git` with no subcommand
  // found (empty args, or only global flags). That's too broad — would
  // auto-allow `git push --force` forever. Bash's extractor doesn't gate this
  // (bash/prefix.ts:363, separate fix). Reject single-word results for
  // commands whose spec declares subcommands OR that have DEPTH_RULES entries
  // (gcloud, aws, kubectl, etc.) which implies subcommand structure even
  // without a loaded spec. (bug #17)
  if (
    !prefix.includes(' ') &&
    (spec?.subcommands?.length || DEPTH_RULES[nameLower])
  ) {
    return null
  }
  return prefix
}
157  
/**
 * Suggest a permission-rule prefix for a PowerShell command string.
 *
 * The command is parsed and only its first real command (a CommandAst
 * element) is considered; the resulting prefix seeds the permission dialog's
 * "don't ask again for: ___" editable input.
 *
 * @param command - Raw PowerShell command text.
 * @returns `null` when the command does not parse; otherwise an object whose
 *   `commandPrefix` is the extracted prefix, or `null` when there is no
 *   CommandAst element or no safe prefix can be derived from it (shell
 *   invocation, path-like name, bare subcommand-aware command, ...).
 */
export async function getCommandPrefixStatic(
  command: string,
): Promise<{ commandPrefix: string | null } | null> {
  const ast = await parsePowerShellCommand(command)
  if (ast.valid) {
    // getAllCommands yields entries from both statement.commands and
    // statement.nestedCommands (&&/||/if/for). Synthetic CommandExpressionAst
    // entries (expression pipeline sources, non-PipelineAst statement
    // placeholders) are passed over; the first genuine CommandAst wins.
    for (const element of getAllCommands(ast)) {
      if (element.elementType === 'CommandAst') {
        const commandPrefix = await extractPrefixFromElement(element)
        return { commandPrefix }
      }
    }
    // Parsed fine, but nothing in it is an actual command invocation.
    return { commandPrefix: null }
  }
  return null
}
187  
/**
 * Suggest permission-rule prefixes for every subcommand of a compound
 * PowerShell command.
 *
 * Example: `Get-Process; git status && npm test` yields one prefix per
 * subcommand. In the multi-command case, subcommands for which
 * `excludeSubcommand` returns true (e.g. already read-only/auto-allowed) are
 * left out — there is no point suggesting a rule for them. Prefixes that
 * share a root command are then merged through a word-aligned longest common
 * prefix: `npm run test && npm run lint` → `npm run`.
 *
 * The filter is handed the ParsedCommandElement rather than cmd.text because
 * PowerShell's read-only check (isAllowlistedCommand) needs the element's
 * structured fields (nameType, args). Handing over text would force a
 * reparse, which spawns pwsh.exe per subcommand — expensive and wasteful when
 * the parsed elements are already available here. Bash's equivalent passes
 * text because BashTool.isReadOnly works from regex/patterns, not parsed AST.
 *
 * @param command - Raw PowerShell command text.
 * @param excludeSubcommand - Optional predicate; elements it accepts are
 *   skipped when the command contains more than one CommandAst element.
 * @returns Suggested prefixes, one per root command group; empty when the
 *   command does not parse or nothing safe can be suggested.
 */
export async function getCompoundCommandPrefixesStatic(
  command: string,
  excludeSubcommand?: (element: ParsedCommandElement) => boolean,
): Promise<string[]> {
  const ast = await parsePowerShellCommand(command)
  if (!ast.valid) {
    return []
  }

  const elements = getAllCommands(ast).filter(
    element => element.elementType === 'CommandAst',
  )

  // Zero or one command: nothing to collapse, and the exclusion filter is
  // not consulted on this path.
  if (elements.length === 0) {
    return []
  }
  if (elements.length === 1) {
    const only = await extractPrefixFromElement(elements[0]!)
    return only ? [only] : []
  }

  // Collect a prefix for each subcommand that is neither excluded nor
  // rejected by the single-element extractor.
  const candidates: string[] = []
  for (const element of elements) {
    const skip = excludeSubcommand == null ? false : excludeSubcommand(element)
    if (skip) {
      continue
    }
    const candidate = await extractPrefixFromElement(element)
    if (candidate) {
      candidates.push(candidate)
    }
  }
  if (candidates.length === 0) {
    return []
  }

  // Bucket the candidates by root command (their first word). PowerShell is
  // case-insensitive (Git === git, Get-Process === get-process), so the
  // bucket key is lowercased; the emitted prefix keeps first-seen casing.
  const byRoot = new Map<string, string[]>()
  for (const candidate of candidates) {
    const spaceAt = candidate.indexOf(' ')
    const root = spaceAt === -1 ? candidate : candidate.slice(0, spaceAt)
    const rootKey = root.toLowerCase()
    const siblings = byRoot.get(rootKey)
    if (siblings === undefined) {
      byRoot.set(rootKey, [candidate])
    } else {
      siblings.push(candidate)
    }
  }

  // Collapse each bucket with a word-aligned longest common prefix:
  // `npm run test` + `npm run lint` → `npm run`. But NEVER collapse down to
  // a bare subcommand-aware root: `git add` + `git commit` would reduce to
  // `git`, which the bare-root guard in extractPrefixFromElement refuses as
  // too broad. Letting it through here would suggest PowerShell(git:*) and
  // auto-allow `git push --force` forever. In that case the whole bucket is
  // dropped rather than suggesting either the too-broad root or N
  // un-collapsed rules.
  //
  // Bash's getCompoundCommandPrefixesStatic performs the same collapse
  // without this guard (src/utils/bash/prefix.ts:360-365) — separate fix.
  const suggestions: string[] = []
  for (const [rootKey, members] of byRoot) {
    const shared = wordAlignedLCP(members)
    const wordCount = shared === '' ? 0 : countCharInString(shared, ' ') + 1
    if (wordCount <= 1) {
      // Single-word result: check whether this root has subcommand structure
      // (fig spec subcommands or a DEPTH_RULES entry). getCommandSpec is
      // LRU-memoized; one lookup per distinct root.
      const rootSpec = await getCommandSpec(rootKey)
      const subcommandAware =
        Boolean(rootSpec?.subcommands?.length) || Boolean(DEPTH_RULES[rootKey])
      if (subcommandAware) {
        continue
      }
    }
    suggestions.push(shared)
  }
  return suggestions
}
285  
/**
 * Longest common prefix measured in whole space-separated words — a word is
 * either kept entirely or dropped, never cut in the middle.
 *
 * Words are compared case-insensitively (PowerShell: Git === git); the
 * result is spelled with the casing of the first string.
 *
 *   ["npm run test", "npm run lint"] → "npm run"
 *   ["Git status", "git log"]        → "Git" (first-seen casing)
 *   ["Get-Process"]                  → "Get-Process"
 *   []                               → ""
 *
 * @param strings - Space-separated prefixes to compare.
 * @returns The shared leading words of all inputs joined by single spaces,
 *   or the empty string when the list is empty or nothing is shared.
 */
function wordAlignedLCP(strings: string[]): string {
  const total = strings.length
  if (total === 0) return ''
  const head = strings[0]!
  if (total === 1) return head

  const headWords = head.split(' ')
  // Fold the reference words once instead of on every comparison.
  const headFolded = headWords.map(word => word.toLowerCase())

  // Number of leading words of `head` still shared by every string seen.
  let shared = headWords.length
  for (const other of strings.slice(1)) {
    // Nothing left in common — later strings cannot bring words back.
    if (shared === 0) break
    const otherWords = other.split(' ')
    const limit = Math.min(shared, otherWords.length)
    let agree = 0
    while (
      agree < limit &&
      otherWords[agree]!.toLowerCase() === headFolded[agree]
    ) {
      agree += 1
    }
    shared = agree
  }

  return headWords.slice(0, shared).join(' ')
}