/ src / utils / command-path-extraction.ts
command-path-extraction.ts
  1  import path from "path";
  2  import os from "os";
  3  import { expandHome } from "./path-utils.js";
  4  
  5  /**
  6   * Extract file and directory paths from shell command arguments
  7   * 
  8   * This function parses shell commands to identify file/directory paths
  9   * that need to be validated against allowed directories. It handles:
 10   * - Windows paths (C:\path, \\server\share\path)
 11   * - Unix paths (/path, ~/path)
 12   * - Relative paths (./path, ../path) - resolved to absolute
 13   * - Quoted paths ("path with spaces")
 14   * - Environment variables ($HOME, %USERPROFILE%)
 15   * 
 16   * @param command - The shell command string
 17   * @param workdir - Working directory for resolving relative paths
 18   * @returns Array of absolute paths found in the command
 19   */
 20  export function extractPathsFromCommand(
 21    command: string,
 22    workdir: string
 23  ): string[] {
 24    if (!command || !command.trim()) {
 25      return [];
 26    }
 27  
 28    const paths: string[] = [];
 29    const tokens = tokenizeCommand(command);
 30  
 31    for (let i = 0; i < tokens.length; i++) {
 32      const token = tokens[i];
 33      
 34      // Skip command name itself (first token)
 35      if (i === 0) {
 36        continue;
 37      }
 38  
 39      // Skip flags and options
 40      if (isFlagOrOption(token)) {
 41        // Some flags take arguments (like -o output.txt)
 42        // Check if next token might be a path argument
 43        if (i + 1 < tokens.length) {
 44          const nextToken = tokens[i + 1];
 45          if (isLikelyPathArgument(nextToken)) {
 46            const resolvedPath = resolvePath(nextToken, workdir);
 47            if (resolvedPath) {
 48              paths.push(resolvedPath);
 49              i++; // Skip the next token since we processed it
 50            }
 51          }
 52        }
 53        continue;
 54      }
 55  
 56      // Check if token is a path
 57      if (isLikelyPathArgument(token)) {
 58        const resolvedPath = resolvePath(token, workdir);
 59        if (resolvedPath) {
 60          paths.push(resolvedPath);
 61        }
 62      }
 63    }
 64  
 65    return paths;
 66  }
 67  
 68  /**
 69   * Tokenize command string, handling quoted arguments
 70   */
 71  function tokenizeCommand(command: string): string[] {
 72    const tokens: string[] = [];
 73    let current = "";
 74    let inDoubleQuotes = false;
 75    let inSingleQuotes = false;
 76    let escaped = false;
 77  
 78    for (let i = 0; i < command.length; i++) {
 79      const char = command[i];
 80  
 81      if (escaped) {
 82        current += char;
 83        escaped = false;
 84        continue;
 85      }
 86  
 87      if (char === "\\") {
 88        escaped = true;
 89        current += char;
 90        continue;
 91      }
 92  
 93      if (char === '"' && !inSingleQuotes) {
 94        inDoubleQuotes = !inDoubleQuotes;
 95        current += char;
 96        continue;
 97      }
 98  
 99      if (char === "'" && !inDoubleQuotes) {
100        inSingleQuotes = !inSingleQuotes;
101        current += char;
102        continue;
103      }
104  
105      if ((char === " " || char === "\t") && !inDoubleQuotes && !inSingleQuotes) {
106        if (current.trim()) {
107          tokens.push(current.trim());
108          current = "";
109        }
110        continue;
111      }
112  
113      current += char;
114    }
115  
116    if (current.trim()) {
117      tokens.push(current.trim());
118    }
119  
120    return tokens;
121  }
122  
/**
 * Check if a token is a flag or option (not a path).
 *
 * Dash-style options (-s, -9, --long-flag) are recognized on every platform.
 * Slash-style switches (/S, /?, /A:H, /out:C:\dir) are recognized only when
 * `platform` is "win32", and only when the switch name contains no further
 * path separator, so that absolute paths such as /etc/passwd or /c/Users are
 * not mistaken for flags. On Windows a single-segment token like /tmp is
 * ambiguous and is treated as a switch.
 *
 * @param token - A single command token, optionally wrapped in quotes
 * @param platform - Platform identifier; defaults to `process.platform`
 * @returns true when the token is a flag/option rather than a path candidate
 */
function isFlagOrOption(
  token: string,
  platform: string = process.platform
): boolean {
  // Remove surrounding quotes
  const cleanToken = token.replace(/^["']|["']$/g, "");

  // Dash style: -s / -9 (single dash + non-dash, non-slash) or --long-flag
  if (/^-[^-/]/.test(cleanToken) || /^--[a-zA-Z]/.test(cleanToken)) {
    return true;
  }

  // Slash style (Windows only): "/" + a name without separators, optionally
  // followed by ":" or "=" and a value.
  if (platform === "win32") {
    return /^\/[^-/\\:=][^/\\:=]*(?:$|[:=])/.test(cleanToken);
  }

  return false;
}
142  
/**
 * Check if a token is likely a file/directory path argument.
 *
 * Surrounding quotes are ignored. A token counts as a path when it is one of:
 * - "." or ".." on their own
 * - a Windows absolute path (C:\path, C:/path, \\server\share)
 * - anything starting with "/" (Unix absolute, or /c/path style)
 * - "~" or "~/..."
 * - "./..." or "../..."
 * - anything containing "$" or "%" (possible environment variable)
 * - anything else containing "/" or "\" that does not start with a
 *   letters-then-colon prefix (URLs, host:path and similar)
 *
 * @param token - A single command token, optionally wrapped in quotes
 * @returns true when the token should be resolved and validated as a path
 */
function isLikelyPathArgument(token: string): boolean {
  // Remove surrounding quotes
  const cleanToken = token.replace(/^["']|["']$/g, "");

  // Current / parent directory on their own: "ls .." leaves the working
  // directory even though the token has no separator.
  if (cleanToken === "." || cleanToken === "..") {
    return true;
  }

  // Windows absolute path: C:\path or \\server\share\path
  if (/^[A-Za-z]:[\\/]/.test(cleanToken) || cleanToken.startsWith("\\\\")) {
    return true;
  }

  // Rooted at "/": Unix absolute path, including /c/path style
  if (cleanToken.startsWith("/")) {
    return true;
  }

  // Home directory: ~/path or ~
  if (cleanToken === "~" || cleanToken.startsWith("~/")) {
    return true;
  }

  // Relative path: ./path or ../path
  if (cleanToken.startsWith("./") || cleanToken.startsWith("../")) {
    return true;
  }

  // Path with environment variable: $HOME/path or %USERPROFILE%\path
  if (cleanToken.includes("$") || cleanToken.includes("%")) {
    return true;
  }

  // Any remaining token with a separator is treated as a path unless it
  // starts with a letters-then-colon prefix (http://, ftp://, host:...).
  if (cleanToken.includes("/") || cleanToken.includes("\\")) {
    return !/^[a-zA-Z]+:/.test(cleanToken);
  }

  return false;
}
190  
/**
 * Turn a path token into a normalized absolute path.
 *
 * One leading and one trailing quote character are removed, environment
 * variable references are expanded, the result is passed through
 * `expandHome` (imported helper; presumably expands a leading "~"), and the
 * outcome is resolved against `workdir` unless it is already absolute.
 *
 * @param token - The raw token taken from the command
 * @param workdir - Base directory for tokens that are not absolute
 * @returns The absolute path, or null when any step throws
 */
function resolvePath(token: string, workdir: string): string | null {
  try {
    const unquoted = token.replace(/^["']|["']$/g, "");
    const expanded = expandHome(expandEnvironmentVariables(unquoted));

    // Absolute inputs are resolved on their own; everything else is anchored
    // at the working directory.
    const absolute = path.isAbsolute(expanded)
      ? path.resolve(expanded)
      : path.resolve(workdir, expanded);

    return path.normalize(absolute);
  } catch {
    // If resolution fails, return null (don't block, but don't validate)
    return null;
  }
}
220  
/**
 * Expand environment variables in a path string.
 *
 * Three reference styles are substituted, in this order: Windows `%VAR%`,
 * Unix `$VAR`, and Unix `${VAR}`. A reference is left untouched when the
 * variable is not set or is set to an empty string.
 *
 * @param pathStr - Text that may contain environment variable references
 * @returns The text with every resolvable reference replaced by its value
 */
function expandEnvironmentVariables(pathStr: string): string {
  // Only real, non-empty string values count. A plain truthiness check would
  // also pick up members inherited from Object.prototype (e.g. "toString"),
  // and substitute a function's source text into the path.
  const substitute = (match: string, varName: string): string => {
    const value = process.env[varName];
    return typeof value === "string" && value !== "" ? value : match;
  };

  return pathStr
    .replace(/%([^%]+)%/g, substitute) // Windows: %VAR%
    .replace(/\$([A-Za-z_][A-Za-z0-9_]*)/g, substitute) // Unix: $VAR
    .replace(/\$\{([^}]+)\}/g, substitute); // Unix: ${VAR}
}
241