command-path-extraction.ts
import path from "path";
import os from "os";
import { expandHome } from "./path-utils.js";

/** One leading and/or one trailing quote character (single or double). */
const SURROUNDING_QUOTES = /^["']|["']$/g;

/** Drive-letter path such as `C:\dir` or `C:/dir`. */
const WINDOWS_DRIVE_PATH = /^[A-Za-z]:[\\/]/;

/** `word:` prefix: URLs (`https://...`) and other scheme-like strings. */
const SCHEME_PREFIX = /^[a-zA-Z]+:/;

/** `-x...` (anything but `-` or `/` after the dash) or `--name...`. */
const DASH_FLAG = /^-(?:[^-\/]|-[a-zA-Z])/;

/**
 * Windows-style switch: `/c`, `/?`, `/out:value`.
 * A second path separator before any `:` (as in `/etc/passwd`) disqualifies
 * the token, so it is treated as a path instead.
 */
const WINDOWS_SWITCH = /^\/[A-Za-z?][A-Za-z0-9?_-]*(?::.*)?$/;

/** `%VAR%`, `${VAR}` or `$VAR`; exactly one capture group is set per match. */
const ENV_REFERENCE =
  /%([^%]+)%|\$\{([^}]+)\}|\$([A-Za-z_][A-Za-z0-9_]*)/g;

/**
 * Extract file and directory paths from shell command arguments.
 *
 * The command is tokenized and every argument after the command name is
 * inspected so that callers can validate the result against their allowed
 * directories. Recognised forms:
 * - Windows paths (`C:\path`, `\\server\share\path`)
 * - Unix paths (`/path`, `~/path`)
 * - Relative paths (`./path`, `../path`), resolved against `workdir`
 * - Quoted paths (`"path with spaces"`)
 * - Environment variables (`$HOME`, `${HOME}`, `%USERPROFILE%`)
 * - Values attached to a flag (`--output=/path`, `/out:C:\path`)
 *
 * Known limitations: bare file names without a separator (`notes.txt`) and
 * values glued to a short flag without `=` (`-I/usr/include`) are not
 * reported, and the command name itself is never inspected.
 *
 * @param command - The shell command string
 * @param workdir - Working directory for resolving relative paths
 * @returns Array of absolute paths found in the command, in argument order
 */
export function extractPathsFromCommand(
  command: string,
  workdir: string
): string[] {
  if (!command || !command.trim()) {
    return [];
  }

  const paths: string[] = [];

  // slice(1): the first token is the command name, not an argument.
  for (const token of tokenizeCommand(command).slice(1)) {
    // A flag is never a path itself, but it may carry one (`--out=/tmp/x`).
    // A flag's separate argument (`-o /tmp/x`) is simply the next token and
    // is classified on its own in the following iteration.
    const candidate = isFlagOrOption(token) ? embeddedFlagValue(token) : token;
    if (candidate === null || !isLikelyPathArgument(candidate)) {
      continue;
    }

    const resolved = resolvePath(candidate, workdir);
    if (resolved !== null) {
      paths.push(resolved);
    }
  }

  return paths;
}

/**
 * Split a command string on unquoted spaces and tabs.
 *
 * Quote characters and backslashes are kept in the returned tokens; callers
 * strip surrounding quotes themselves. A backslash keeps the following
 * character from being interpreted as a quote or as a separator.
 */
function tokenizeCommand(command: string): string[] {
  const tokens: string[] = [];
  let current = "";
  let openQuote: string | null = null;
  let escaped = false;

  const flush = (): void => {
    const trimmed = current.trim();
    if (trimmed) {
      tokens.push(trimmed);
    }
    current = "";
  };

  for (const char of command) {
    if (escaped) {
      current += char;
      escaped = false;
      continue;
    }

    if (char === "\\") {
      escaped = true;
      current += char;
      continue;
    }

    if (char === '"' || char === "'") {
      if (openQuote === null) {
        openQuote = char;
      } else if (openQuote === char) {
        openQuote = null;
      }
      // A quote of the other kind inside an open quote is literal text.
      current += char;
      continue;
    }

    if ((char === " " || char === "\t") && openQuote === null) {
      flush();
      continue;
    }

    current += char;
  }

  flush();
  return tokens;
}

/** Remove one leading and one trailing quote character, if present. */
function stripQuotes(token: string): string {
  return token.replace(SURROUNDING_QUOTES, "");
}

/**
 * Check if a token is a flag or option (not a path).
 *
 * Dash flags (`-s`, `--long-flag`) are recognised everywhere. Slash switches
 * (`/c`, `/out:file`) are recognised only on Windows: on other platforms a
 * leading `/` always denotes an absolute path and must be reported for
 * validation, never skipped as a flag.
 */
function isFlagOrOption(token: string): boolean {
  const value = stripQuotes(token);

  if (DASH_FLAG.test(value)) {
    return true;
  }

  return process.platform === "win32" && WINDOWS_SWITCH.test(value);
}

/**
 * Return the value attached to a flag token, or null when there is none.
 *
 * Dash flags attach with `=` (`--out=value`); slash switches attach with `:`
 * (`/out:value`). The value is returned unstripped; the path helpers remove
 * any surrounding quote themselves.
 */
function embeddedFlagValue(token: string): string | null {
  const separator = stripQuotes(token).startsWith("/") ? ":" : "=";
  const index = token.indexOf(separator);

  if (index === -1 || index === token.length - 1) {
    return null;
  }

  return token.slice(index + 1);
}

/**
 * Check if a token is likely a file/directory path argument.
 *
 * Deliberately over-inclusive: a false positive only causes an extra
 * validation, while a false negative lets a path go unchecked.
 */
function isLikelyPathArgument(token: string): boolean {
  const value = stripQuotes(token);

  // Windows absolute path: C:\path or \\server\share\path
  if (WINDOWS_DRIVE_PATH.test(value) || value.startsWith("\\\\")) {
    return true;
  }

  // Unix absolute path (this also covers MSYS-style /c/path)
  if (value.startsWith("/")) {
    return true;
  }

  // Home directory: ~ or ~/path
  if (value === "~" || value.startsWith("~/")) {
    return true;
  }

  // Relative path: ./path or ../path
  if (value.startsWith("./") || value.startsWith("../")) {
    return true;
  }

  // Environment variable reference: $HOME/path or %USERPROFILE%\path
  if (value.includes("$") || value.includes("%")) {
    return true;
  }

  // Anything else containing a separator, unless it starts with a `word:`
  // prefix (URLs and similar; real drive paths were accepted above).
  const hasSeparator = value.includes("/") || value.includes("\\");
  return hasSeparator && !SCHEME_PREFIX.test(value);
}

/**
 * Resolve a path token to an absolute, normalized path.
 *
 * Quotes are stripped, environment variables and the home directory are
 * expanded, and the result is resolved against `workdir` (an absolute token
 * overrides `workdir`; `path.resolve` already normalizes its result).
 *
 * @returns The absolute path, or null if expansion or resolution throws
 */
function resolvePath(token: string, workdir: string): string | null {
  try {
    const expanded = expandHome(
      expandEnvironmentVariables(stripQuotes(token))
    );
    return path.resolve(workdir, expanded);
  } catch {
    // Best effort: a token that cannot be resolved is not reported.
    return null;
  }
}

/**
 * Expand `%VAR%`, `${VAR}` and `$VAR` references in a path string.
 *
 * All three forms are handled in a single pass so that text produced by one
 * expansion is never expanded again. References to variables that are not
 * set are left untouched; a variable set to the empty string expands to
 * nothing.
 */
function expandEnvironmentVariables(pathStr: string): string {
  return pathStr.replace(
    ENV_REFERENCE,
    (
      match: string,
      percentName?: string,
      bracedName?: string,
      bareName?: string
    ): string => {
      const name = percentName ?? bracedName ?? bareName;
      const value = name === undefined ? undefined : process.env[name];
      return value ?? match;
    }
  );
}