Cradicle Explorer

GrepTool.ts
  1  import { z } from 'zod/v4'
  2  import type { ValidationResult } from '../../Tool.js'
  3  import { buildTool, type ToolDef } from '../../Tool.js'
  4  import { getCwd } from '../../utils/cwd.js'
  5  import { isENOENT } from '../../utils/errors.js'
  6  import {
  7    FILE_NOT_FOUND_CWD_NOTE,
  8    suggestPathUnderCwd,
  9  } from '../../utils/file.js'
 10  import { getFsImplementation } from '../../utils/fsOperations.js'
 11  import { lazySchema } from '../../utils/lazySchema.js'
 12  import { expandPath, toRelativePath } from '../../utils/path.js'
 13  import {
 14    checkReadPermissionForTool,
 15    getFileReadIgnorePatterns,
 16    normalizePatternsToPath,
 17  } from '../../utils/permissions/filesystem.js'
 18  import type { PermissionDecision } from '../../utils/permissions/PermissionResult.js'
 19  import { matchWildcardPattern } from '../../utils/permissions/shellRuleMatching.js'
 20  import { getGlobExclusionsForPluginCache } from '../../utils/plugins/orphanedPluginFilter.js'
 21  import { ripGrep } from '../../utils/ripgrep.js'
 22  import { semanticBoolean } from '../../utils/semanticBoolean.js'
 23  import { semanticNumber } from '../../utils/semanticNumber.js'
 24  import { plural } from '../../utils/stringUtils.js'
 25  import { GREP_TOOL_NAME, getDescription } from './prompt.js'
 26  import {
 27    getToolUseSummary,
 28    renderToolResultMessage,
 29    renderToolUseErrorMessage,
 30    renderToolUseMessage,
 31  } from './UI.js'
 32  
 33  const inputSchema = lazySchema(() =>
 34    z.strictObject({
 35      pattern: z
 36        .string()
 37        .describe(
 38          'The regular expression pattern to search for in file contents',
 39        ),
 40      path: z
 41        .string()
 42        .optional()
 43        .describe(
 44          'File or directory to search in (rg PATH). Defaults to current working directory.',
 45        ),
 46      glob: z
 47        .string()
 48        .optional()
 49        .describe(
 50          'Glob pattern to filter files (e.g. "*.js", "*.{ts,tsx}") - maps to rg --glob',
 51        ),
 52      output_mode: z
 53        .enum(['content', 'files_with_matches', 'count'])
 54        .optional()
 55        .describe(
 56          'Output mode: "content" shows matching lines (supports -A/-B/-C context, -n line numbers, head_limit), "files_with_matches" shows file paths (supports head_limit), "count" shows match counts (supports head_limit). Defaults to "files_with_matches".',
 57        ),
 58      '-B': semanticNumber(z.number().optional()).describe(
 59        'Number of lines to show before each match (rg -B). Requires output_mode: "content", ignored otherwise.',
 60      ),
 61      '-A': semanticNumber(z.number().optional()).describe(
 62        'Number of lines to show after each match (rg -A). Requires output_mode: "content", ignored otherwise.',
 63      ),
 64      '-C': semanticNumber(z.number().optional()).describe('Alias for context.'),
 65      context: semanticNumber(z.number().optional()).describe(
 66        'Number of lines to show before and after each match (rg -C). Requires output_mode: "content", ignored otherwise.',
 67      ),
 68      '-n': semanticBoolean(z.boolean().optional()).describe(
 69        'Show line numbers in output (rg -n). Requires output_mode: "content", ignored otherwise. Defaults to true.',
 70      ),
 71      '-i': semanticBoolean(z.boolean().optional()).describe(
 72        'Case insensitive search (rg -i)',
 73      ),
 74      type: z
 75        .string()
 76        .optional()
 77        .describe(
 78          'File type to search (rg --type). Common types: js, py, rust, go, java, etc. More efficient than include for standard file types.',
 79        ),
 80      head_limit: semanticNumber(z.number().optional()).describe(
 81        'Limit output to first N lines/entries, equivalent to "| head -N". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). Defaults to 250 when unspecified. Pass 0 for unlimited (use sparingly — large result sets waste context).',
 82      ),
 83      offset: semanticNumber(z.number().optional()).describe(
 84        'Skip first N lines/entries before applying head_limit, equivalent to "| tail -n +N | head -N". Works across all output modes. Defaults to 0.',
 85      ),
 86      multiline: semanticBoolean(z.boolean().optional()).describe(
 87        'Enable multiline mode where . matches newlines and patterns can span lines (rg -U --multiline-dotall). Default: false.',
 88      ),
 89    }),
 90  )
 91  type InputSchema = ReturnType<typeof inputSchema>
 92  
 93  // Version control system directories to exclude from searches
 94  // These are excluded automatically because they create noise in search results
 95  const VCS_DIRECTORIES_TO_EXCLUDE = [
 96    '.git',
 97    '.svn',
 98    '.hg',
 99    '.bzr',
100    '.jj',
101    '.sl',
102  ] as const
103  
// Default cap on grep results when head_limit is unspecified. Unbounded content-mode
// greps can fill up to the 20KB persist threshold (~6-24K tokens/grep-heavy session).
// 250 is generous enough for exploratory searches while preventing context bloat.
// Pass head_limit=0 explicitly for unlimited.
const DEFAULT_HEAD_LIMIT = 250

/**
 * Paginates `items`: skips the first `offset` entries, then keeps at most
 * `limit` of the remainder.
 *
 * @param items  Full result list; never mutated (a new array is returned).
 * @param limit  Maximum number of entries to keep. `0` means unlimited.
 *               `undefined`, `NaN` and negative values fall back to
 *               DEFAULT_HEAD_LIMIT.
 * @param offset Number of leading entries to skip. Negative or `NaN` values
 *               are treated as 0; fractional values are truncated.
 * @returns The selected page, plus `appliedLimit` set to the effective limit
 *          only when entries beyond the page were cut off (otherwise
 *          `undefined`).
 */
function applyHeadLimit<T>(
  items: T[],
  limit: number | undefined,
  offset: number = 0,
): { items: T[]; appliedLimit: number | undefined } {
  // Array.prototype.slice interprets a negative start as "count from the end",
  // which would silently return the tail of the results instead of skipping
  // nothing. `offset > 0` is also false for NaN, so that maps to 0 as well.
  const start = offset > 0 ? Math.trunc(offset) : 0

  // Explicit 0 = unlimited escape hatch
  if (limit === 0) {
    return { items: items.slice(start), appliedLimit: undefined }
  }

  // A negative limit would turn into a negative slice end (dropping trailing
  // items) and be reported back as e.g. "limit: -1"; treat any unusable value
  // like an unspecified one.
  const effectiveLimit =
    limit !== undefined && limit > 0 ? limit : DEFAULT_HEAD_LIMIT
  const sliced = items.slice(start, start + effectiveLimit)
  // Only report appliedLimit when truncation actually occurred, so the model
  // knows there may be more results and can paginate with offset.
  const wasTruncated = items.length - start > effectiveLimit
  return {
    items: sliced,
    appliedLimit: wasTruncated ? effectiveLimit : undefined,
  }
}
129  
// Builds the pagination note shown in tool results, e.g. "limit: 20, offset: 40".
// Either piece can be missing: appliedLimit is undefined unless truncation
// actually happened (see applyHeadLimit), and a zero/undefined offset is not
// worth mentioning. Each piece is therefore emitted only when present, so the
// user-visible text never contains "limit: undefined". Returns '' when there
// is nothing to report.
function formatLimitInfo(
  appliedLimit: number | undefined,
  appliedOffset: number | undefined,
): string {
  const limitPart =
    appliedLimit === undefined ? [] : [`limit: ${appliedLimit}`]
  const offsetPart = appliedOffset ? [`offset: ${appliedOffset}`] : []
  return [...limitPart, ...offsetPart].join(', ')
}
143  
// Lazily-built zod schema for the data returned by GrepTool.call().
// Which optional fields are populated depends on the output mode.
const outputSchema = lazySchema(() => {
  const searchMode = z.enum(['content', 'files_with_matches', 'count'])
  const optionalCount = () => z.number().optional()

  return z.object({
    mode: searchMode.optional(),
    numFiles: z.number(),
    filenames: z.array(z.string()),
    content: z.string().optional(),
    // Number of output lines; set in content mode.
    numLines: optionalCount(),
    // Total number of matches; set in count mode.
    numMatches: optionalCount(),
    // The limit that was applied (if any).
    appliedLimit: optionalCount(),
    // The offset that was applied.
    appliedOffset: optionalCount(),
  })
})
type OutputSchema = ReturnType<typeof outputSchema>

type Output = z.infer<OutputSchema>
159  
// Matches the drive prefix of an absolute Windows path ("C:\" or "C:/").
const WINDOWS_DRIVE_PREFIX = /^[A-Za-z]:[\\/]/

/**
 * Returns the index of the ':' that terminates the path prefix of a ripgrep
 * content line ("<path>:<text>" or "<path>:<num>:<text>"), or -1 if the line
 * contains no such separator.
 *
 * On Windows an absolute path starts with a drive prefix whose own colon must
 * not be mistaken for the separator, so the search starts after it. The check
 * is limited to win32 so output on other platforms is handled exactly as a
 * plain `indexOf(':')` would.
 */
function findContentPathEnd(line: string): number {
  const searchFrom =
    process.platform === 'win32' && WINDOWS_DRIVE_PREFIX.test(line) ? 2 : 0
  return line.indexOf(':', searchFrom)
}

/**
 * Read-only, concurrency-safe tool that searches file contents by running
 * ripgrep and returns results in one of three modes: matching lines
 * ('content'), matching file paths ('files_with_matches', the default), or
 * per-file match counts ('count'). Results are paginated with
 * head_limit/offset and paths are relativized before being returned.
 */
export const GrepTool = buildTool({
  name: GREP_TOOL_NAME,
  searchHint: 'search file contents with regex (ripgrep)',
  // 20K chars - tool result persistence threshold
  maxResultSizeChars: 20_000,
  strict: true,
  async description() {
    return getDescription()
  },
  userFacingName() {
    return 'Search'
  },
  getToolUseSummary,
  /** Short progress text; falls back to plain 'Searching' without a summary. */
  getActivityDescription(input) {
    const summary = getToolUseSummary(input)
    return summary ? `Searching for ${summary}` : 'Searching'
  },
  get inputSchema(): InputSchema {
    return inputSchema()
  },
  get outputSchema(): OutputSchema {
    return outputSchema()
  },
  isConcurrencySafe() {
    return true
  },
  isReadOnly() {
    return true
  },
  /** "<pattern> in <path>" when a path was given, otherwise just the pattern. */
  toAutoClassifierInput(input) {
    return input.path ? `${input.pattern} in ${input.path}` : input.pattern
  },
  isSearchOrReadCommand() {
    return { isSearch: true, isRead: false }
  },
  /** The path as given by the caller, or the current working directory. */
  getPath({ path }): string {
    return path || getCwd()
  },
  /** Permission rules are wildcard-matched against the search pattern. */
  async preparePermissionMatcher({ pattern }) {
    return rulePattern => matchWildcardPattern(rulePattern, pattern)
  },
  /**
   * Rejects a `path` that does not exist (errorCode 1), with a hint about the
   * current working directory and, when available, a suggested alternative.
   * Errors from stat other than ENOENT are rethrown. An omitted path and UNC
   * paths are accepted without touching the filesystem.
   */
  async validateInput({ path }): Promise<ValidationResult> {
    // If path is provided, validate that it exists
    if (path) {
      const fs = getFsImplementation()
      const absolutePath = expandPath(path)

      // SECURITY: Skip filesystem operations for UNC paths to prevent NTLM credential leaks.
      if (absolutePath.startsWith('\\\\') || absolutePath.startsWith('//')) {
        return { result: true }
      }

      try {
        await fs.stat(absolutePath)
      } catch (e: unknown) {
        if (isENOENT(e)) {
          const cwdSuggestion = await suggestPathUnderCwd(absolutePath)
          let message = `Path does not exist: ${path}. ${FILE_NOT_FOUND_CWD_NOTE} ${getCwd()}.`
          if (cwdSuggestion) {
            message += ` Did you mean ${cwdSuggestion}?`
          }
          return {
            result: false,
            message,
            errorCode: 1,
          }
        }
        throw e
      }
    }

    return { result: true }
  },
  /** Delegates to the shared read-permission check for this tool. */
  async checkPermissions(input, context): Promise<PermissionDecision> {
    const appState = context.getAppState()
    return checkReadPermissionForTool(
      GrepTool,
      input,
      appState.toolPermissionContext,
    )
  },
  async prompt() {
    return getDescription()
  },
  renderToolUseMessage,
  renderToolUseErrorMessage,
  renderToolResultMessage,
  // SearchResultSummary shows content (mode=content) or filenames.join.
  // numFiles/numLines/numMatches are chrome ("Found 3 files") — fine to
  // skip (under-count, not phantom). Glob reuses this via UI.tsx:65.
  extractSearchText({ mode, content, filenames }) {
    if (mode === 'content' && content) return content
    return filenames.join('\n')
  },
  /**
   * Renders the structured output of `call()` as the text of a tool_result
   * block. Each mode has its own layout; pagination info is appended only
   * when formatLimitInfo() yields a non-empty string.
   */
  mapToolResultToToolResultBlockParam(
    {
      mode = 'files_with_matches',
      numFiles,
      filenames,
      content,
      numLines: _numLines,
      numMatches,
      appliedLimit,
      appliedOffset,
    },
    toolUseID,
  ) {
    if (mode === 'content') {
      const limitInfo = formatLimitInfo(appliedLimit, appliedOffset)
      const resultContent = content || 'No matches found'
      const finalContent = limitInfo
        ? `${resultContent}\n\n[Showing results with pagination = ${limitInfo}]`
        : resultContent
      return {
        tool_use_id: toolUseID,
        type: 'tool_result',
        content: finalContent,
      }
    }

    if (mode === 'count') {
      const limitInfo = formatLimitInfo(appliedLimit, appliedOffset)
      const rawContent = content || 'No matches found'
      const matches = numMatches ?? 0
      const files = numFiles ?? 0
      // The sentence-ending period goes after the optional pagination suffix,
      // so the summary never reads "...files. with pagination = ...".
      const summary = `\n\nFound ${matches} total ${matches === 1 ? 'occurrence' : 'occurrences'} across ${files} ${files === 1 ? 'file' : 'files'}${limitInfo ? ` with pagination = ${limitInfo}` : ''}.`
      return {
        tool_use_id: toolUseID,
        type: 'tool_result',
        content: rawContent + summary,
      }
    }

    // files_with_matches mode
    const limitInfo = formatLimitInfo(appliedLimit, appliedOffset)
    if (numFiles === 0) {
      return {
        tool_use_id: toolUseID,
        type: 'tool_result',
        content: 'No files found',
      }
    }
    // head_limit has already been applied in call() method, so just show all filenames
    const result = `Found ${numFiles} ${plural(numFiles, 'file')}${limitInfo ? ` ${limitInfo}` : ''}\n${filenames.join('\n')}`
    return {
      tool_use_id: toolUseID,
      type: 'tool_result',
      content: result,
    }
  },
  /**
   * Builds the ripgrep argument list from the input, runs ripgrep against the
   * expanded `path` (or the current working directory), and post-processes
   * the output lines according to `output_mode`.
   *
   * Errors thrown by ripGrep (including a timeout) are not caught here and
   * propagate to the caller.
   */
  async call(
    {
      pattern,
      path,
      glob,
      type,
      output_mode = 'files_with_matches',
      '-B': context_before,
      '-A': context_after,
      '-C': context_c,
      context,
      '-n': show_line_numbers = true,
      '-i': case_insensitive = false,
      head_limit,
      offset = 0,
      multiline = false,
    },
    { abortController, getAppState },
  ) {
    const absolutePath = path ? expandPath(path) : getCwd()
    const args = ['--hidden']

    // Exclude VCS directories to avoid noise from version control metadata
    for (const dir of VCS_DIRECTORIES_TO_EXCLUDE) {
      args.push('--glob', `!${dir}`)
    }

    // Limit line length to prevent base64/minified content from cluttering output
    args.push('--max-columns', '500')

    // Only apply multiline flags when explicitly requested
    if (multiline) {
      args.push('-U', '--multiline-dotall')
    }

    // Add optional flags
    if (case_insensitive) {
      args.push('-i')
    }

    // Add output mode flags
    if (output_mode === 'files_with_matches') {
      args.push('-l')
    } else if (output_mode === 'count') {
      args.push('-c')
    }

    // Add line numbers if requested
    if (show_line_numbers && output_mode === 'content') {
      args.push('-n')
    }

    // Add context flags (-C/context takes precedence over context_before/context_after)
    if (output_mode === 'content') {
      if (context !== undefined) {
        args.push('-C', context.toString())
      } else if (context_c !== undefined) {
        args.push('-C', context_c.toString())
      } else {
        if (context_before !== undefined) {
          args.push('-B', context_before.toString())
        }
        if (context_after !== undefined) {
          args.push('-A', context_after.toString())
        }
      }
    }

    // If pattern starts with dash, use -e flag to specify it as a pattern
    // This prevents ripgrep from interpreting it as a command-line option
    if (pattern.startsWith('-')) {
      args.push('-e', pattern)
    } else {
      args.push(pattern)
    }

    // Add type filter if specified
    if (type) {
      args.push('--type', type)
    }

    if (glob) {
      // Split on commas and spaces, but preserve patterns with braces
      const globPatterns: string[] = []
      const rawPatterns = glob.split(/\s+/)

      for (const rawPattern of rawPatterns) {
        // If pattern contains braces, don't split further
        if (rawPattern.includes('{') && rawPattern.includes('}')) {
          globPatterns.push(rawPattern)
        } else {
          // Split on commas for patterns without braces
          globPatterns.push(...rawPattern.split(',').filter(Boolean))
        }
      }

      for (const globPattern of globPatterns.filter(Boolean)) {
        args.push('--glob', globPattern)
      }
    }

    // Add ignore patterns
    const appState = getAppState()
    const ignorePatterns = normalizePatternsToPath(
      getFileReadIgnorePatterns(appState.toolPermissionContext),
      getCwd(),
    )
    for (const ignorePattern of ignorePatterns) {
      // Note: ripgrep only applies gitignore patterns relative to the working directory
      // So for non-absolute paths, we need to prefix them with '**'
      // See: https://github.com/BurntSushi/ripgrep/discussions/2156#discussioncomment-2316335
      //
      // We also need to negate the pattern with `!` to exclude it
      const rgIgnorePattern = ignorePattern.startsWith('/')
        ? `!${ignorePattern}`
        : `!**/${ignorePattern}`
      args.push('--glob', rgIgnorePattern)
    }

    // Exclude orphaned plugin version directories
    for (const exclusion of await getGlobExclusionsForPluginCache(
      absolutePath,
    )) {
      args.push('--glob', exclusion)
    }

    // WSL has severe performance penalty for file reads (3-5x slower on WSL2)
    // The timeout is handled by ripgrep itself via execFile timeout option
    // We don't use AbortController for timeout to avoid interrupting the agent loop
    // If ripgrep times out, it throws RipgrepTimeoutError which propagates up
    // so Claude knows the search didn't complete (rather than thinking there were no matches)
    const results = await ripGrep(args, absolutePath, abortController.signal)

    if (output_mode === 'content') {
      // For content mode, results are the actual content lines
      // Convert absolute paths to relative paths to save tokens

      // Apply head_limit first — relativize is per-line work, so
      // avoid processing lines that will be discarded (broad patterns can
      // return 10k+ lines with head_limit keeping only ~30-100).
      const { items: limitedResults, appliedLimit } = applyHeadLimit(
        results,
        head_limit,
        offset,
      )

      const finalLines = limitedResults.map(line => {
        // Lines have format: /absolute/path:line_content or /absolute/path:num:content
        // NOTE(review): context lines requested via -A/-B/-C and group
        // separators presumably do not follow this format; they are only
        // rewritten here if they happen to contain a ':' — confirm that
        // toRelativePath tolerates such input.
        const colonIndex = findContentPathEnd(line)
        if (colonIndex > 0) {
          const filePath = line.substring(0, colonIndex)
          const rest = line.substring(colonIndex)
          return toRelativePath(filePath) + rest
        }
        return line
      })
      const output = {
        mode: 'content' as const,
        numFiles: 0, // Not applicable for content mode
        filenames: [],
        content: finalLines.join('\n'),
        numLines: finalLines.length,
        ...(appliedLimit !== undefined && { appliedLimit }),
        ...(offset > 0 && { appliedOffset: offset }),
      }
      return { data: output }
    }

    if (output_mode === 'count') {
      // For count mode, pass through raw ripgrep output (filename:count format)
      // Apply head_limit first to avoid relativizing entries that will be discarded.
      const { items: limitedResults, appliedLimit } = applyHeadLimit(
        results,
        head_limit,
        offset,
      )

      // Convert absolute paths to relative paths to save tokens
      const finalCountLines = limitedResults.map(line => {
        // Lines have format: /absolute/path:count
        // The LAST colon is used, so colons inside the path are harmless.
        const colonIndex = line.lastIndexOf(':')
        if (colonIndex > 0) {
          const filePath = line.substring(0, colonIndex)
          const count = line.substring(colonIndex)
          return toRelativePath(filePath) + count
        }
        return line
      })

      // Parse count output to extract total matches and file count.
      // Both totals cover only the entries kept after pagination.
      let totalMatches = 0
      let fileCount = 0
      for (const line of finalCountLines) {
        const colonIndex = line.lastIndexOf(':')
        if (colonIndex > 0) {
          const countStr = line.substring(colonIndex + 1)
          const count = parseInt(countStr, 10)
          if (!isNaN(count)) {
            totalMatches += count
            fileCount += 1
          }
        }
      }

      const output = {
        mode: 'count' as const,
        numFiles: fileCount,
        filenames: [],
        content: finalCountLines.join('\n'),
        numMatches: totalMatches,
        ...(appliedLimit !== undefined && { appliedLimit }),
        ...(offset > 0 && { appliedOffset: offset }),
      }
      return { data: output }
    }

    // For files_with_matches mode (default)
    // Use allSettled so a single ENOENT (file deleted between ripgrep's scan
    // and this stat) does not reject the whole batch. Failed stats sort as mtime 0.
    const fs = getFsImplementation()
    const stats = await Promise.allSettled(
      results.map(filePath => fs.stat(filePath)),
    )
    const sortedMatches = results
      // Sort by modification time
      .map((filePath, i) => {
        const r = stats[i]!
        return [
          filePath,
          r.status === 'fulfilled' ? (r.value.mtimeMs ?? 0) : 0,
        ] as const
      })
      .sort((a, b) => {
        if (process.env.NODE_ENV === 'test') {
          // In tests, we always want to sort by filename, so that results are deterministic
          return a[0].localeCompare(b[0])
        }
        // Newest first
        const timeComparison = b[1] - a[1]
        if (timeComparison === 0) {
          // Sort by filename as a tiebreaker
          return a[0].localeCompare(b[0])
        }
        return timeComparison
      })
      .map(entry => entry[0])

    // Apply head_limit to sorted file list (like "| head -N")
    const { items: finalMatches, appliedLimit } = applyHeadLimit(
      sortedMatches,
      head_limit,
      offset,
    )

    // Convert absolute paths to relative paths to save tokens
    const relativeMatches = finalMatches.map(toRelativePath)

    const output = {
      mode: 'files_with_matches' as const,
      filenames: relativeMatches,
      numFiles: relativeMatches.length,
      ...(appliedLimit !== undefined && { appliedLimit }),
      ...(offset > 0 && { appliedOffset: offset }),
    }

    return {
      data: output,
    }
  },
} satisfies ToolDef<InputSchema, Output>)