Cradicle Explorer

/ tools / WebFetchTool / WebFetchTool.ts
WebFetchTool.ts
  1  import { z } from 'zod/v4'
  2  import { buildTool, type ToolDef } from '../../Tool.js'
  3  import type { PermissionUpdate } from '../../types/permissions.js'
  4  import { formatFileSize } from '../../utils/format.js'
  5  import { lazySchema } from '../../utils/lazySchema.js'
  6  import type { PermissionDecision } from '../../utils/permissions/PermissionResult.js'
  7  import { getRuleByContentsForTool } from '../../utils/permissions/permissions.js'
  8  import { isPreapprovedHost } from './preapproved.js'
  9  import { DESCRIPTION, WEB_FETCH_TOOL_NAME } from './prompt.js'
 10  import {
 11    getToolUseSummary,
 12    renderToolResultMessage,
 13    renderToolUseMessage,
 14    renderToolUseProgressMessage,
 15  } from './UI.js'
 16  import {
 17    applyPromptToMarkdown,
 18    type FetchedContent,
 19    getURLMarkdownContent,
 20    isPreapprovedUrl,
 21    MAX_MARKDOWN_LENGTH,
 22  } from './utils.js'
 23  
 24  const inputSchema = lazySchema(() =>
 25    z.strictObject({
 26      url: z.string().url().describe('The URL to fetch content from'),
 27      prompt: z.string().describe('The prompt to run on the fetched content'),
 28    }),
 29  )
 30  type InputSchema = ReturnType<typeof inputSchema>
 31  
 32  const outputSchema = lazySchema(() =>
 33    z.object({
 34      bytes: z.number().describe('Size of the fetched content in bytes'),
 35      code: z.number().describe('HTTP response code'),
 36      codeText: z.string().describe('HTTP response code text'),
 37      result: z
 38        .string()
 39        .describe('Processed result from applying the prompt to the content'),
 40      durationMs: z
 41        .number()
 42        .describe('Time taken to fetch and process the content'),
 43      url: z.string().describe('The URL that was fetched'),
 44    }),
 45  )
 46  type OutputSchema = ReturnType<typeof outputSchema>
 47  
 48  export type Output = z.infer<OutputSchema>
 49  
 50  function webFetchToolInputToPermissionRuleContent(input: {
 51    [k: string]: unknown
 52  }): string {
 53    try {
 54      const parsedInput = WebFetchTool.inputSchema.safeParse(input)
 55      if (!parsedInput.success) {
 56        return `input:${input.toString()}`
 57      }
 58      const { url } = parsedInput.data
 59      const hostname = new URL(url).hostname
 60      return `domain:${hostname}`
 61    } catch {
 62      return `input:${input.toString()}`
 63    }
 64  }
 65  
 66  export const WebFetchTool = buildTool({
 67    name: WEB_FETCH_TOOL_NAME,
 68    searchHint: 'fetch and extract content from a URL',
 69    // 100K chars - tool result persistence threshold
 70    maxResultSizeChars: 100_000,
 71    shouldDefer: true,
 72    async description(input) {
 73      const { url } = input as { url: string }
 74      try {
 75        const hostname = new URL(url).hostname
 76        return `Claude wants to fetch content from ${hostname}`
 77      } catch {
 78        return `Claude wants to fetch content from this URL`
 79      }
 80    },
 81    userFacingName() {
 82      return 'Fetch'
 83    },
 84    getToolUseSummary,
 85    getActivityDescription(input) {
 86      const summary = getToolUseSummary(input)
 87      return summary ? `Fetching ${summary}` : 'Fetching web page'
 88    },
 89    get inputSchema(): InputSchema {
 90      return inputSchema()
 91    },
 92    get outputSchema(): OutputSchema {
 93      return outputSchema()
 94    },
 95    isConcurrencySafe() {
 96      return true
 97    },
 98    isReadOnly() {
 99      return true
100    },
101    toAutoClassifierInput(input) {
102      return input.prompt ? `${input.url}: ${input.prompt}` : input.url
103    },
104    async checkPermissions(input, context): Promise<PermissionDecision> {
105      const appState = context.getAppState()
106      const permissionContext = appState.toolPermissionContext
107  
108      // Check if the hostname is in the preapproved list
109      try {
110        const { url } = input as { url: string }
111        const parsedUrl = new URL(url)
112        if (isPreapprovedHost(parsedUrl.hostname, parsedUrl.pathname)) {
113          return {
114            behavior: 'allow',
115            updatedInput: input,
116            decisionReason: { type: 'other', reason: 'Preapproved host' },
117          }
118        }
119      } catch {
120        // If URL parsing fails, continue with normal permission checks
121      }
122  
123      // Check for a rule specific to the tool input (matching hostname)
124      const ruleContent = webFetchToolInputToPermissionRuleContent(input)
125  
126      const denyRule = getRuleByContentsForTool(
127        permissionContext,
128        WebFetchTool,
129        'deny',
130      ).get(ruleContent)
131      if (denyRule) {
132        return {
133          behavior: 'deny',
134          message: `${WebFetchTool.name} denied access to ${ruleContent}.`,
135          decisionReason: {
136            type: 'rule',
137            rule: denyRule,
138          },
139        }
140      }
141  
142      const askRule = getRuleByContentsForTool(
143        permissionContext,
144        WebFetchTool,
145        'ask',
146      ).get(ruleContent)
147      if (askRule) {
148        return {
149          behavior: 'ask',
150          message: `Claude requested permissions to use ${WebFetchTool.name}, but you haven't granted it yet.`,
151          decisionReason: {
152            type: 'rule',
153            rule: askRule,
154          },
155          suggestions: buildSuggestions(ruleContent),
156        }
157      }
158  
159      const allowRule = getRuleByContentsForTool(
160        permissionContext,
161        WebFetchTool,
162        'allow',
163      ).get(ruleContent)
164      if (allowRule) {
165        return {
166          behavior: 'allow',
167          updatedInput: input,
168          decisionReason: {
169            type: 'rule',
170            rule: allowRule,
171          },
172        }
173      }
174  
175      return {
176        behavior: 'ask',
177        message: `Claude requested permissions to use ${WebFetchTool.name}, but you haven't granted it yet.`,
178        suggestions: buildSuggestions(ruleContent),
179      }
180    },
181    async prompt(_options) {
182      // Always include the auth warning regardless of whether ToolSearch is
183      // currently in the tools list. Conditionally toggling this prefix based
184      // on ToolSearch availability caused the tool description to flicker
185      // between SDK query() calls (when ToolSearch enablement varies due to
186      // MCP tool count thresholds), invalidating the Anthropic API prompt
187      // cache on each toggle — two consecutive cache misses per flicker event.
188      return `IMPORTANT: WebFetch WILL FAIL for authenticated or private URLs. Before using this tool, check if the URL points to an authenticated service (e.g. Google Docs, Confluence, Jira, GitHub). If so, look for a specialized MCP tool that provides authenticated access.
189  ${DESCRIPTION}`
190    },
191    async validateInput(input) {
192      const { url } = input
193      try {
194        new URL(url)
195      } catch {
196        return {
197          result: false,
198          message: `Error: Invalid URL "${url}". The URL provided could not be parsed.`,
199          meta: { reason: 'invalid_url' },
200          errorCode: 1,
201        }
202      }
203      return { result: true }
204    },
205    renderToolUseMessage,
206    renderToolUseProgressMessage,
207    renderToolResultMessage,
208    async call(
209      { url, prompt },
210      { abortController, options: { isNonInteractiveSession } },
211    ) {
212      const start = Date.now()
213  
214      const response = await getURLMarkdownContent(url, abortController)
215  
216      // Check if we got a redirect to a different host
217      if ('type' in response && response.type === 'redirect') {
218        const statusText =
219          response.statusCode === 301
220            ? 'Moved Permanently'
221            : response.statusCode === 308
222              ? 'Permanent Redirect'
223              : response.statusCode === 307
224                ? 'Temporary Redirect'
225                : 'Found'
226  
227        const message = `REDIRECT DETECTED: The URL redirects to a different host.
228  
229  Original URL: ${response.originalUrl}
230  Redirect URL: ${response.redirectUrl}
231  Status: ${response.statusCode} ${statusText}
232  
233  To complete your request, I need to fetch content from the redirected URL. Please use WebFetch again with these parameters:
234  - url: "${response.redirectUrl}"
235  - prompt: "${prompt}"`
236  
237        const output: Output = {
238          bytes: Buffer.byteLength(message),
239          code: response.statusCode,
240          codeText: statusText,
241          result: message,
242          durationMs: Date.now() - start,
243          url,
244        }
245  
246        return {
247          data: output,
248        }
249      }
250  
251      const {
252        content,
253        bytes,
254        code,
255        codeText,
256        contentType,
257        persistedPath,
258        persistedSize,
259      } = response as FetchedContent
260  
261      const isPreapproved = isPreapprovedUrl(url)
262  
263      let result: string
264      if (
265        isPreapproved &&
266        contentType.includes('text/markdown') &&
267        content.length < MAX_MARKDOWN_LENGTH
268      ) {
269        result = content
270      } else {
271        result = await applyPromptToMarkdown(
272          prompt,
273          content,
274          abortController.signal,
275          isNonInteractiveSession,
276          isPreapproved,
277        )
278      }
279  
280      // Binary content (PDFs, etc.) was additionally saved to disk with a
281      // mime-derived extension. Note it so Claude can inspect the raw file
282      // if the Haiku summary above isn't enough.
283      if (persistedPath) {
284        result += `\n\n[Binary content (${contentType}, ${formatFileSize(persistedSize ?? bytes)}) also saved to ${persistedPath}]`
285      }
286  
287      const output: Output = {
288        bytes,
289        code,
290        codeText,
291        result,
292        durationMs: Date.now() - start,
293        url,
294      }
295  
296      return {
297        data: output,
298      }
299    },
300    mapToolResultToToolResultBlockParam({ result }, toolUseID) {
301      return {
302        tool_use_id: toolUseID,
303        type: 'tool_result',
304        content: result,
305      }
306    },
307  } satisfies ToolDef<InputSchema, Output>)
308  
/**
 * Builds the permission-update suggestions offered alongside an 'ask' decision.
 *
 * @param ruleContent - Rule content the suggested rule should match.
 * @returns A single `addRules` update, destined for `'localSettings'`, that
 *   adds an allow rule for this tool with the given rule content.
 */
function buildSuggestions(ruleContent: string): PermissionUpdate[] {
  const allowRule = { toolName: WEB_FETCH_TOOL_NAME, ruleContent }
  const addAllowRule: PermissionUpdate = {
    type: 'addRules',
    destination: 'localSettings',
    rules: [allowRule],
    behavior: 'allow',
  }
  return [addAllowRule]
}