/ tools / WebSearchTool / WebSearchTool.ts
WebSearchTool.ts
  1  import type {
  2    BetaContentBlock,
  3    BetaWebSearchTool20250305,
  4  } from '@anthropic-ai/sdk/resources/beta/messages/messages.mjs'
  5  import { getAPIProvider } from 'src/utils/model/providers.js'
  6  import type { PermissionResult } from 'src/utils/permissions/PermissionResult.js'
  7  import { z } from 'zod/v4'
  8  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
  9  import { queryModelWithStreaming } from '../../services/api/claude.js'
 10  import { buildTool, type ToolDef } from '../../Tool.js'
 11  import { lazySchema } from '../../utils/lazySchema.js'
 12  import { logError } from '../../utils/log.js'
 13  import { createUserMessage } from '../../utils/messages.js'
 14  import { getMainLoopModel, getSmallFastModel } from '../../utils/model/model.js'
 15  import { jsonParse, jsonStringify } from '../../utils/slowOperations.js'
 16  import { asSystemPrompt } from '../../utils/systemPromptType.js'
 17  import { getWebSearchPrompt, WEB_SEARCH_TOOL_NAME } from './prompt.js'
 18  import {
 19    getToolUseSummary,
 20    renderToolResultMessage,
 21    renderToolUseMessage,
 22    renderToolUseProgressMessage,
 23  } from './UI.js'
 24  
 25  const inputSchema = lazySchema(() =>
 26    z.strictObject({
 27      query: z.string().min(2).describe('The search query to use'),
 28      allowed_domains: z
 29        .array(z.string())
 30        .optional()
 31        .describe('Only include search results from these domains'),
 32      blocked_domains: z
 33        .array(z.string())
 34        .optional()
 35        .describe('Never include search results from these domains'),
 36    }),
 37  )
 38  type InputSchema = ReturnType<typeof inputSchema>
 39  
 40  type Input = z.infer<InputSchema>
 41  
 42  const searchResultSchema = lazySchema(() => {
 43    const searchHitSchema = z.object({
 44      title: z.string().describe('The title of the search result'),
 45      url: z.string().describe('The URL of the search result'),
 46    })
 47  
 48    return z.object({
 49      tool_use_id: z.string().describe('ID of the tool use'),
 50      content: z.array(searchHitSchema).describe('Array of search hits'),
 51    })
 52  })
 53  
 54  export type SearchResult = z.infer<ReturnType<typeof searchResultSchema>>
 55  
 56  const outputSchema = lazySchema(() =>
 57    z.object({
 58      query: z.string().describe('The search query that was executed'),
 59      results: z
 60        .array(z.union([searchResultSchema(), z.string()]))
 61        .describe('Search results and/or text commentary from the model'),
 62      durationSeconds: z
 63        .number()
 64        .describe('Time taken to complete the search operation'),
 65    }),
 66  )
 67  type OutputSchema = ReturnType<typeof outputSchema>
 68  
 69  export type Output = z.infer<OutputSchema>
 70  
 71  // Re-export WebSearchProgress from centralized types to break import cycles
 72  export type { WebSearchProgress } from '../../types/tools.js'
 73  
 74  import type { WebSearchProgress } from '../../types/tools.js'
 75  
 76  function makeToolSchema(input: Input): BetaWebSearchTool20250305 {
 77    return {
 78      type: 'web_search_20250305',
 79      name: 'web_search',
 80      allowed_domains: input.allowed_domains,
 81      blocked_domains: input.blocked_domains,
 82      max_uses: 8, // Hardcoded to 8 searches maximum
 83    }
 84  }
 85  
 86  function makeOutputFromSearchResponse(
 87    result: BetaContentBlock[],
 88    query: string,
 89    durationSeconds: number,
 90  ): Output {
 91    // The result is a sequence of these blocks:
 92    // - text to start -- always?
 93    // [
 94    //    - server_tool_use
 95    //    - web_search_tool_result
 96    //    - text and citation blocks intermingled
 97    //  ]+  (this block repeated for each search)
 98  
 99    const results: (SearchResult | string)[] = []
100    let textAcc = ''
101    let inText = true
102  
103    for (const block of result) {
104      if (block.type === 'server_tool_use') {
105        if (inText) {
106          inText = false
107          if (textAcc.trim().length > 0) {
108            results.push(textAcc.trim())
109          }
110          textAcc = ''
111        }
112        continue
113      }
114  
115      if (block.type === 'web_search_tool_result') {
116        // Handle error case - content is a WebSearchToolResultError
117        if (!Array.isArray(block.content)) {
118          const errorMessage = `Web search error: ${block.content.error_code}`
119          logError(new Error(errorMessage))
120          results.push(errorMessage)
121          continue
122        }
123        // Success case - add results to our collection
124        const hits = block.content.map(r => ({ title: r.title, url: r.url }))
125        results.push({
126          tool_use_id: block.tool_use_id,
127          content: hits,
128        })
129      }
130  
131      if (block.type === 'text') {
132        if (inText) {
133          textAcc += block.text
134        } else {
135          inText = true
136          textAcc = block.text
137        }
138      }
139    }
140  
141    if (textAcc.length) {
142      results.push(textAcc.trim())
143    }
144  
145    return {
146      query,
147      results,
148      durationSeconds,
149    }
150  }
151  
/**
 * Matches a complete `"query": "..."` string field inside (possibly partial)
 * tool-input JSON. The capture group is the raw, still-escaped string body.
 */
const STREAMED_QUERY_PATTERN = /"query"\s*:\s*"((?:[^"\\]|\\.)*)"/

/**
 * Extracts the search query from the tool-input JSON accumulated so far.
 *
 * @param partialJson - Concatenation of the `input_json_delta` fragments
 *   received so far for the current tool use.
 * @returns The decoded query, or `undefined` when no complete, non-empty
 *   `query` field is present yet or its contents cannot be decoded.
 */
function extractStreamedQuery(partialJson: string): string | undefined {
  const rawQuery = STREAMED_QUERY_PATTERN.exec(partialJson)?.[1]
  if (!rawQuery) {
    return undefined
  }
  try {
    // Re-wrap in quotes so JSON escape sequences are decoded.
    const decoded: unknown = jsonParse('"' + rawQuery + '"')
    return typeof decoded === 'string' ? decoded : undefined
  } catch {
    // Ignore parsing errors for partial JSON
    return undefined
  }
}

/**
 * Web search tool. `call` issues a nested streaming model request that
 * carries the `web_search` tool schema, reports progress while the stream is
 * consumed, and converts the collected content blocks into `Output`.
 */
export const WebSearchTool = buildTool({
  name: WEB_SEARCH_TOOL_NAME,
  searchHint: 'search the web for current information',
  maxResultSizeChars: 100_000,
  shouldDefer: true,
  async description(input) {
    return `Claude wants to search the web for: ${input.query}`
  },
  userFacingName() {
    return 'Web Search'
  },
  getToolUseSummary,
  getActivityDescription(input) {
    const summary = getToolUseSummary(input)
    return summary ? `Searching for ${summary}` : 'Searching the web'
  },
  /**
   * Whether the tool is available for the current API provider and main loop
   * model: always for `firstParty` and `foundry`, for `vertex` only when the
   * model name contains a Claude 4 family marker, otherwise never.
   */
  isEnabled() {
    const provider = getAPIProvider()
    const model = getMainLoopModel()

    // Enable for firstParty
    if (provider === 'firstParty') {
      return true
    }

    // Enable for Vertex AI with supported models (Claude 4.0+)
    if (provider === 'vertex') {
      const supportsWebSearch =
        model.includes('claude-opus-4') ||
        model.includes('claude-sonnet-4') ||
        model.includes('claude-haiku-4')

      return supportsWebSearch
    }

    // Foundry only ships models that already support Web Search
    if (provider === 'foundry') {
      return true
    }

    return false
  },
  get inputSchema(): InputSchema {
    return inputSchema()
  },
  get outputSchema(): OutputSchema {
    return outputSchema()
  },
  isConcurrencySafe() {
    return true
  },
  isReadOnly() {
    return true
  },
  toAutoClassifierInput(input) {
    return input.query
  },
  /**
   * Always returns a `passthrough` result, together with a suggestion to add
   * an allow rule for this tool to `localSettings`.
   */
  async checkPermissions(_input): Promise<PermissionResult> {
    return {
      behavior: 'passthrough',
      message: 'WebSearchTool requires permission.',
      suggestions: [
        {
          type: 'addRules',
          rules: [{ toolName: WEB_SEARCH_TOOL_NAME }],
          behavior: 'allow',
          destination: 'localSettings',
        },
      ],
    }
  },
  async prompt() {
    return getWebSearchPrompt()
  },
  renderToolUseMessage,
  renderToolUseProgressMessage,
  renderToolResultMessage,
  extractSearchText() {
    // renderToolResultMessage shows only "Did N searches in Xs" chrome —
    // the results[] content never appears on screen. Heuristic would index
    // string entries in results[] (phantom match). Nothing to search.
    return ''
  },
  /**
   * Rejects an empty query (error code 1) and requests that set both a
   * non-empty `allowed_domains` and a non-empty `blocked_domains`
   * (error code 2).
   */
  async validateInput(input) {
    const { query, allowed_domains, blocked_domains } = input
    if (!query.length) {
      return {
        result: false,
        message: 'Error: Missing query',
        errorCode: 1,
      }
    }
    if (allowed_domains?.length && blocked_domains?.length) {
      return {
        result: false,
        message:
          'Error: Cannot specify both allowed_domains and blocked_domains in the same request',
        errorCode: 2,
      }
    }
    return { result: true }
  },
  /**
   * Runs the search through a nested streaming model request.
   *
   * While the stream is consumed, `onProgress` (when provided) receives a
   * `query_update` each time a new query string is recognised in the
   * streamed tool input, and a `search_results_received` when a result block
   * starts. Exceptions thrown by `onProgress` are not caught here.
   *
   * @returns `{ data }` built from every content block of the assistant
   *   messages, with the elapsed wall-clock time in seconds.
   */
  async call(input, context, _canUseTool, _parentMessage, onProgress) {
    const startTime = performance.now()
    const { query } = input
    const userMessage = createUserMessage({
      content: 'Perform a web search for the query: ' + query,
    })
    const toolSchema = makeToolSchema(input)

    // When this flag is on, the request uses the small fast model with
    // thinking disabled and a forced `web_search` tool choice.
    const useHaiku = getFeatureValue_CACHED_MAY_BE_STALE(
      'tengu_plum_vx3',
      false,
    )

    const appState = context.getAppState()
    const queryStream = queryModelWithStreaming({
      messages: [userMessage],
      systemPrompt: asSystemPrompt([
        'You are an assistant for performing a web search tool use',
      ]),
      thinkingConfig: useHaiku
        ? { type: 'disabled' as const }
        : context.options.thinkingConfig,
      tools: [],
      signal: context.abortController.signal,
      options: {
        getToolPermissionContext: async () => appState.toolPermissionContext,
        model: useHaiku ? getSmallFastModel() : context.options.mainLoopModel,
        toolChoice: useHaiku ? { type: 'tool', name: 'web_search' } : undefined,
        isNonInteractiveSession: context.options.isNonInteractiveSession,
        hasAppendSystemPrompt: !!context.options.appendSystemPrompt,
        extraToolSchemas: [toolSchema],
        querySource: 'web_search_tool',
        agents: context.options.agentDefinitions.activeAgents,
        mcpTools: [],
        agentId: context.agentId,
        effortValue: appState.effortValue,
      },
    })

    const allContentBlocks: BetaContentBlock[] = []
    // ID of the server_tool_use block whose input JSON is being accumulated.
    let currentToolUseId: string | null = null
    let currentToolUseJson = ''
    let progressCounter = 0
    // tool_use_id -> most recent query recognised for that tool use
    const toolUseQueries = new Map<string, string>()

    for await (const event of queryStream) {
      if (event.type === 'assistant') {
        allContentBlocks.push(...event.message.content)
        continue
      }

      // Track tool use ID when server_tool_use starts
      if (
        event.type === 'stream_event' &&
        event.event?.type === 'content_block_start'
      ) {
        const contentBlock = event.event.content_block
        if (contentBlock && contentBlock.type === 'server_tool_use') {
          currentToolUseId = contentBlock.id
          currentToolUseJson = ''
          // Note: The ServerToolUseBlock doesn't contain input.query
          // The actual query comes through input_json_delta events
          continue
        }
      }

      // Accumulate JSON for current tool use
      if (
        currentToolUseId &&
        event.type === 'stream_event' &&
        event.event?.type === 'content_block_delta'
      ) {
        const delta = event.event.delta
        if (delta?.type === 'input_json_delta' && delta.partial_json) {
          currentToolUseJson += delta.partial_json

          // Report the query as soon as a complete field is visible, and
          // again only if it differs from what was last reported for this ID.
          const streamedQuery = extractStreamedQuery(currentToolUseJson)
          if (
            streamedQuery !== undefined &&
            toolUseQueries.get(currentToolUseId) !== streamedQuery
          ) {
            toolUseQueries.set(currentToolUseId, streamedQuery)
            progressCounter++
            if (onProgress) {
              onProgress({
                toolUseID: `search-progress-${progressCounter}`,
                data: {
                  type: 'query_update',
                  query: streamedQuery,
                },
              })
            }
          }
        }
      }

      // Yield progress when search results come in
      if (
        event.type === 'stream_event' &&
        event.event?.type === 'content_block_start'
      ) {
        const contentBlock = event.event.content_block
        if (contentBlock && contentBlock.type === 'web_search_tool_result') {
          // Get the actual query that was used for this search, falling back
          // to the tool's input query when none was captured for this ID.
          const toolUseId = contentBlock.tool_use_id
          const actualQuery = toolUseQueries.get(toolUseId) ?? query
          const content = contentBlock.content

          progressCounter++
          if (onProgress) {
            onProgress({
              toolUseID: toolUseId || `search-progress-${progressCounter}`,
              data: {
                type: 'search_results_received',
                // Non-array content is the error shape; report zero results.
                resultCount: Array.isArray(content) ? content.length : 0,
                query: actualQuery,
              },
            })
          }
        }
      }
    }

    // Process the final result
    const endTime = performance.now()
    const durationSeconds = (endTime - startTime) / 1000

    const data = makeOutputFromSearchResponse(
      allContentBlocks,
      query,
      durationSeconds,
    )
    return { data }
  },
  /**
   * Renders the output as the text of a `tool_result` block: a header with
   * the query, then each entry in order (text verbatim, search hits as a
   * JSON `Links:` line), then a reminder to cite the sources.
   */
  mapToolResultToToolResultBlockParam(output, toolUseID) {
    const { query, results } = output

    let formattedOutput = `Web search results for query: "${query}"\n\n`

    // Process the results array - it can contain both string summaries and search result objects.
    // Guard against null/undefined entries that can appear after JSON round-tripping
    // (e.g., from compaction or transcript deserialization).
    for (const result of results ?? []) {
      if (result == null) {
        continue
      }
      if (typeof result === 'string') {
        // Text summary
        formattedOutput += result + '\n\n'
      } else if (result.content?.length > 0) {
        // Search result with links
        formattedOutput += `Links: ${jsonStringify(result.content)}\n\n`
      } else {
        formattedOutput += 'No links found.\n\n'
      }
    }

    formattedOutput +=
      '\nREMINDER: You MUST include the sources above in your response to the user using markdown hyperlinks.'

    return {
      tool_use_id: toolUseID,
      type: 'tool_result',
      content: formattedOutput.trim(),
    }
  },
} satisfies ToolDef<InputSchema, Output, WebSearchProgress>)