Cradicle Explorer

/ src / lib / server / tool-planning.ts
tool-planning.ts
  1  import type { ExtensionToolPlanning } from '@/types'
  2  import { dedup } from '@/lib/shared-utils'
  3  import { getExtensionManager } from './extensions'
  4  import { getNativeCapabilityTools } from './native-capabilities'
  5  import { canonicalizeExtensionId, expandExtensionIds } from './tool-aliases'
  6  
  /**
   * Named capability identifiers, so the core planning table in this file can
   * reference them by constant instead of repeating the string literals.
   * `as const` keeps every value typed as its exact string literal.
   */
  7  export const TOOL_CAPABILITY = {
  8    researchSearch: 'research.search',
  9    researchFetch: 'research.fetch',
 10    browserNavigate: 'browser.navigate',
 11    browserCapture: 'browser.capture',
 12    artifactPdf: 'artifact.pdf',
 13    deliveryMessage: 'delivery.message',
 14    deliveryMedia: 'delivery.media',
 15    deliveryVoiceNote: 'delivery.voice_note',
 16  } as const
 17  
 /** Planning metadata for a single tool, as exposed through `ToolPlanningView.entries`. */
 18  export interface ToolPlanningEntry {
   /** Name of the tool; this is the value stored in the capability -> tools index. */
 19    toolName: string
   /** Capability identifiers the tool provides (for example `research.search`). */
 20    capabilities: string[]
   /** Guidance lines describing how the tool should be called. */
 21    disciplineGuidance: string[]
 22  }
 23  
 /**
  * Shape of the entries stored in `CORE_TOOL_PLANNING`: a `ToolPlanningEntry`
  * plus an optional `requestMatchers` payload.
  */
 24  interface LegacyToolPlanningEntry extends ToolPlanningEntry {
   // Typed `unknown` and not read by any code visible in this file;
   // `getEnabledToolPlanningView` copies only the three `ToolPlanningEntry` fields.
 25    requestMatchers?: unknown
 26  }
 27  
 /** Aggregated planning data returned by `getEnabledToolPlanningView`. */
 28  export interface ToolPlanningView {
   /** Enabled IDs after canonicalization, de-duplicated and sorted. */
 29    displayToolIds: string[]
   /** Enabled IDs after alias expansion, de-duplicated and sorted. */
 30    expandedExtensionIds: string[]
   /** Every planning entry collected for the expanded IDs. */
 31    entries: ToolPlanningEntry[]
   /** Guidance lines from all entries, de-duplicated, in first-seen order. */
 32    disciplineGuidance: string[]
   /** Capability identifier -> names of the tools that declare it. */
 33    capabilityToTools: Map<string, string[]>
 34  }
 35  
 /**
  * Built-in planning metadata, keyed by expanded extension ID (the key used by
  * `getEnabledToolPlanningView` when it looks entries up).
  *
  * One key may contribute several tools (`web` -> `web_search` and `web_fetch`),
  * and a tool name does not have to match its key (`manage_connectors` ->
  * `connector_message_tool`, `memory` -> `memory_tool`).
  *
  * Each entry also carries `requestMatchers`, but the view builder in this file
  * copies only `toolName`, `capabilities` and `disciplineGuidance`.
  */
 36  const CORE_TOOL_PLANNING: Record<string, LegacyToolPlanningEntry[]> = {
 37    files: [
 38      {
 39        toolName: 'files',
 40        capabilities: ['artifact.files'],
 41        disciplineGuidance: [
 42          'For `files`, include an explicit action whenever possible. Common patterns: `{"action":"list","dirPath":"."}`, `{"action":"read","filePath":"path/to/file.md"}`, and `{"action":"write","files":[{"path":"path/to/file.md","content":"..."}]}`.',
 43          'Prefer a single write call with multiple files over writing one file at a time.',
 44        ],
 45        requestMatchers: [],
 46      },
 47    ],
 48    shell: [
 49      {
 50        toolName: 'shell',
 51        capabilities: ['runtime.shell'],
 52        disciplineGuidance: [
 53          'For `shell`, use `{"action":"execute","command":"..."}` for commands and `{"action":"status","processId":"..."}` or `{"action":"log","processId":"..."}` for long-lived processes.',
 54          'Chain related commands in a single shell call using && to reduce round-trips. Avoid running the same build or test command repeatedly — if it fails, diagnose the error before retrying.',
 55        ],
 56        requestMatchers: [],
 57      },
 58    ],
 59    execute: [
 60      {
 61        toolName: 'execute',
 62        capabilities: ['runtime.execute'],
 63        disciplineGuidance: [
 64          'For `execute`, pass the full bash script in `{"code":"..."}`. Use it for sandboxed command execution, curl-based fetches, and one-shot scripts.',
 65          'Use `persistent=true` only when the agent is explicitly configured for host execution. Otherwise use `files` for persistent writes.',
 66        ],
 67        requestMatchers: [],
 68      },
 69    ],
 70    web: [
 71      {
 72        toolName: 'web_search',
 73        capabilities: [TOOL_CAPABILITY.researchSearch],
 74        disciplineGuidance: [
 75          'For `web_search`, use `{"query":"..."}` to research fresh information. For current events, breaking news, or "latest" requests, start with `web_search` before summarizing.',
 76          'Gather 2-3 key sources, then synthesize. Do not search-read-search-read in a loop.',
 77        ],
 78        requestMatchers: [
 79          {
 80            capability: TOOL_CAPABILITY.researchSearch,
 81            patterns: ['research', 'look up', 'find out', 'search for', 'compare', 'latest', 'news', 'headline', 'current event', 'recent update', 'update', 'updates', 'breaking', 'developments', 'keep watching', 'watch for', 'watching for', 'monitor', 'track', "what's new", 'what happened'],
 82            forbidLiteralUrl: true,
 83          },
 84        ],
 85      },
 86      {
 87        toolName: 'web_fetch',
 88        capabilities: [TOOL_CAPABILITY.researchFetch],
 89        disciplineGuidance: [
 90          'For `web_fetch`, use `{"url":"https://..."}` to read a specific page or article after you know the URL.',
 91          'Fetch the pages you need, then synthesize. Do not fetch-read-fetch-read in a loop.',
 92        ],
 93        requestMatchers: [
 94          {
 95            capability: TOOL_CAPABILITY.researchFetch,
 96            patterns: ['read', 'summarize', 'summarise', 'analyze', 'analyse', 'extract', 'review', 'article', 'page', 'url', 'link'],
 97            requireLiteralUrl: true,
 98          },
 99        ],
100      },
101    ],
102    browser: [
103      {
104        toolName: 'browser',
105        capabilities: [TOOL_CAPABILITY.browserNavigate, TOOL_CAPABILITY.browserCapture, TOOL_CAPABILITY.artifactPdf],
106        disciplineGuidance: [
107          'For `browser`, when the task includes a literal URL, pass that exact URL string to `{"action":"navigate","url":"..."}`. Do not invent placeholder URLs like `[Your URL]`, `Example_URL`, or `MockMailPage_URL`.',
108          'For `browser` form work, prefer `{"action":"fill_form","fields":[{"element":"#email","value":"user@example.com"},{"element":"#password","value":"..."}]}`. A shorthand `form` object keyed by input id/name also works for simple forms.',
109          'Use `browser` when the user asks for screenshots, visual proof, page capture, PDFs, or a rendered view of a page. `navigate` alone is not a screenshot.',
110          'Limit browser navigations to what is needed. Each navigation is expensive. Plan your browser session: list the pages you need, visit each once, extract what you need.',
111        ],
112        requestMatchers: [
113          {
114            capability: TOOL_CAPABILITY.browserNavigate,
115            patterns: ['browser', 'click', 'fill form', 'log in', 'login', 'navigate'],
116            requireLiteralUrl: true,
117          },
118          {
119            capability: TOOL_CAPABILITY.browserCapture,
120            patterns: ['screenshot', 'screen shot', 'snapshot', 'page capture', 'visual proof', 'capture the page', 'rendered view'],
121          },
122          {
123            capability: TOOL_CAPABILITY.artifactPdf,
124            patterns: ['pdf', 'save as pdf', 'export pdf'],
125          },
126        ],
127      },
128    ],
129    manage_connectors: [
130      {
131        toolName: 'connector_message_tool',
132        capabilities: [TOOL_CAPABILITY.deliveryMessage, TOOL_CAPABILITY.deliveryMedia, TOOL_CAPABILITY.deliveryVoiceNote],
133        disciplineGuidance: [
134          'For outbound delivery, inspect available channels with `connector_message_tool` using `{"action":"list_running"}` before claiming something cannot be sent.',
135          'Use `connector_message_tool` with `{"action":"send","message":"...","mediaPath":"..."}` for text/media and `{"action":"send_voice_note","voiceText":"..."}` for voice notes.',
136          'If no channel or recipient is configured, explain that connector/channel setup is missing rather than claiming the capability does not exist.',
137          'Check channel availability once with `list_running`, then send. Do not re-list channels between each message.',
138        ],
139        requestMatchers: [
140          {
141            capability: TOOL_CAPABILITY.deliveryMessage,
142            patterns: ['send', 'share', 'deliver', 'message'],
143          },
144          {
145            capability: TOOL_CAPABILITY.deliveryMedia,
146            patterns: ['screenshot', 'screen shot', 'snapshot', 'image', 'photo', 'send file', 'send a file', 'pdf', 'attachment'],
147          },
148          {
149            capability: TOOL_CAPABILITY.deliveryVoiceNote,
150            patterns: ['voice note', 'voice-note', 'voicenote', 'voice memo', 'voice message', 'audio note', 'audio update', 'ptt'],
151          },
152        ],
153      },
154    ],
155    http_request: [
156      {
157        toolName: 'http_request',
158        capabilities: ['network.http'],
159        disciplineGuidance: [
160          'For `http_request`, send exact literal URLs from the task or from prior tool results. Keep JSON request bodies as raw JSON strings.',
161          'If an API call fails, inspect the error before retrying with the same request. Do not retry the same failing call in a loop.',
162        ],
163        requestMatchers: [],
164      },
165    ],
166    email: [
167      {
168        toolName: 'email',
169        capabilities: ['delivery.email'],
170        disciplineGuidance: [
171          'For `email`, send mail with `{"action":"send","to":"user@example.com","subject":"...","body":"..."}`. If delivery depends on SMTP setup, check `{"action":"status"}` before claiming success.',
172          'Compose the full message in one send call. Do not send partial drafts followed by corrections.',
173        ],
174        requestMatchers: [],
175      },
176    ],
177    google_workspace: [
178      {
179        toolName: 'google_workspace',
180        capabilities: ['workspace.google'],
181        disciplineGuidance: [
182          'For `google_workspace`, pass exact `gws` arguments in `{"args":[...]}` form. Prefer list/get/read commands first to confirm IDs and current state before mutating Drive, Docs, Sheets, Gmail, Calendar, or Chat resources.',
183          'Use `params` and `jsonInput` for `--params` / `--json` payloads instead of packing raw JSON blobs into the `args` array.',
184          'Do not call interactive `gws auth login` or `gws auth setup` from the agent. Use the extension settings or a pre-authenticated `gws` install.',
185          'Confirm resource IDs with a single list/get call before mutating. Do not repeatedly list the same resources between edits.',
186        ],
187        requestMatchers: [
188          {
189            capability: 'workspace.google',
190            patterns: ['google workspace', 'google docs', 'google doc', 'google sheets', 'spreadsheet', 'google drive', 'gmail', 'google calendar', 'google chat', 'workspace file', 'shared drive'],
191          },
192        ],
193      },
194    ],
195    ask_human: [
196      {
197        toolName: 'ask_human',
198        capabilities: ['human.input'],
199        disciplineGuidance: [
200          'For `ask_human`, when a workflow needs a code, approval, or out-of-band value from a person, do not guess or keep re-submitting blank forms. Use `{"action":"request_input","question":"..."}` and, for durable pauses, `{"action":"wait_for_reply","correlationId":"..."}`.',
201          'Reuse the same `correlationId` from `request_input` when you call `wait_for_reply`. Once the durable wait returns active, stop the turn immediately and wait for the reply instead of calling `request_input` again.',
202          'Do not ask the same pending human question twice before the durable wait resumes unless the question materially changes.',
203          'Batch related questions into a single request rather than asking one question at a time.',
204        ],
205        requestMatchers: [],
206      },
207    ],
208  
209    // --- Internal platform tools ---
210  
211    manage_agents: [
212      {
213        toolName: 'manage_agents',
214        capabilities: ['platform.agents'],
215        disciplineGuidance: [
216          'List agents once at the start of a task, then work with specific agent IDs. Do not re-list between each action.',
217        ],
218        requestMatchers: [],
219      },
220    ],
221    manage_projects: [
222      {
223        toolName: 'manage_projects',
224        capabilities: ['platform.projects'],
225        disciplineGuidance: [
226          'List projects once to orient, then operate on specific project IDs. Do not re-list after each update.',
227        ],
228        requestMatchers: [],
229      },
230    ],
231    manage_tasks: [
232      {
233        toolName: 'manage_tasks',
234        capabilities: ['platform.tasks'],
235        disciplineGuidance: [
236          'Read the task list once, make your changes, then move on. Do not re-read the task list after every update.',
237        ],
238        requestMatchers: [],
239      },
240    ],
241    manage_schedules: [
242      {
243        toolName: 'manage_schedules',
244        capabilities: ['platform.schedules'],
245        disciplineGuidance: [
246          'List schedules once to check current state. Do not re-list after each modification.',
247        ],
248        requestMatchers: [],
249      },
250    ],
251    manage_skills: [
252      {
253        toolName: 'manage_skills',
254        capabilities: ['platform.skills'],
255        disciplineGuidance: [
256          'Use `recommend_for_task` to find a relevant skill efficiently. Do not repeatedly list or search skills between each action.',
257        ],
258        requestMatchers: [],
259      },
260    ],
261    manage_webhooks: [
262      {
263        toolName: 'manage_webhooks',
264        capabilities: ['platform.webhooks'],
265        disciplineGuidance: [
266          'List webhooks once for current state. Do not re-list after each change.',
267        ],
268        requestMatchers: [],
269      },
270    ],
271    manage_secrets: [
272      {
273        toolName: 'manage_secrets',
274        capabilities: ['platform.secrets'],
275        disciplineGuidance: [
276          'Store secrets directly. Use the `check` action (not `list`) to verify if a credential already exists before requesting a new one.',
277        ],
278        requestMatchers: [],
279      },
280    ],
281    manage_chatrooms: [
282      {
283        toolName: 'manage_chatrooms',
284        capabilities: ['platform.chatrooms'],
285        disciplineGuidance: [
286          'List chatrooms once to orient, then operate on specific IDs. Do not re-list after each message or update.',
287        ],
288        requestMatchers: [],
289      },
290    ],
291    manage_protocols: [
292      {
293        toolName: 'manage_protocols',
294        capabilities: ['platform.protocols'],
295        disciplineGuidance: [
296          'Read the protocol definition once, then execute steps. Do not re-read the protocol between each step.',
297        ],
298        requestMatchers: [],
299      },
300    ],
301    manage_platform: [
302      {
303        toolName: 'manage_platform',
304        capabilities: ['platform.umbrella'],
305        disciplineGuidance: [
306          'Prefer the direct `manage_*` tools (manage_agents, manage_tasks, etc.) when they are enabled. Use `manage_platform` only as a fallback when the specific tool is not available.',
307        ],
308        requestMatchers: [],
309      },
310    ],
311    spawn_subagent: [
312      {
313        toolName: 'spawn_subagent',
314        capabilities: ['delegation.subagent'],
315        disciplineGuidance: [
316          'Use `waitForCompletion: true` (the default) or `wait`/`wait_all` actions to await results. Do not poll `status` in a loop.',
317          'Batch related delegations — spawn multiple subagents at once if tasks are independent.',
318          'For multi-step or cross-domain work, delegate to a subagent rather than attempting everything in one long tool chain.',
319        ],
320        requestMatchers: [],
321      },
322    ],
323    delegate: [
324      {
325        toolName: 'delegate',
326        capabilities: ['delegation.cli'],
327        disciplineGuidance: [
328          'Give the delegate a complete task description in one call. Do not send incremental instructions across multiple delegation calls.',
329        ],
330        requestMatchers: [],
331      },
332    ],
333    manage_sessions: [
334      {
335        toolName: 'sessions_tool',
336        capabilities: ['platform.sessions'],
337        disciplineGuidance: [
338          'Check session identity once at the start. Do not re-query session info between each action.',
339        ],
340        requestMatchers: [],
341      },
342    ],
343    memory: [
344      {
345        toolName: 'memory_tool',
346        capabilities: ['memory.search', 'memory.store'],
347        disciplineGuidance: [
348          'Search memory once with a good query, then use the results. Do not run multiple overlapping searches for the same topic.',
349          'For stores and updates, write once with complete content. Do not read-back immediately after writing to confirm.',
350        ],
351        requestMatchers: [],
352      },
353    ],
354    context_mgmt: [
355      {
356        toolName: 'context_status',
357        capabilities: ['context.management'],
358        disciplineGuidance: [
359          'Check context status only when you suspect you are running low. Do not check after every tool call.',
360        ],
361        requestMatchers: [],
362      },
363    ],
364    monitor: [
365      {
366        toolName: 'monitor_tool',
367        capabilities: ['monitoring.watch'],
368        disciplineGuidance: [
369          'Prefer `wait_until`, `wait_for_http`, `wait_for_file`, or other `wait_for_*` shortcut actions — they create a durable wait that resumes your turn automatically. Avoid creating a watch with `create_watch` then polling `get_watch` in a loop.',
370        ],
371        requestMatchers: [],
372      },
373    ],
374    image_gen: [
375      {
376        toolName: 'generate_image',
377        capabilities: ['media.image_generation'],
378        disciplineGuidance: [
379          'Describe the image fully in one generation call. Do not generate multiple variations unless the user asks for options.',
380        ],
381        requestMatchers: [],
382      },
383    ],
384    replicate: [
385      {
386        toolName: 'replicate',
387        capabilities: ['media.replicate'],
388        disciplineGuidance: [
389          'Submit the job with complete parameters in one call. Use `wait: true` for synchronous completion. If running async, let the built-in polling handle it — do not add your own polling loop on top.',
390        ],
391        requestMatchers: [],
392      },
393    ],
394    schedule_wake: [
395      {
396        toolName: 'schedule_wake',
397        capabilities: ['runtime.schedule'],
398        disciplineGuidance: [
399          'Schedule the wake once with the correct time. Do not reschedule repeatedly to adjust by small increments.',
400        ],
401        requestMatchers: [],
402      },
403    ],
404    mailbox: [
405      {
406        toolName: 'mailbox',
407        capabilities: ['delivery.mailbox'],
408        disciplineGuidance: [
409          'Use `search_messages` for targeted retrieval instead of listing all messages. Do not poll the inbox in a loop waiting for replies.',
410        ],
411        requestMatchers: [],
412      },
413    ],
414  }
415  
/**
 * Trims every string in `values`, discards anything that is not a string or is
 * empty after trimming, and hands the survivors to `dedup`.
 *
 * @param values Candidate strings; non-string items are tolerated at runtime.
 * @returns Whatever `dedup` returns for the trimmed, non-empty strings.
 */
function dedupeStrings(values: string[]): string[] {
  const cleaned: string[] = []
  for (const candidate of values) {
    // Runtime guard: callers may pass loosely typed extension metadata.
    if (typeof candidate !== 'string') continue
    const text = candidate.trim()
    if (text) cleaned.push(text)
  }
  return dedup(cleaned)
}
419  
/**
 * Converts a tool's optional planning metadata into a `ToolPlanningEntry`.
 *
 * @param toolName Name recorded on the resulting entry.
 * @param planning Planning metadata attached to the tool, if any.
 * @returns The cleaned entry, or `null` when `planning` is missing or yields
 *   neither capabilities nor guidance after cleaning.
 */
function normalizePlanningEntry(toolName: string, planning: ExtensionToolPlanning | null | undefined): ToolPlanningEntry | null {
  if (!planning) return null

  // Anything that is not an array is treated as "nothing declared".
  const rawCapabilities = Array.isArray(planning.capabilities) ? planning.capabilities : []
  const rawGuidance = Array.isArray(planning.disciplineGuidance) ? planning.disciplineGuidance : []

  const normalized: ToolPlanningEntry = {
    toolName,
    capabilities: dedupeStrings(rawCapabilities),
    disciplineGuidance: dedupeStrings(rawGuidance),
  }

  const hasContent = normalized.capabilities.length > 0 || normalized.disciplineGuidance.length > 0
  return hasContent ? normalized : null
}
431  
/**
 * Builds the planning view for a set of enabled extensions.
 *
 * Entries are collected in this order: built-in entries from
 * `CORE_TOOL_PLANNING` for each expanded extension ID (IDs visited in sorted
 * order), then tools returned by `getNativeCapabilityTools`, then tools
 * returned by the extension manager. Tools from the latter two sources are
 * included only when `normalizePlanningEntry` yields an entry for them.
 *
 * @param enabledExtensions Enabled extension/tool IDs. They are canonicalized
 *   for `displayToolIds` and expanded for lookup.
 * @returns The sorted ID lists, the collected entries, the de-duplicated
 *   guidance lines, and a capability -> tool-names index. Guidance lines and
 *   tool names keep first-seen order.
 */
export function getEnabledToolPlanningView(enabledExtensions: string[]): ToolPlanningView {
  const displayToolIds = dedupeStrings(enabledExtensions.map((toolId) => canonicalizeExtensionId(toolId))).sort()
  const expandedExtensionIds = dedupeStrings(expandExtensionIds(enabledExtensions)).sort()
  const entries: ToolPlanningEntry[] = []

  for (const extensionId of expandedExtensionIds) {
    // Only own keys count. An ID such as `constructor` or `__proto__` would
    // otherwise resolve to an inherited Object.prototype member, which is
    // truthy but not iterable, and the loop below would throw a TypeError.
    if (!Object.prototype.hasOwnProperty.call(CORE_TOOL_PLANNING, extensionId)) continue
    const coreEntries = CORE_TOOL_PLANNING[extensionId] || []
    for (const entry of coreEntries) {
      // Copy the arrays so callers cannot mutate the shared table, and leave
      // the legacy `requestMatchers` field out of the view.
      entries.push({
        toolName: entry.toolName,
        capabilities: [...entry.capabilities],
        disciplineGuidance: [...entry.disciplineGuidance],
      })
    }
  }

  for (const entry of [
    ...getNativeCapabilityTools(expandedExtensionIds),
    ...getExtensionManager().getTools(expandedExtensionIds),
  ]) {
    const planningEntry = normalizePlanningEntry(entry.tool.name, entry.tool.planning)
    if (planningEntry) entries.push(planningEntry)
  }

  // Sets give de-duplication while preserving insertion order.
  const disciplineSet = new Set<string>()
  const capabilityToTools = new Map<string, Set<string>>()
  for (const entry of entries) {
    for (const line of entry.disciplineGuidance) disciplineSet.add(line)
    for (const capability of entry.capabilities) {
      const current = capabilityToTools.get(capability) || new Set<string>()
      current.add(entry.toolName)
      capabilityToTools.set(capability, current)
    }
  }

  return {
    displayToolIds,
    expandedExtensionIds,
    entries,
    disciplineGuidance: Array.from(disciplineSet),
    // Convert the internal Sets to plain arrays for the public shape.
    capabilityToTools: new Map(
      Array.from(capabilityToTools.entries()).map(([capability, toolNames]) => [capability, Array.from(toolNames)]),
    ),
  }
}
477  
/**
 * Lists the tools that declare `capability` among the enabled extensions.
 *
 * @param enabledExtensions Enabled extension/tool IDs.
 * @param capability Capability identifier to look up.
 * @returns Matching tool names, or an empty array when none declare it.
 */
export function getToolsForCapability(enabledExtensions: string[], capability: string): string[] {
  const { capabilityToTools } = getEnabledToolPlanningView(enabledExtensions)
  const toolNames = capabilityToTools.get(capability)
  return toolNames || []
}
481  
/**
 * Returns the first tool that declares `capability`, if any.
 *
 * @param enabledExtensions Enabled extension/tool IDs.
 * @param capability Capability identifier to look up.
 * @returns The first tool name from `getToolsForCapability`, or `null` when
 *   the list is empty or its first name is falsy.
 */
export function getFirstToolForCapability(enabledExtensions: string[], capability: string): string | null {
  const [firstTool] = getToolsForCapability(enabledExtensions, capability)
  return firstTool ? firstTool : null
}