Cradicle Explorer

runAgent.ts
  1  import { feature } from 'bun:bundle'
  2  import type { UUID } from 'crypto'
  3  import { randomUUID } from 'crypto'
  4  import uniqBy from 'lodash-es/uniqBy.js'
  5  import { logForDebugging } from 'src/utils/debug.js'
  6  import { getProjectRoot, getSessionId } from '../../bootstrap/state.js'
  7  import { getCommand, getSkillToolCommands, hasCommand } from '../../commands.js'
  8  import {
  9    DEFAULT_AGENT_PROMPT,
 10    enhanceSystemPromptWithEnvDetails,
 11  } from '../../constants/prompts.js'
 12  import type { QuerySource } from '../../constants/querySource.js'
 13  import { getSystemContext, getUserContext } from '../../context.js'
 14  import type { CanUseToolFn } from '../../hooks/useCanUseTool.js'
 15  import { query } from '../../query.js'
 16  import { getFeatureValue_CACHED_MAY_BE_STALE } from '../../services/analytics/growthbook.js'
 17  import { getDumpPromptsPath } from '../../services/api/dumpPrompts.js'
 18  import { cleanupAgentTracking } from '../../services/api/promptCacheBreakDetection.js'
 19  import {
 20    connectToServer,
 21    fetchToolsForClient,
 22  } from '../../services/mcp/client.js'
 23  import { getMcpConfigByName } from '../../services/mcp/config.js'
 24  import type {
 25    MCPServerConnection,
 26    ScopedMcpServerConfig,
 27  } from '../../services/mcp/types.js'
 28  import type { Tool, Tools, ToolUseContext } from '../../Tool.js'
 29  import { killShellTasksForAgent } from '../../tasks/LocalShellTask/killShellTasks.js'
 30  import type { Command } from '../../types/command.js'
 31  import type { AgentId } from '../../types/ids.js'
 32  import type {
 33    AssistantMessage,
 34    Message,
 35    ProgressMessage,
 36    RequestStartEvent,
 37    StreamEvent,
 38    SystemCompactBoundaryMessage,
 39    TombstoneMessage,
 40    ToolUseSummaryMessage,
 41    UserMessage,
 42  } from '../../types/message.js'
 43  import { createAttachmentMessage } from '../../utils/attachments.js'
 44  import { AbortError } from '../../utils/errors.js'
 45  import { getDisplayPath } from '../../utils/file.js'
 46  import {
 47    cloneFileStateCache,
 48    createFileStateCacheWithSizeLimit,
 49    READ_FILE_STATE_CACHE_SIZE,
 50  } from '../../utils/fileStateCache.js'
 51  import {
 52    type CacheSafeParams,
 53    createSubagentContext,
 54  } from '../../utils/forkedAgent.js'
 55  import { registerFrontmatterHooks } from '../../utils/hooks/registerFrontmatterHooks.js'
 56  import { clearSessionHooks } from '../../utils/hooks/sessionHooks.js'
 57  import { executeSubagentStartHooks } from '../../utils/hooks.js'
 58  import { createUserMessage } from '../../utils/messages.js'
 59  import { getAgentModel } from '../../utils/model/agent.js'
 60  import type { ModelAlias } from '../../utils/model/aliases.js'
 61  import {
 62    clearAgentTranscriptSubdir,
 63    recordSidechainTranscript,
 64    setAgentTranscriptSubdir,
 65    writeAgentMetadata,
 66  } from '../../utils/sessionStorage.js'
 67  import {
 68    isRestrictedToPluginOnly,
 69    isSourceAdminTrusted,
 70  } from '../../utils/settings/pluginOnlyPolicy.js'
 71  import {
 72    asSystemPrompt,
 73    type SystemPrompt,
 74  } from '../../utils/systemPromptType.js'
 75  import {
 76    isPerfettoTracingEnabled,
 77    registerAgent as registerPerfettoAgent,
 78    unregisterAgent as unregisterPerfettoAgent,
 79  } from '../../utils/telemetry/perfettoTracing.js'
 80  import type { ContentReplacementState } from '../../utils/toolResultStorage.js'
 81  import { createAgentId } from '../../utils/uuid.js'
 82  import { resolveAgentTools } from './agentToolUtils.js'
 83  import { type AgentDefinition, isBuiltInAgent } from './loadAgentsDir.js'
 84  
 85  /**
 86   * Initialize agent-specific MCP servers
 87   * Agents can define their own MCP servers in their frontmatter that are additive
 88   * to the parent's MCP clients. These servers are connected when the agent starts
 89   * and cleaned up when the agent finishes.
 90   *
 91   * @param agentDefinition The agent definition with optional mcpServers
 92   * @param parentClients MCP clients inherited from parent context
 93   * @returns Merged clients (parent + agent-specific), agent MCP tools, and cleanup function
 94   */
 95  async function initializeAgentMcpServers(
 96    agentDefinition: AgentDefinition,
 97    parentClients: MCPServerConnection[],
 98  ): Promise<{
 99    clients: MCPServerConnection[]
100    tools: Tools
101    cleanup: () => Promise<void>
102  }> {
103    // If no agent-specific servers defined, return parent clients as-is
104    if (!agentDefinition.mcpServers?.length) {
105      return {
106        clients: parentClients,
107        tools: [],
108        cleanup: async () => {},
109      }
110    }
111  
112    // When MCP is locked to plugin-only, skip frontmatter MCP servers for
113    // USER-CONTROLLED agents only. Plugin, built-in, and policySettings agents
114    // are admin-trusted — their frontmatter MCP is part of the admin-approved
115    // surface. Blocking them (as the first cut did) breaks plugin agents that
116    // legitimately need MCP, contradicting "plugin-provided always loads."
117    const agentIsAdminTrusted = isSourceAdminTrusted(agentDefinition.source)
118    if (isRestrictedToPluginOnly('mcp') && !agentIsAdminTrusted) {
119      logForDebugging(
120        `[Agent: ${agentDefinition.agentType}] Skipping MCP servers: strictPluginOnlyCustomization locks MCP to plugin-only (agent source: ${agentDefinition.source})`,
121      )
122      return {
123        clients: parentClients,
124        tools: [],
125        cleanup: async () => {},
126      }
127    }
128  
129    const agentClients: MCPServerConnection[] = []
130    // Track which clients were newly created (inline definitions) vs. shared from parent
131    // Only newly created clients should be cleaned up when the agent finishes
132    const newlyCreatedClients: MCPServerConnection[] = []
133    const agentTools: Tool[] = []
134  
135    for (const spec of agentDefinition.mcpServers) {
136      let config: ScopedMcpServerConfig | null = null
137      let name: string
138      let isNewlyCreated = false
139  
140      if (typeof spec === 'string') {
141        // Reference by name - look up in existing MCP configs
142        // This uses the memoized connectToServer, so we may get a shared client
143        name = spec
144        config = getMcpConfigByName(spec)
145        if (!config) {
146          logForDebugging(
147            `[Agent: ${agentDefinition.agentType}] MCP server not found: ${spec}`,
148            { level: 'warn' },
149          )
150          continue
151        }
152      } else {
153        // Inline definition as { [name]: config }
154        // These are agent-specific servers that should be cleaned up
155        const entries = Object.entries(spec)
156        if (entries.length !== 1) {
157          logForDebugging(
158            `[Agent: ${agentDefinition.agentType}] Invalid MCP server spec: expected exactly one key`,
159            { level: 'warn' },
160          )
161          continue
162        }
163        const [serverName, serverConfig] = entries[0]!
164        name = serverName
165        config = {
166          ...serverConfig,
167          scope: 'dynamic' as const,
168        } as ScopedMcpServerConfig
169        isNewlyCreated = true
170      }
171  
172      // Connect to the server
173      const client = await connectToServer(name, config)
174      agentClients.push(client)
175      if (isNewlyCreated) {
176        newlyCreatedClients.push(client)
177      }
178  
179      // Fetch tools if connected
180      if (client.type === 'connected') {
181        const tools = await fetchToolsForClient(client)
182        agentTools.push(...tools)
183        logForDebugging(
184          `[Agent: ${agentDefinition.agentType}] Connected to MCP server '${name}' with ${tools.length} tools`,
185        )
186      } else {
187        logForDebugging(
188          `[Agent: ${agentDefinition.agentType}] Failed to connect to MCP server '${name}': ${client.type}`,
189          { level: 'warn' },
190        )
191      }
192    }
193  
194    // Create cleanup function for agent-specific servers
195    // Only clean up newly created clients (inline definitions), not shared/referenced ones
196    // Shared clients (referenced by string name) are memoized and used by the parent context
197    const cleanup = async () => {
198      for (const client of newlyCreatedClients) {
199        if (client.type === 'connected') {
200          try {
201            await client.cleanup()
202          } catch (error) {
203            logForDebugging(
204              `[Agent: ${agentDefinition.agentType}] Error cleaning up MCP server '${client.name}': ${error}`,
205              { level: 'warn' },
206            )
207          }
208        }
209      }
210    }
211  
212    // Return merged clients (parent + agent-specific) and agent tools
213    return {
214      clients: [...parentClients, ...agentClients],
215      tools: agentTools,
216      cleanup,
217    }
218  }
219  
/**
 * Every kind of value this module expects to receive from query(): regular
 * conversation messages plus the auxiliary stream, request-start,
 * tool-use-summary, and tombstone events.
 */
type QueryMessage =
  | Message
  | StreamEvent
  | RequestStartEvent
  | ToolUseSummaryMessage
  | TombstoneMessage
226  
/**
 * Type guard that decides whether a value produced by query() is one of the
 * message kinds that get recorded.
 *
 * Accepted kinds: `assistant`, `user`, `progress`, and `system` messages whose
 * `subtype` is `compact_boundary`. Everything else is rejected.
 *
 * @param msg A value yielded by query()
 * @returns true when `msg` is a recordable message, narrowing its type
 */
function isRecordableMessage(
  msg: QueryMessage,
): msg is
  | AssistantMessage
  | UserMessage
  | ProgressMessage
  | SystemCompactBoundaryMessage {
  switch (msg.type) {
    case 'assistant':
    case 'user':
    case 'progress':
      return true
    case 'system':
      // Only the compact-boundary flavour of system messages is recordable.
      return 'subtype' in msg && msg.subtype === 'compact_boundary'
    default:
      return false
  }
}
247  
248  export async function* runAgent({
249    agentDefinition,
250    promptMessages,
251    toolUseContext,
252    canUseTool,
253    isAsync,
254    canShowPermissionPrompts,
255    forkContextMessages,
256    querySource,
257    override,
258    model,
259    maxTurns,
260    preserveToolUseResults,
261    availableTools,
262    allowedTools,
263    onCacheSafeParams,
264    contentReplacementState,
265    useExactTools,
266    worktreePath,
267    description,
268    transcriptSubdir,
269    onQueryProgress,
270  }: {
271    agentDefinition: AgentDefinition
272    promptMessages: Message[]
273    toolUseContext: ToolUseContext
274    canUseTool: CanUseToolFn
275    isAsync: boolean
276    /** Whether this agent can show permission prompts. Defaults to !isAsync.
277     * Set to true for in-process teammates that run async but share the terminal. */
278    canShowPermissionPrompts?: boolean
279    forkContextMessages?: Message[]
280    querySource: QuerySource
281    override?: {
282      userContext?: { [k: string]: string }
283      systemContext?: { [k: string]: string }
284      systemPrompt?: SystemPrompt
285      abortController?: AbortController
286      agentId?: AgentId
287    }
288    model?: ModelAlias
289    maxTurns?: number
290    /** Preserve toolUseResult on messages for subagents with viewable transcripts */
291    preserveToolUseResults?: boolean
292    /** Precomputed tool pool for the worker agent. Computed by the caller
293     * (AgentTool.tsx) to avoid a circular dependency between runAgent and tools.ts.
294     * Always contains the full tool pool assembled with the worker's own permission
295     * mode, independent of the parent's tool restrictions. */
296    availableTools: Tools
297    /** Tool permission rules to add to the agent's session allow rules.
298     * When provided, replaces ALL allow rules so the agent only has what's
299     * explicitly listed (parent approvals don't leak through). */
300    allowedTools?: string[]
301    /** Optional callback invoked with CacheSafeParams after constructing the agent's
302     * system prompt, context, and tools. Used by background summarization to fork
303     * the agent's conversation for periodic progress summaries. */
304    onCacheSafeParams?: (params: CacheSafeParams) => void
305    /** Replacement state reconstructed from a resumed sidechain transcript so
306     * the same tool results are re-replaced (prompt cache stability). When
307     * omitted, createSubagentContext clones the parent's state. */
308    contentReplacementState?: ContentReplacementState
309    /** When true, use availableTools directly without filtering through
310     * resolveAgentTools(). Also inherits the parent's thinkingConfig and
311     * isNonInteractiveSession instead of overriding them. Used by the fork
312     * subagent path to produce byte-identical API request prefixes for
313     * prompt cache hits. */
314    useExactTools?: boolean
315    /** Worktree path if the agent was spawned with isolation: "worktree".
316     * Persisted to metadata so resume can restore the correct cwd. */
317    worktreePath?: string
318    /** Original task description from AgentTool input. Persisted to metadata
319     * so a resumed agent's notification can show the original description. */
320    description?: string
321    /** Optional subdirectory under subagents/ to group this agent's transcript
322     * with related ones (e.g. workflows/<runId> for workflow subagents). */
323    transcriptSubdir?: string
324    /** Optional callback fired on every message yielded by query() — including
325     * stream_event deltas that runAgent otherwise drops. Use to detect liveness
326     * during long single-block streams (e.g. thinking) where no assistant
327     * message is yielded for >60s. */
328    onQueryProgress?: () => void
329  }): AsyncGenerator<Message, void> {
330    // Track subagent usage for feature discovery
331  
332    const appState = toolUseContext.getAppState()
333    const permissionMode = appState.toolPermissionContext.mode
334    // Always-shared channel to the root AppState store. toolUseContext.setAppState
335    // is a no-op when the *parent* is itself an async agent (nested async→async),
336    // so session-scoped writes (hooks, bash tasks) must go through this instead.
337    const rootSetAppState =
338      toolUseContext.setAppStateForTasks ?? toolUseContext.setAppState
339  
340    const resolvedAgentModel = getAgentModel(
341      agentDefinition.model,
342      toolUseContext.options.mainLoopModel,
343      model,
344      permissionMode,
345    )
346  
347    const agentId = override?.agentId ? override.agentId : createAgentId()
348  
349    // Route this agent's transcript into a grouping subdirectory if requested
350    // (e.g. workflow subagents write to subagents/workflows/<runId>/).
351    if (transcriptSubdir) {
352      setAgentTranscriptSubdir(agentId, transcriptSubdir)
353    }
354  
355    // Register agent in Perfetto trace for hierarchy visualization
356    if (isPerfettoTracingEnabled()) {
357      const parentId = toolUseContext.agentId ?? getSessionId()
358      registerPerfettoAgent(agentId, agentDefinition.agentType, parentId)
359    }
360  
361    // Log API calls path for subagents (ant-only)
362    if (process.env.USER_TYPE === 'ant') {
363      logForDebugging(
364        `[Subagent ${agentDefinition.agentType}] API calls: ${getDisplayPath(getDumpPromptsPath(agentId))}`,
365      )
366    }
367  
368    // Handle message forking for context sharing
369    // Filter out incomplete tool calls from parent messages to avoid API errors
370    const contextMessages: Message[] = forkContextMessages
371      ? filterIncompleteToolCalls(forkContextMessages)
372      : []
373    const initialMessages: Message[] = [...contextMessages, ...promptMessages]
374  
375    const agentReadFileState =
376      forkContextMessages !== undefined
377        ? cloneFileStateCache(toolUseContext.readFileState)
378        : createFileStateCacheWithSizeLimit(READ_FILE_STATE_CACHE_SIZE)
379  
380    const [baseUserContext, baseSystemContext] = await Promise.all([
381      override?.userContext ?? getUserContext(),
382      override?.systemContext ?? getSystemContext(),
383    ])
384  
385    // Read-only agents (Explore, Plan) don't act on commit/PR/lint rules from
386    // CLAUDE.md — the main agent has full context and interprets their output.
387    // Dropping claudeMd here saves ~5-15 Gtok/week across 34M+ Explore spawns.
388    // Explicit override.userContext from callers is preserved untouched.
389    // Kill-switch defaults true; flip tengu_slim_subagent_claudemd=false to revert.
390    const shouldOmitClaudeMd =
391      agentDefinition.omitClaudeMd &&
392      !override?.userContext &&
393      getFeatureValue_CACHED_MAY_BE_STALE('tengu_slim_subagent_claudemd', true)
394    const { claudeMd: _omittedClaudeMd, ...userContextNoClaudeMd } =
395      baseUserContext
396    const resolvedUserContext = shouldOmitClaudeMd
397      ? userContextNoClaudeMd
398      : baseUserContext
399  
400    // Explore/Plan are read-only search agents — the parent-session-start
401    // gitStatus (up to 40KB, explicitly labeled stale) is dead weight. If they
402    // need git info they run `git status` themselves and get fresh data.
403    // Saves ~1-3 Gtok/week fleet-wide.
404    const { gitStatus: _omittedGitStatus, ...systemContextNoGit } =
405      baseSystemContext
406    const resolvedSystemContext =
407      agentDefinition.agentType === 'Explore' ||
408      agentDefinition.agentType === 'Plan'
409        ? systemContextNoGit
410        : baseSystemContext
411  
412    // Override permission mode if agent defines one
413    // However, don't override if parent is in bypassPermissions or acceptEdits mode - those should always take precedence
414    // For async agents, also set shouldAvoidPermissionPrompts since they can't show UI
415    const agentPermissionMode = agentDefinition.permissionMode
416    const agentGetAppState = () => {
417      const state = toolUseContext.getAppState()
418      let toolPermissionContext = state.toolPermissionContext
419  
420      // Override permission mode if agent defines one (unless parent is bypassPermissions, acceptEdits, or auto)
421      if (
422        agentPermissionMode &&
423        state.toolPermissionContext.mode !== 'bypassPermissions' &&
424        state.toolPermissionContext.mode !== 'acceptEdits' &&
425        !(
426          feature('TRANSCRIPT_CLASSIFIER') &&
427          state.toolPermissionContext.mode === 'auto'
428        )
429      ) {
430        toolPermissionContext = {
431          ...toolPermissionContext,
432          mode: agentPermissionMode,
433        }
434      }
435  
436      // Set flag to auto-deny prompts for agents that can't show UI
437      // Use explicit canShowPermissionPrompts if provided, otherwise:
438      //   - bubble mode: always show prompts (bubbles to parent terminal)
439      //   - default: !isAsync (sync agents show prompts, async agents don't)
440      const shouldAvoidPrompts =
441        canShowPermissionPrompts !== undefined
442          ? !canShowPermissionPrompts
443          : agentPermissionMode === 'bubble'
444            ? false
445            : isAsync
446      if (shouldAvoidPrompts) {
447        toolPermissionContext = {
448          ...toolPermissionContext,
449          shouldAvoidPermissionPrompts: true,
450        }
451      }
452  
453      // For background agents that can show prompts, await automated checks
454      // (classifier, permission hooks) before showing the permission dialog.
455      // Since these are background agents, waiting is fine — the user should
456      // only be interrupted when automated checks can't resolve the permission.
457      // This applies to bubble mode (always) and explicit canShowPermissionPrompts.
458      if (isAsync && !shouldAvoidPrompts) {
459        toolPermissionContext = {
460          ...toolPermissionContext,
461          awaitAutomatedChecksBeforeDialog: true,
462        }
463      }
464  
465      // Scope tool permissions: when allowedTools is provided, use them as session rules.
466      // IMPORTANT: Preserve cliArg rules (from SDK's --allowedTools) since those are
467      // explicit permissions from the SDK consumer that should apply to all agents.
468      // Only clear session-level rules from the parent to prevent unintended leakage.
469      if (allowedTools !== undefined) {
470        toolPermissionContext = {
471          ...toolPermissionContext,
472          alwaysAllowRules: {
473            // Preserve SDK-level permissions from --allowedTools
474            cliArg: state.toolPermissionContext.alwaysAllowRules.cliArg,
475            // Use the provided allowedTools as session-level permissions
476            session: [...allowedTools],
477          },
478        }
479      }
480  
481      // Override effort level if agent defines one
482      const effortValue =
483        agentDefinition.effort !== undefined
484          ? agentDefinition.effort
485          : state.effortValue
486  
487      if (
488        toolPermissionContext === state.toolPermissionContext &&
489        effortValue === state.effortValue
490      ) {
491        return state
492      }
493      return {
494        ...state,
495        toolPermissionContext,
496        effortValue,
497      }
498    }
499  
500    const resolvedTools = useExactTools
501      ? availableTools
502      : resolveAgentTools(agentDefinition, availableTools, isAsync).resolvedTools
503  
504    const additionalWorkingDirectories = Array.from(
505      appState.toolPermissionContext.additionalWorkingDirectories.keys(),
506    )
507  
508    const agentSystemPrompt = override?.systemPrompt
509      ? override.systemPrompt
510      : asSystemPrompt(
511          await getAgentSystemPrompt(
512            agentDefinition,
513            toolUseContext,
514            resolvedAgentModel,
515            additionalWorkingDirectories,
516            resolvedTools,
517          ),
518        )
519  
520    // Determine abortController:
521    // - Override takes precedence
522    // - Async agents get a new unlinked controller (runs independently)
523    // - Sync agents share parent's controller
524    const agentAbortController = override?.abortController
525      ? override.abortController
526      : isAsync
527        ? new AbortController()
528        : toolUseContext.abortController
529  
530    // Execute SubagentStart hooks and collect additional context
531    const additionalContexts: string[] = []
532    for await (const hookResult of executeSubagentStartHooks(
533      agentId,
534      agentDefinition.agentType,
535      agentAbortController.signal,
536    )) {
537      if (
538        hookResult.additionalContexts &&
539        hookResult.additionalContexts.length > 0
540      ) {
541        additionalContexts.push(...hookResult.additionalContexts)
542      }
543    }
544  
545    // Add SubagentStart hook context as a user message (consistent with SessionStart/UserPromptSubmit)
546    if (additionalContexts.length > 0) {
547      const contextMessage = createAttachmentMessage({
548        type: 'hook_additional_context',
549        content: additionalContexts,
550        hookName: 'SubagentStart',
551        toolUseID: randomUUID(),
552        hookEvent: 'SubagentStart',
553      })
554      initialMessages.push(contextMessage)
555    }
556  
557    // Register agent's frontmatter hooks (scoped to agent lifecycle)
558    // Pass isAgent=true to convert Stop hooks to SubagentStop (since subagents trigger SubagentStop)
559    // Same admin-trusted gate for frontmatter hooks: under ["hooks"] alone
560    // (skills/agents not locked), user agents still load — block their
561    // frontmatter-hook REGISTRATION here where source is known, rather than
562    // blanket-blocking all session hooks at execution time (which would
563    // also kill plugin agents' hooks).
564    const hooksAllowedForThisAgent =
565      !isRestrictedToPluginOnly('hooks') ||
566      isSourceAdminTrusted(agentDefinition.source)
567    if (agentDefinition.hooks && hooksAllowedForThisAgent) {
568      registerFrontmatterHooks(
569        rootSetAppState,
570        agentId,
571        agentDefinition.hooks,
572        `agent '${agentDefinition.agentType}'`,
573        true, // isAgent - converts Stop to SubagentStop
574      )
575    }
576  
577    // Preload skills from agent frontmatter
578    const skillsToPreload = agentDefinition.skills ?? []
579    if (skillsToPreload.length > 0) {
580      const allSkills = await getSkillToolCommands(getProjectRoot())
581  
582      // Filter valid skills and warn about missing ones
583      const validSkills: Array<{
584        skillName: string
585        skill: (typeof allSkills)[0] & { type: 'prompt' }
586      }> = []
587  
588      for (const skillName of skillsToPreload) {
589        // Resolve the skill name, trying multiple strategies:
590        // 1. Exact match (hasCommand checks name, userFacingName, aliases)
591        // 2. Fully-qualified with agent's plugin prefix (e.g., "my-skill" → "plugin:my-skill")
592        // 3. Suffix match on ":skillName" for plugin-namespaced skills
593        const resolvedName = resolveSkillName(
594          skillName,
595          allSkills,
596          agentDefinition,
597        )
598        if (!resolvedName) {
599          logForDebugging(
600            `[Agent: ${agentDefinition.agentType}] Warning: Skill '${skillName}' specified in frontmatter was not found`,
601            { level: 'warn' },
602          )
603          continue
604        }
605  
606        const skill = getCommand(resolvedName, allSkills)
607        if (skill.type !== 'prompt') {
608          logForDebugging(
609            `[Agent: ${agentDefinition.agentType}] Warning: Skill '${skillName}' is not a prompt-based skill`,
610            { level: 'warn' },
611          )
612          continue
613        }
614        validSkills.push({ skillName, skill })
615      }
616  
617      // Load all skill contents concurrently and add to initial messages
618      const { formatSkillLoadingMetadata } = await import(
619        '../../utils/processUserInput/processSlashCommand.js'
620      )
621      const loaded = await Promise.all(
622        validSkills.map(async ({ skillName, skill }) => ({
623          skillName,
624          skill,
625          content: await skill.getPromptForCommand('', toolUseContext),
626        })),
627      )
628      for (const { skillName, skill, content } of loaded) {
629        logForDebugging(
630          `[Agent: ${agentDefinition.agentType}] Preloaded skill '${skillName}'`,
631        )
632  
633        // Add command-message metadata so the UI shows which skill is loading
634        const metadata = formatSkillLoadingMetadata(
635          skillName,
636          skill.progressMessage,
637        )
638  
639        initialMessages.push(
640          createUserMessage({
641            content: [{ type: 'text', text: metadata }, ...content],
642            isMeta: true,
643          }),
644        )
645      }
646    }
647  
648    // Initialize agent-specific MCP servers (additive to parent's servers)
649    const {
650      clients: mergedMcpClients,
651      tools: agentMcpTools,
652      cleanup: mcpCleanup,
653    } = await initializeAgentMcpServers(
654      agentDefinition,
655      toolUseContext.options.mcpClients,
656    )
657  
658    // Merge agent MCP tools with resolved agent tools, deduplicating by name.
659    // resolvedTools is already deduplicated (see resolveAgentTools), so skip
660    // the spread + uniqBy overhead when there are no agent-specific MCP tools.
661    const allTools =
662      agentMcpTools.length > 0
663        ? uniqBy([...resolvedTools, ...agentMcpTools], 'name')
664        : resolvedTools
665  
666    // Build agent-specific options
667    const agentOptions: ToolUseContext['options'] = {
668      isNonInteractiveSession: useExactTools
669        ? toolUseContext.options.isNonInteractiveSession
670        : isAsync
671          ? true
672          : (toolUseContext.options.isNonInteractiveSession ?? false),
673      appendSystemPrompt: toolUseContext.options.appendSystemPrompt,
674      tools: allTools,
675      commands: [],
676      debug: toolUseContext.options.debug,
677      verbose: toolUseContext.options.verbose,
678      mainLoopModel: resolvedAgentModel,
679      // For fork children (useExactTools), inherit thinking config to match the
680      // parent's API request prefix for prompt cache hits. For regular
681      // sub-agents, disable thinking to control output token costs.
682      thinkingConfig: useExactTools
683        ? toolUseContext.options.thinkingConfig
684        : { type: 'disabled' as const },
685      mcpClients: mergedMcpClients,
686      mcpResources: toolUseContext.options.mcpResources,
687      agentDefinitions: toolUseContext.options.agentDefinitions,
688      // Fork children (useExactTools path) need querySource on context.options
689      // for the recursive-fork guard at AgentTool.tsx call() — it checks
690      // options.querySource === 'agent:builtin:fork'. This survives autocompact
691      // (which rewrites messages, not context.options). Without this, the guard
692      // reads undefined and only the message-scan fallback fires — which
693      // autocompact defeats by replacing the fork-boilerplate message.
694      ...(useExactTools && { querySource }),
695    }
696  
697    // Create subagent context using shared helper
698    // - Sync agents share setAppState, setResponseLength, abortController with parent
699    // - Async agents are fully isolated (but with explicit unlinked abortController)
700    const agentToolUseContext = createSubagentContext(toolUseContext, {
701      options: agentOptions,
702      agentId,
703      agentType: agentDefinition.agentType,
704      messages: initialMessages,
705      readFileState: agentReadFileState,
706      abortController: agentAbortController,
707      getAppState: agentGetAppState,
708      // Sync agents share these callbacks with parent
709      shareSetAppState: !isAsync,
710      shareSetResponseLength: true, // Both sync and async contribute to response metrics
711      criticalSystemReminder_EXPERIMENTAL:
712        agentDefinition.criticalSystemReminder_EXPERIMENTAL,
713      contentReplacementState,
714    })
715  
716    // Preserve tool use results for subagents with viewable transcripts (in-process teammates)
717    if (preserveToolUseResults) {
718      agentToolUseContext.preserveToolUseResults = true
719    }
720  
721    // Expose cache-safe params for background summarization (prompt cache sharing)
722    if (onCacheSafeParams) {
723      onCacheSafeParams({
724        systemPrompt: agentSystemPrompt,
725        userContext: resolvedUserContext,
726        systemContext: resolvedSystemContext,
727        toolUseContext: agentToolUseContext,
728        forkContextMessages: initialMessages,
729      })
730    }
731  
732    // Record initial messages before the query loop starts, plus the agentType
733    // so resume can route correctly when subagent_type is omitted. Both writes
734    // are fire-and-forget — persistence failure shouldn't block the agent.
735    void recordSidechainTranscript(initialMessages, agentId).catch(_err =>
736      logForDebugging(`Failed to record sidechain transcript: ${_err}`),
737    )
738    void writeAgentMetadata(agentId, {
739      agentType: agentDefinition.agentType,
740      ...(worktreePath && { worktreePath }),
741      ...(description && { description }),
742    }).catch(_err => logForDebugging(`Failed to write agent metadata: ${_err}`))
743  
744    // Track the last recorded message UUID for parent chain continuity
745    let lastRecordedUuid: UUID | null = initialMessages.at(-1)?.uuid ?? null
746  
747    try {
748      for await (const message of query({
749        messages: initialMessages,
750        systemPrompt: agentSystemPrompt,
751        userContext: resolvedUserContext,
752        systemContext: resolvedSystemContext,
753        canUseTool,
754        toolUseContext: agentToolUseContext,
755        querySource,
756        maxTurns: maxTurns ?? agentDefinition.maxTurns,
757      })) {
758        onQueryProgress?.()
759        // Forward subagent API request starts to parent's metrics display
760        // so TTFT/OTPS update during subagent execution.
761        if (
762          message.type === 'stream_event' &&
763          message.event.type === 'message_start' &&
764          message.ttftMs != null
765        ) {
766          toolUseContext.pushApiMetricsEntry?.(message.ttftMs)
767          continue
768        }
769  
770        // Yield attachment messages (e.g., structured_output) without recording them
771        if (message.type === 'attachment') {
772          // Handle max turns reached signal from query.ts
773          if (message.attachment.type === 'max_turns_reached') {
774            logForDebugging(
775              `[Agent
776  : $
777  {
778    agentDefinition.agentType
779  }
780  ] Reached max turns limit ($
781  {
782    message.attachment.maxTurns
783  }
784  )`,
785            )
786            break
787          }
788          yield message
789          continue
790        }
791  
792        if (isRecordableMessage(message)) {
793          // Record only the new message with correct parent (O(1) per message)
794          await recordSidechainTranscript(
795            [message],
796            agentId,
797            lastRecordedUuid,
798          ).catch(err =>
799            logForDebugging(`Failed to record sidechain transcript: ${err}`),
800          )
801          if (message.type !== 'progress') {
802            lastRecordedUuid = message.uuid
803          }
804          yield message
805        }
806      }
807  
808      if (agentAbortController.signal.aborted) {
809        throw new AbortError()
810      }
811  
812      // Run callback if provided (only built-in agents have callbacks)
813      if (isBuiltInAgent(agentDefinition) && agentDefinition.callback) {
814        agentDefinition.callback()
815      }
816    } finally {
817      // Clean up agent-specific MCP servers (runs on normal completion, abort, or error)
818      await mcpCleanup()
819      // Clean up agent's session hooks
820      if (agentDefinition.hooks) {
821        clearSessionHooks(rootSetAppState, agentId)
822      }
823      // Clean up prompt cache tracking state for this agent
824      if (feature('PROMPT_CACHE_BREAK_DETECTION')) {
825        cleanupAgentTracking(agentId)
826      }
827      // Release cloned file state cache memory
828      agentToolUseContext.readFileState.clear()
829      // Release the cloned fork context messages
830      initialMessages.length = 0
831      // Release perfetto agent registry entry
832      unregisterPerfettoAgent(agentId)
833      // Release transcript subdir mapping
834      clearAgentTranscriptSubdir(agentId)
835      // Release this agent's todos entry. Without this, every subagent that
836      // called TodoWrite leaves a key in AppState.todos forever (even after all
837      // items complete, the value is [] but the key stays). Whale sessions
838      // spawn hundreds of agents; each orphaned key is a small leak that adds up.
839      rootSetAppState(prev => {
840        if (!(agentId in prev.todos)) return prev
841        const { [agentId]: _removed, ...todos } = prev.todos
842        return { ...prev, todos }
843      })
844      // Kill any background bash tasks this agent spawned. Without this, a
845      // `run_in_background` shell loop (e.g. test fixture fake-logs.sh) outlives
846      // the agent as a PPID=1 zombie once the main session eventually exits.
847      killShellTasksForAgent(agentId, toolUseContext.getAppState, rootSetAppState)
848      /* eslint-disable @typescript-eslint/no-require-imports */
849      if (feature('MONITOR_TOOL')) {
850        const mcpMod =
851          require('../../tasks/MonitorMcpTask/MonitorMcpTask.js') as typeof import('../../tasks/MonitorMcpTask/MonitorMcpTask.js')
852        mcpMod.killMonitorMcpTasksForAgent(
853          agentId,
854          toolUseContext.getAppState,
855          rootSetAppState,
856        )
857      }
858      /* eslint-enable @typescript-eslint/no-require-imports */
859    }
860  }
861  
/**
 * Drops assistant messages whose tool calls never received a result.
 *
 * An assistant message is removed when at least one of its `tool_use` blocks
 * carries an id that no `tool_result` block in any user message of the list
 * refers to. Such orphaned tool calls cause API errors when sent, so they are
 * stripped beforehand. Every other message is kept, in its original order.
 */
export function filterIncompleteToolCalls(messages: Message[]): Message[] {
  // Pass 1: gather the id of every tool use that was answered by a tool_result.
  const answeredToolUseIds = new Set<string>()
  for (const candidate of messages) {
    if (candidate?.type !== 'user') continue
    const blocks = (candidate as UserMessage).message.content
    // Non-array content cannot hold tool_result blocks.
    if (!Array.isArray(blocks)) continue
    for (const block of blocks) {
      if (block.type === 'tool_result' && block.tool_use_id) {
        answeredToolUseIds.add(block.tool_use_id)
      }
    }
  }

  // Pass 2: keep everything except assistant messages with an unanswered tool use.
  return messages.filter(candidate => {
    if (candidate?.type !== 'assistant') return true
    const blocks = (candidate as AssistantMessage).message.content
    if (!Array.isArray(blocks)) return true
    // A block is acceptable when it is not a tool use, has no id, or was answered.
    return blocks.every(
      block =>
        block.type !== 'tool_use' ||
        !block.id ||
        answeredToolUseIds.has(block.id),
    )
  })
}
905  
/**
 * Builds the system prompt sections for a subagent.
 *
 * Asks the agent definition for its own prompt and passes it through
 * `enhanceSystemPromptWithEnvDetails` together with the resolved model, the
 * additional working directories and the names of the enabled tools. If either
 * step throws, the failure is logged for debugging and the prompt falls back
 * to `DEFAULT_AGENT_PROMPT`, enhanced the same way. An error thrown while
 * enhancing the fallback prompt is not caught and propagates to the caller.
 *
 * @param agentDefinition - Definition whose `getSystemPrompt` supplies the base prompt.
 * @param toolUseContext - Context forwarded to `getSystemPrompt`.
 * @param resolvedAgentModel - Model string forwarded to the enhancer.
 * @param additionalWorkingDirectories - Extra directories forwarded to the enhancer.
 * @param resolvedTools - Tools whose names are forwarded to the enhancer.
 * @returns The enhanced prompt sections.
 */
async function getAgentSystemPrompt(
  agentDefinition: AgentDefinition,
  toolUseContext: Pick<ToolUseContext, 'options'>,
  resolvedAgentModel: string,
  additionalWorkingDirectories: string[],
  resolvedTools: readonly Tool[],
): Promise<string[]> {
  const enabledToolNames = new Set(resolvedTools.map(t => t.name))
  try {
    const agentPrompt = agentDefinition.getSystemPrompt({ toolUseContext })
    const prompts = [agentPrompt]

    return await enhanceSystemPromptWithEnvDetails(
      prompts,
      resolvedAgentModel,
      additionalWorkingDirectories,
      enabledToolNames,
    )
  } catch (error) {
    // Falling back is deliberate best-effort, but leave a trace so a broken
    // agent prompt does not silently turn into the default one.
    const reason = error instanceof Error ? error.message : String(error)
    logForDebugging(
      `[Agent: ${agentDefinition.agentType}] Failed to build system prompt, falling back to default: ${reason}`,
    )
    return enhanceSystemPromptWithEnvDetails(
      [DEFAULT_AGENT_PROMPT],
      resolvedAgentModel,
      additionalWorkingDirectories,
      enabledToolNames,
    )
  }
}
933  
/**
 * Maps a skill name written in agent frontmatter onto a registered command name.
 *
 * Agents spell skills with bare names (e.g., "my-skill") while plugin skills
 * are registered under namespaced names (e.g., "my-plugin:my-skill"). The
 * strategies below are tried in order and the first hit wins:
 *
 * 1. The name exactly as given, via hasCommand (name, userFacingName, aliases)
 * 2. The name qualified with the segment of the agent's type before the first ":"
 *    (e.g., "my-skill" → "my-plugin:my-skill")
 * 3. The first command whose name ends with ":skillName"
 *
 * Returns null when none of the strategies finds a command.
 */
function resolveSkillName(
  skillName: string,
  allSkills: Command[],
  agentDefinition: AgentDefinition,
): string | null {
  // Strategy 1: the bare name is already a known command.
  if (hasCommand(skillName, allSkills)) return skillName

  // Strategy 2: plugin agents are typed "pluginName:agentName", so the leading
  // segment is used as the namespace. An empty segment is skipped.
  const [pluginPrefix] = agentDefinition.agentType.split(':')
  if (pluginPrefix) {
    const namespaced = `${pluginPrefix}:${skillName}`
    if (hasCommand(namespaced, allSkills)) return namespaced
  }

  // Strategy 3: accept any namespace, taking the first command that matches.
  const bySuffix = allSkills.find(candidate =>
    candidate.name.endsWith(`:${skillName}`),
  )
  return bySuffix ? bySuffix.name : null
}