/ services / mcp / useManageMCPConnections.ts
useManageMCPConnections.ts
   1  import { feature } from 'bun:bundle'
   2  import { basename } from 'path'
   3  import { useCallback, useEffect, useRef } from 'react'
   4  import { getSessionId } from '../../bootstrap/state.js'
   5  import type { Command } from '../../commands.js'
   6  import type { Tool } from '../../Tool.js'
   7  import {
   8    clearServerCache,
   9    fetchCommandsForClient,
  10    fetchResourcesForClient,
  11    fetchToolsForClient,
  12    getMcpToolsCommandsAndResources,
  13    reconnectMcpServerImpl,
  14  } from './client.js'
  15  import type {
  16    MCPServerConnection,
  17    ScopedMcpServerConfig,
  18    ServerResource,
  19  } from './types.js'
  20  
  21  /* eslint-disable @typescript-eslint/no-require-imports */
      // Optional, feature-gated helpers. Each binding holds the real function when
      // its build flag is on and `null` otherwise, so every use must either sit
      // behind the same `feature(...)` check or use optional-call syntax.
      // NOTE(review): presumably `require` is used instead of a static import so
      // the module is only pulled in when the flag is enabled — confirm.
      //
      // Used by the list_changed handlers below to fetch a client's MCP skills;
      // null unless MCP_SKILLS is on.
  22  const fetchMcpSkillsForClient = feature('MCP_SKILLS')
  23    ? (
  24        require('../../skills/mcpSkills.js') as typeof import('../../skills/mcpSkills.js')
  25      ).fetchMcpSkillsForClient
  26    : null
      // Called after MCP commands/skills are refreshed to invalidate the
      // skill-search index; null unless EXPERIMENTAL_SKILL_SEARCH is on.
  27  const clearSkillIndexCache = feature('EXPERIMENTAL_SKILL_SEARCH')
  28    ? (
  29        require('../skillSearch/localSearch.js') as typeof import('../skillSearch/localSearch.js')
  30      ).clearSkillIndexCache
  31    : null
  32  
  33  import {
  34    PromptListChangedNotificationSchema,
  35    ResourceListChangedNotificationSchema,
  36    ToolListChangedNotificationSchema,
  37  } from '@modelcontextprotocol/sdk/types.js'
  38  import omit from 'lodash-es/omit.js'
  39  import reject from 'lodash-es/reject.js'
  40  import {
  41    type AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
  42    logEvent,
  43  } from 'src/services/analytics/index.js'
  44  import {
  45    dedupClaudeAiMcpServers,
  46    doesEnterpriseMcpConfigExist,
  47    filterMcpServersByPolicy,
  48    getClaudeCodeMcpConfigs,
  49    isMcpServerDisabled,
  50    setMcpServerEnabled,
  51  } from 'src/services/mcp/config.js'
  52  import type { AppState } from 'src/state/AppState.js'
  53  import type { PluginError } from 'src/types/plugin.js'
  54  import { logForDebugging } from 'src/utils/debug.js'
  55  import { getAllowedChannels } from '../../bootstrap/state.js'
  56  import { useNotifications } from '../../context/notifications.js'
  57  import {
  58    useAppState,
  59    useAppStateStore,
  60    useSetAppState,
  61  } from '../../state/AppState.js'
  62  import { errorMessage } from '../../utils/errors.js'
  63  /* eslint-enable @typescript-eslint/no-require-imports */
  64  import { logMCPDebug, logMCPError } from '../../utils/log.js'
  65  import { enqueue } from '../../utils/messageQueueManager.js'
  66  import {
  67    CHANNEL_PERMISSION_METHOD,
  68    ChannelMessageNotificationSchema,
  69    ChannelPermissionNotificationSchema,
  70    findChannelEntry,
  71    gateChannelServer,
  72    wrapChannelMessage,
  73  } from './channelNotification.js'
  74  import {
  75    type ChannelPermissionCallbacks,
  76    createChannelPermissionCallbacks,
  77    isChannelPermissionRelayEnabled,
  78  } from './channelPermissions.js'
  79  import {
  80    clearClaudeAIMcpConfigsCache,
  81    fetchClaudeAIMcpConfigsIfEligible,
  82  } from './claudeai.js'
  83  import { registerElicitationHandler } from './elicitationHandler.js'
  84  import { getMcpPrefix } from './mcpStringUtils.js'
  85  import { commandBelongsToServer, excludeStalePluginClients } from './utils.js'
  86  
  // Reconnection policy for closed remote transports. A server gets at most
  // MAX_RECONNECT_ATTEMPTS tries; the wait between tries starts at
  // INITIAL_BACKOFF_MS, doubles after every failed attempt, and is capped at
  // MAX_BACKOFF_MS.
  const MAX_RECONNECT_ATTEMPTS = 5
  const INITIAL_BACKOFF_MS = 1_000
  const MAX_BACKOFF_MS = 30_000
  91  
  /**
   * Build the deduplication key for a plugin error.
   *
   * The key has the form `type:source:plugin`. Errors that carry no plugin
   * name — either because the error shape has no `plugin` property, or because
   * the property is present but null/undefined — all use the `no-plugin`
   * placeholder, so they collapse to one key instead of producing a literal
   * `undefined` segment that would defeat deduplication.
   *
   * @param error - Plugin error to derive the key from.
   * @returns A string that is equal for errors considered duplicates.
   */
  function getErrorKey(error: PluginError): string {
    // Not every error shape has a `plugin` property, hence the `in` check.
    const plugin = 'plugin' in error ? error.plugin : undefined
    return `${error.type}:${error.source}:${plugin ?? 'no-plugin'}`
  }
  99  
 /**
  * Append plugin errors to AppState without ever storing the same error twice.
  *
  * An error is a duplicate when its `getErrorKey` matches either an error
  * already in `plugins.errors` or an earlier error in the same `newErrors`
  * batch. The first occurrence wins and relative order is preserved.
  *
  * @param setAppState - State setter that accepts an updater function.
  * @param newErrors - Candidate errors to record; an empty array is a no-op
  *   and does not invoke `setAppState` at all.
  */
 function addErrorsToAppState(
   setAppState: (updater: (prev: AppState) => AppState) => void,
   newErrors: PluginError[],
 ): void {
   if (newErrors.length === 0) return

   setAppState(prevState => {
     // Seed with the keys already in state; keys of accepted new errors are
     // added as we go so repeats inside this batch are dropped too. The set is
     // rebuilt on every invocation, which keeps the updater pure.
     const seenKeys = new Set(
       prevState.plugins.errors.map(e => getErrorKey(e)),
     )

     const uniqueNewErrors: PluginError[] = []
     for (const error of newErrors) {
       const key = getErrorKey(error)
       if (seenKeys.has(key)) continue
       seenKeys.add(key)
       uniqueNewErrors.push(error)
     }

     // Return the identical object when nothing changed so subscribers can
     // skip the update.
     if (uniqueNewErrors.length === 0) {
       return prevState
     }

     return {
       ...prevState,
       plugins: {
         ...prevState.plugins,
         errors: [...prevState.plugins.errors, ...uniqueNewErrors],
       },
     }
   })
 }
 133  
 134  /**
 135   * Hook to manage MCP (Model Context Protocol) server connections and updates
 136   *
 137   * This hook:
 138   * 1. Initializes MCP client connections based on config
 139   * 2. Sets up handlers for connection lifecycle events and sync with app state
 140   * 3. Manages automatic reconnection for remote (non-stdio, non-sdk) transports
 141   * 4. Returns a reconnect function
 142   */
 143  export function useManageMCPConnections(
 144    dynamicMcpConfig: Record<string, ScopedMcpServerConfig> | undefined,
 145    isStrictMcpConfig = false,
 146  ) {
 147    const store = useAppStateStore()
 148    const _authVersion = useAppState(s => s.authVersion)
 149    // Incremented by /reload-plugins (refreshActivePlugins) to pick up newly
 150    // enabled plugin MCP servers. getClaudeCodeMcpConfigs() reads loadAllPlugins()
 151    // which has been cleared by refreshActivePlugins, so the effects below see
 152    // fresh plugin data on re-run.
 153    const _pluginReconnectKey = useAppState(s => s.mcp.pluginReconnectKey)
 154    const setAppState = useSetAppState()
 155  
 156    // Track active reconnection attempts to allow cancellation
 157    const reconnectTimersRef = useRef<Map<string, NodeJS.Timeout>>(new Map())
 158  
 159    // Dedup the --channels blocked warning per skip kind so that a user who
 160    // sees "run /login" (auth skip), logs in, then hits the policy gate
 161    // gets a second toast.
 162    const channelWarnedKindsRef = useRef<
 163      Set<'disabled' | 'auth' | 'policy' | 'marketplace' | 'allowlist'>
 164    >(new Set())
 165    // Channel permission callbacks — constructed once, stable ref. Stored in
 166    // AppState so interactiveHandler can subscribe. The pending Map lives inside
 167    // the closure (not module-level, not AppState — functions-in-state is brittle).
 168    const channelPermCallbacksRef = useRef<ChannelPermissionCallbacks | null>(
 169      null,
 170    )
 171    if (
 172      (feature('KAIROS') || feature('KAIROS_CHANNELS')) &&
 173      channelPermCallbacksRef.current === null
 174    ) {
 175      channelPermCallbacksRef.current = createChannelPermissionCallbacks()
 176    }
 177    // Store callbacks in AppState so interactiveHandler.ts can reach them via
 178    // ctx.toolUseContext.getAppState(). One-time set — the ref is stable.
 179    useEffect(() => {
 180      if (feature('KAIROS') || feature('KAIROS_CHANNELS')) {
 181        const callbacks = channelPermCallbacksRef.current
 182        if (!callbacks) return
 183        // GrowthBook runtime gate — separate from channels so channels can
 184        // ship without this. Checked at mount; mid-session flips need restart.
 185        // If off, callbacks never go into AppState → interactiveHandler sees
 186        // undefined → never sends → intercept has nothing pending → "yes tbxkq"
 187        // flows to Claude as normal chat. One gate, full disable.
 188        if (!isChannelPermissionRelayEnabled()) return
 189        setAppState(prev => {
 190          if (prev.channelPermissionCallbacks === callbacks) return prev
 191          return { ...prev, channelPermissionCallbacks: callbacks }
 192        })
 193        return () => {
 194          setAppState(prev => {
 195            if (prev.channelPermissionCallbacks === undefined) return prev
 196            return { ...prev, channelPermissionCallbacks: undefined }
 197          })
 198        }
 199      }
 200    }, [setAppState])
 201    const { addNotification } = useNotifications()
 202  
 203    // Batched MCP state updates: queue individual server updates and flush them
 204    // in a single setAppState call via setTimeout. Using a time-based window
 205    // (instead of queueMicrotask) ensures updates are batched even when
 206    // connection callbacks arrive at different times due to network I/O.
 207    const MCP_BATCH_FLUSH_MS = 16
 208    type PendingUpdate = MCPServerConnection & {
 209      tools?: Tool[]
 210      commands?: Command[]
 211      resources?: ServerResource[]
 212    }
 213    const pendingUpdatesRef = useRef<PendingUpdate[]>([])
 214    const flushTimerRef = useRef<ReturnType<typeof setTimeout> | null>(null)
 215  
 216    const flushPendingUpdates = useCallback(() => {
 217      flushTimerRef.current = null
 218      const updates = pendingUpdatesRef.current
 219      if (updates.length === 0) return
 220      pendingUpdatesRef.current = []
 221  
 222      setAppState(prevState => {
 223        let mcp = prevState.mcp
 224  
 225        for (const update of updates) {
 226          const {
 227            tools: rawTools,
 228            commands: rawCmds,
 229            resources: rawRes,
 230            ...client
 231          } = update
 232          const tools =
 233            client.type === 'disabled' || client.type === 'failed'
 234              ? (rawTools ?? [])
 235              : rawTools
 236          const commands =
 237            client.type === 'disabled' || client.type === 'failed'
 238              ? (rawCmds ?? [])
 239              : rawCmds
 240          const resources =
 241            client.type === 'disabled' || client.type === 'failed'
 242              ? (rawRes ?? [])
 243              : rawRes
 244  
 245          const prefix = getMcpPrefix(client.name)
 246          const existingClientIndex = mcp.clients.findIndex(
 247            c => c.name === client.name,
 248          )
 249  
 250          const updatedClients =
 251            existingClientIndex === -1
 252              ? [...mcp.clients, client]
 253              : mcp.clients.map(c => (c.name === client.name ? client : c))
 254  
 255          const updatedTools =
 256            tools === undefined
 257              ? mcp.tools
 258              : [...reject(mcp.tools, t => t.name?.startsWith(prefix)), ...tools]
 259  
 260          const updatedCommands =
 261            commands === undefined
 262              ? mcp.commands
 263              : [
 264                  ...reject(mcp.commands, c =>
 265                    commandBelongsToServer(c, client.name),
 266                  ),
 267                  ...commands,
 268                ]
 269  
 270          const updatedResources =
 271            resources === undefined
 272              ? mcp.resources
 273              : {
 274                  ...mcp.resources,
 275                  ...(resources.length > 0
 276                    ? { [client.name]: resources }
 277                    : omit(mcp.resources, client.name)),
 278                }
 279  
 280          mcp = {
 281            ...mcp,
 282            clients: updatedClients,
 283            tools: updatedTools,
 284            commands: updatedCommands,
 285            resources: updatedResources,
 286          }
 287        }
 288  
 289        return { ...prevState, mcp }
 290      })
 291    }, [setAppState])
 292  
 293    // Update server state, tools, commands, and resources.
 294    // When tools, commands, or resources are undefined, the existing values are preserved.
 295    // When type is 'disabled' or 'failed', tools/commands/resources are automatically cleared.
 296    // Updates are batched via setTimeout to coalesce updates arriving within MCP_BATCH_FLUSH_MS.
 297    const updateServer = useCallback(
 298      (update: PendingUpdate) => {
 299        pendingUpdatesRef.current.push(update)
 300        if (flushTimerRef.current === null) {
 301          flushTimerRef.current = setTimeout(
 302            flushPendingUpdates,
 303            MCP_BATCH_FLUSH_MS,
 304          )
 305        }
 306      },
 307      [flushPendingUpdates],
 308    )
 309  
 310    const onConnectionAttempt = useCallback(
 311      ({
 312        client,
 313        tools,
 314        commands,
 315        resources,
 316      }: {
 317        client: MCPServerConnection
 318        tools: Tool[]
 319        commands: Command[]
 320        resources?: ServerResource[]
 321      }) => {
 322        updateServer({ ...client, tools, commands, resources })
 323  
 324        // Handle side effects based on client state
 325        switch (client.type) {
 326          case 'connected': {
 327            // Overwrite the default elicitation handler registered in connectToServer
 328            // with the real one (queues elicitation in AppState for UI). Registering
 329            // here (once per connect) instead of in a [mcpClients] effect avoids
 330            // re-running for every already-connected server on each state change.
 331            registerElicitationHandler(client.client, client.name, setAppState)
 332  
 333            client.client.onclose = () => {
 334              const configType = client.config.type ?? 'stdio'
 335  
 336              clearServerCache(client.name, client.config).catch(() => {
 337                logForDebugging(
 338                  `Failed to invalidate the server cache: ${client.name}`,
 339                )
 340              })
 341  
 342              // TODO: This really isn't great: ideally we'd check appstate as the source of truth
 343              // as to whether it was disconnected due to a disable, but appstate is stale at this
 344              // point. Getting a live reference to appstate feels a little hacky, so we'll just
 345              // check the disk state. We may want to refactor some of this.
 346              if (isMcpServerDisabled(client.name)) {
 347                logMCPDebug(
 348                  client.name,
 349                  `Server is disabled, skipping automatic reconnection`,
 350                )
 351                return
 352              }
 353  
 354              // Handle automatic reconnection for remote transports
 355              // Skip stdio (local process) and sdk (internal) - they don't support reconnection
 356              if (configType !== 'stdio' && configType !== 'sdk') {
 357                const transportType = getTransportDisplayName(configType)
 358                logMCPDebug(
 359                  client.name,
 360                  `${transportType} transport closed/disconnected, attempting automatic reconnection`,
 361                )
 362  
 363                // Cancel any existing reconnection attempt for this server
 364                const existingTimer = reconnectTimersRef.current.get(client.name)
 365                if (existingTimer) {
 366                  clearTimeout(existingTimer)
 367                  reconnectTimersRef.current.delete(client.name)
 368                }
 369  
 370                // Attempt reconnection with exponential backoff
 371                const reconnectWithBackoff = async () => {
 372                  for (
 373                    let attempt = 1;
 374                    attempt <= MAX_RECONNECT_ATTEMPTS;
 375                    attempt++
 376                  ) {
 377                    // Check if server was disabled while we were waiting
 378                    if (isMcpServerDisabled(client.name)) {
 379                      logMCPDebug(
 380                        client.name,
 381                        `Server disabled during reconnection, stopping retry`,
 382                      )
 383                      reconnectTimersRef.current.delete(client.name)
 384                      return
 385                    }
 386  
 387                    updateServer({
 388                      ...client,
 389                      type: 'pending',
 390                      reconnectAttempt: attempt,
 391                      maxReconnectAttempts: MAX_RECONNECT_ATTEMPTS,
 392                    })
 393  
 394                    const reconnectStartTime = Date.now()
 395                    try {
 396                      const result = await reconnectMcpServerImpl(
 397                        client.name,
 398                        client.config,
 399                      )
 400                      const elapsed = Date.now() - reconnectStartTime
 401  
 402                      if (result.client.type === 'connected') {
 403                        logMCPDebug(
 404                          client.name,
 405                          `${transportType} reconnection successful after ${elapsed}ms (attempt ${attempt})`,
 406                        )
 407                        reconnectTimersRef.current.delete(client.name)
 408                        onConnectionAttempt(result)
 409                        return
 410                      }
 411  
 412                      logMCPDebug(
 413                        client.name,
 414                        `${transportType} reconnection attempt ${attempt} completed with status: ${result.client.type}`,
 415                      )
 416  
 417                      // On final attempt, update state with the result
 418                      if (attempt === MAX_RECONNECT_ATTEMPTS) {
 419                        logMCPDebug(
 420                          client.name,
 421                          `Max reconnection attempts (${MAX_RECONNECT_ATTEMPTS}) reached, giving up`,
 422                        )
 423                        reconnectTimersRef.current.delete(client.name)
 424                        onConnectionAttempt(result)
 425                        return
 426                      }
 427                    } catch (error) {
 428                      const elapsed = Date.now() - reconnectStartTime
 429                      logMCPError(
 430                        client.name,
 431                        `${transportType} reconnection attempt ${attempt} failed after ${elapsed}ms: ${error}`,
 432                      )
 433  
 434                      // On final attempt, mark as failed
 435                      if (attempt === MAX_RECONNECT_ATTEMPTS) {
 436                        logMCPDebug(
 437                          client.name,
 438                          `Max reconnection attempts (${MAX_RECONNECT_ATTEMPTS}) reached, giving up`,
 439                        )
 440                        reconnectTimersRef.current.delete(client.name)
 441                        updateServer({ ...client, type: 'failed' })
 442                        return
 443                      }
 444                    }
 445  
 446                    // Schedule next retry with exponential backoff
 447                    const backoffMs = Math.min(
 448                      INITIAL_BACKOFF_MS * Math.pow(2, attempt - 1),
 449                      MAX_BACKOFF_MS,
 450                    )
 451                    logMCPDebug(
 452                      client.name,
 453                      `Scheduling reconnection attempt ${attempt + 1} in ${backoffMs}ms`,
 454                    )
 455  
 456                    await new Promise<void>(resolve => {
 457                      // eslint-disable-next-line no-restricted-syntax -- timer stored in ref for cancellation; sleep() doesn't expose the handle
 458                      const timer = setTimeout(resolve, backoffMs)
 459                      reconnectTimersRef.current.set(client.name, timer)
 460                    })
 461                  }
 462                }
 463  
 464                void reconnectWithBackoff()
 465              } else {
 466                updateServer({ ...client, type: 'failed' })
 467              }
 468            }
 469  
 470            // Channel push: notifications/claude/channel → enqueue().
 471            // Gate decides whether to register the handler; connection stays
 472            // up either way (allowedMcpServers controls that).
 473            if (feature('KAIROS') || feature('KAIROS_CHANNELS')) {
 474              const gate = gateChannelServer(
 475                client.name,
 476                client.capabilities,
 477                client.config.pluginSource,
 478              )
 479              const entry = findChannelEntry(client.name, getAllowedChannels())
 480              // Plugin identifier for telemetry — log name@marketplace for any
 481              // plugin-kind entry (same tier as tengu_plugin_installed, which
 482              // logs arbitrary plugin_id+marketplace_name ungated). server-kind
 483              // names are MCP-server-name tier; those are opt-in-only elsewhere
 484              // (see isAnalyticsToolDetailsLoggingEnabled in metadata.ts) and
 485              // stay unlogged here. is_dev/entry_kind segment the rest.
 486              const pluginId =
 487                entry?.kind === 'plugin'
 488                  ? (`${entry.name}@${entry.marketplace}` as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
 489                  : undefined
 490              // Skip capability-miss — every non-channel MCP server trips it.
 491              if (gate.action === 'register' || gate.kind !== 'capability') {
 492                logEvent('tengu_mcp_channel_gate', {
 493                  registered: gate.action === 'register',
 494                  skip_kind:
 495                    gate.action === 'skip'
 496                      ? (gate.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS)
 497                      : undefined,
 498                  entry_kind:
 499                    entry?.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 500                  is_dev: entry?.dev ?? false,
 501                  plugin: pluginId,
 502                })
 503              }
 504              switch (gate.action) {
 505                case 'register':
 506                  logMCPDebug(client.name, 'Channel notifications registered')
 507                  client.client.setNotificationHandler(
 508                    ChannelMessageNotificationSchema(),
 509                    async notification => {
 510                      const { content, meta } = notification.params
 511                      logMCPDebug(
 512                        client.name,
 513                        `notifications/claude/channel: ${content.slice(0, 80)}`,
 514                      )
 515                      logEvent('tengu_mcp_channel_message', {
 516                        content_length: content.length,
 517                        meta_key_count: Object.keys(meta ?? {}).length,
 518                        entry_kind:
 519                          entry?.kind as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 520                        is_dev: entry?.dev ?? false,
 521                        plugin: pluginId,
 522                      })
 523                      enqueue({
 524                        mode: 'prompt',
 525                        value: wrapChannelMessage(client.name, content, meta),
 526                        priority: 'next',
 527                        isMeta: true,
 528                        origin: { kind: 'channel', server: client.name },
 529                        skipSlashCommands: true,
 530                      })
 531                    },
 532                  )
 533                  // Permission-reply handler — separate event, separate
 534                  // capability. Only registers if the server declares
 535                  // claude/channel/permission (same opt-in check as the send
 536                  // path in interactiveHandler.ts). Server parses the user's
 537                  // reply and emits {request_id, behavior}; no regex on our
 538                  // side, text in the general channel can't accidentally match.
 539                  if (
 540                    client.capabilities?.experimental?.[
 541                      'claude/channel/permission'
 542                    ] !== undefined
 543                  ) {
 544                    client.client.setNotificationHandler(
 545                      ChannelPermissionNotificationSchema(),
 546                      async notification => {
 547                        const { request_id, behavior } = notification.params
 548                        const resolved =
 549                          channelPermCallbacksRef.current?.resolve(
 550                            request_id,
 551                            behavior,
 552                            client.name,
 553                          ) ?? false
 554                        logMCPDebug(
 555                          client.name,
 556                          `notifications/claude/channel/permission: ${request_id} → ${behavior} (${resolved ? 'matched pending' : 'no pending entry — stale or unknown ID'})`,
 557                        )
 558                      },
 559                    )
 560                  }
 561                  break
 562                case 'skip':
 563                  // Idempotent teardown so a register→skip re-gate (e.g.
 564                  // effect re-runs after /logout) actually removes the live
 565                  // handler. Without this, mid-session demotion is one-way:
 566                  // the gate says skip but the earlier handler keeps enqueuing.
 567                  // Map.delete — safe when never registered.
 568                  client.client.removeNotificationHandler(
 569                    'notifications/claude/channel',
 570                  )
 571                  client.client.removeNotificationHandler(
 572                    CHANNEL_PERMISSION_METHOD,
 573                  )
 574                  logMCPDebug(
 575                    client.name,
 576                    `Channel notifications skipped: ${gate.reason}`,
 577                  )
 578                  // Surface a once-per-kind toast when a channel server is
 579                  // blocked. This is the only
 580                  // user-visible signal (logMCPDebug above requires --debug).
 581                  // Capability/session skips are expected noise and stay
 582                  // debug-only. marketplace/allowlist run after session — if
 583                  // we're here with those kinds, the user asked for it.
 584                  if (
 585                    gate.kind !== 'capability' &&
 586                    gate.kind !== 'session' &&
 587                    !channelWarnedKindsRef.current.has(gate.kind) &&
 588                    (gate.kind === 'marketplace' ||
 589                      gate.kind === 'allowlist' ||
 590                      entry !== undefined)
 591                  ) {
 592                    channelWarnedKindsRef.current.add(gate.kind)
 593                    // disabled/auth/policy get custom toast copy (shorter, actionable);
 594                    // marketplace/allowlist reuse the gate's reason verbatim
 595                    // since it already names the mismatch.
 596                    const text =
 597                      gate.kind === 'disabled'
 598                        ? 'Channels are not currently available'
 599                        : gate.kind === 'auth'
 600                          ? 'Channels require claude.ai authentication · run /login'
 601                          : gate.kind === 'policy'
 602                            ? 'Channels are not enabled for your org · have an administrator set channelsEnabled: true in managed settings'
 603                            : gate.reason
 604                    addNotification({
 605                      key: `channels-blocked-${gate.kind}`,
 606                      priority: 'high',
 607                      text,
 608                      color: 'warning',
 609                      timeoutMs: 12000,
 610                    })
 611                  }
 612                  break
 613              }
 614            }
 615  
 616            // Register notification handlers for list_changed notifications
 617            // These allow the server to notify us when tools, prompts, or resources change
 618            if (client.capabilities?.tools?.listChanged) {
 619              client.client.setNotificationHandler(
 620                ToolListChangedNotificationSchema,
 621                async () => {
 622                  logMCPDebug(
 623                    client.name,
 624                    `Received tools/list_changed notification, refreshing tools`,
 625                  )
 626                  try {
 627                    // Grab cached promise before invalidating to log previous count
 628                    const previousToolsPromise = fetchToolsForClient.cache.get(
 629                      client.name,
 630                    )
 631                    fetchToolsForClient.cache.delete(client.name)
 632                    const newTools = await fetchToolsForClient(client)
 633                    const newCount = newTools.length
 634                    if (previousToolsPromise) {
 635                      previousToolsPromise.then(
 636                        (previousTools: Tool[]) => {
 637                          logEvent('tengu_mcp_list_changed', {
 638                            type: 'tools' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 639                            previousCount: previousTools.length,
 640                            newCount,
 641                          })
 642                        },
 643                        () => {
 644                          logEvent('tengu_mcp_list_changed', {
 645                            type: 'tools' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 646                            newCount,
 647                          })
 648                        },
 649                      )
 650                    } else {
 651                      logEvent('tengu_mcp_list_changed', {
 652                        type: 'tools' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 653                        newCount,
 654                      })
 655                    }
 656                    updateServer({ ...client, tools: newTools })
 657                  } catch (error) {
 658                    logMCPError(
 659                      client.name,
 660                      `Failed to refresh tools after list_changed notification: ${errorMessage(error)}`,
 661                    )
 662                  }
 663                },
 664              )
 665            }
 666  
 667            if (client.capabilities?.prompts?.listChanged) {
 668              client.client.setNotificationHandler(
 669                PromptListChangedNotificationSchema,
 670                async () => {
 671                  logMCPDebug(
 672                    client.name,
 673                    `Received prompts/list_changed notification, refreshing prompts`,
 674                  )
 675                  logEvent('tengu_mcp_list_changed', {
 676                    type: 'prompts' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 677                  })
 678                  try {
 679                    // Skills come from resources, not prompts — don't invalidate their
 680                    // cache here. fetchMcpSkillsForClient returns the cached result.
 681                    fetchCommandsForClient.cache.delete(client.name)
 682                    const [mcpPrompts, mcpSkills] = await Promise.all([
 683                      fetchCommandsForClient(client),
 684                      feature('MCP_SKILLS')
 685                        ? fetchMcpSkillsForClient!(client)
 686                        : Promise.resolve([]),
 687                    ])
 688                    updateServer({
 689                      ...client,
 690                      commands: [...mcpPrompts, ...mcpSkills],
 691                    })
 692                    // MCP skills changed — invalidate skill-search index so
 693                    // next discovery rebuilds with the new set.
 694                    clearSkillIndexCache?.()
 695                  } catch (error) {
 696                    logMCPError(
 697                      client.name,
 698                      `Failed to refresh prompts after list_changed notification: ${errorMessage(error)}`,
 699                    )
 700                  }
 701                },
 702              )
 703            }
 704  
 705            if (client.capabilities?.resources?.listChanged) {
 706              client.client.setNotificationHandler(
 707                ResourceListChangedNotificationSchema,
 708                async () => {
 709                  logMCPDebug(
 710                    client.name,
 711                    `Received resources/list_changed notification, refreshing resources`,
 712                  )
 713                  logEvent('tengu_mcp_list_changed', {
 714                    type: 'resources' as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
 715                  })
 716                  try {
 717                    fetchResourcesForClient.cache.delete(client.name)
 718                    if (feature('MCP_SKILLS')) {
 719                      // Skills are discovered from resources, so refresh them too.
 720                      // Invalidate prompts cache as well: we write commands here,
 721                      // and a concurrent prompts/list_changed could otherwise have
 722                      // us stomp its fresh result with our cached stale one.
 723                      fetchMcpSkillsForClient!.cache.delete(client.name)
 724                      fetchCommandsForClient.cache.delete(client.name)
 725                      const [newResources, mcpPrompts, mcpSkills] =
 726                        await Promise.all([
 727                          fetchResourcesForClient(client),
 728                          fetchCommandsForClient(client),
 729                          fetchMcpSkillsForClient!(client),
 730                        ])
 731                      updateServer({
 732                        ...client,
 733                        resources: newResources,
 734                        commands: [...mcpPrompts, ...mcpSkills],
 735                      })
 736                      // MCP skills changed — invalidate skill-search index so
 737                      // next discovery rebuilds with the new set.
 738                      clearSkillIndexCache?.()
 739                    } else {
 740                      const newResources = await fetchResourcesForClient(client)
 741                      updateServer({ ...client, resources: newResources })
 742                    }
 743                  } catch (error) {
 744                    logMCPError(
 745                      client.name,
 746                      `Failed to refresh resources after list_changed notification: ${errorMessage(error)}`,
 747                    )
 748                  }
 749                },
 750              )
 751            }
 752            break
 753          }
 754  
 755          case 'needs-auth':
 756          case 'failed':
 757          case 'pending':
 758          case 'disabled':
 759            break
 760        }
 761      },
 762      [updateServer],
 763    )
 764  
 765    // Initialize all servers to pending state if they don't exist in appState.
 766    // Re-runs on session change (/clear) and on /reload-plugins (pluginReconnectKey).
 767    // On plugin reload, also disconnects stale plugin MCP servers (scope 'dynamic')
 768    // that no longer appear in configs — prevents ghost tools from disabled plugins.
 769    // Skip claude.ai dedup here to avoid blocking on the network fetch; the connect
 770    // useEffect below runs immediately after and dedups before connecting.
 771    const sessionId = getSessionId()
 772    useEffect(() => {
 773      async function initializeServersAsPending() {
 774        const { servers: existingConfigs, errors: mcpErrors } = isStrictMcpConfig
 775          ? { servers: {}, errors: [] }
 776          : await getClaudeCodeMcpConfigs(dynamicMcpConfig)
 777        const configs = { ...existingConfigs, ...dynamicMcpConfig }
 778  
 779        // Add MCP errors to plugin errors for UI visibility (deduplicated)
 780        addErrorsToAppState(setAppState, mcpErrors)
 781  
 782        setAppState(prevState => {
 783          // Disconnect MCP servers that are stale: plugin servers removed from
 784          // config, or any server whose config hash changed (edited .mcp.json).
 785          // Stale servers get re-added as 'pending' below since their name is
 786          // now absent from mcpWithoutStale.clients.
 787          const { stale, ...mcpWithoutStale } = excludeStalePluginClients(
 788            prevState.mcp,
 789            configs,
 790          )
 791          // Clean up stale connections. Fire-and-forget — state updaters must
 792          // be synchronous. Three hazards to defuse before calling cleanup:
 793          //   1. Pending reconnect timer would fire with the OLD config.
 794          //   2. onclose (set at L254) starts reconnectWithBackoff with the
 795          //      OLD config from its closure — it checks isMcpServerDisabled
 796          //      but config-changed servers aren't disabled, so it'd race the
 797          //      fresh connection and last updateServer wins.
 798          //   3. clearServerCache internally calls connectToServer (memoized).
 799          //      For never-connected servers (disabled/pending/failed) the
 800          //      cache is empty → real connect attempt → spawn/OAuth just to
 801          //      immediately kill it. Only connected servers need cleanup.
 802          for (const s of stale) {
 803            const timer = reconnectTimersRef.current.get(s.name)
 804            if (timer) {
 805              clearTimeout(timer)
 806              reconnectTimersRef.current.delete(s.name)
 807            }
 808            if (s.type === 'connected') {
 809              s.client.onclose = undefined
 810              void clearServerCache(s.name, s.config).catch(() => {})
 811            }
 812          }
 813  
 814          const existingServerNames = new Set(
 815            mcpWithoutStale.clients.map(c => c.name),
 816          )
 817          const newClients = Object.entries(configs)
 818            .filter(([name]) => !existingServerNames.has(name))
 819            .map(([name, config]) => ({
 820              name,
 821              type: isMcpServerDisabled(name)
 822                ? ('disabled' as const)
 823                : ('pending' as const),
 824              config,
 825            }))
 826  
 827          if (newClients.length === 0 && stale.length === 0) {
 828            return prevState
 829          }
 830  
 831          return {
 832            ...prevState,
 833            mcp: {
 834              ...prevState.mcp,
 835              ...mcpWithoutStale,
 836              clients: [...mcpWithoutStale.clients, ...newClients],
 837            },
 838          }
 839        })
 840      }
 841  
 842      void initializeServersAsPending().catch(error => {
 843        logMCPError(
 844          'useManageMCPConnections',
 845          `Failed to initialize servers as pending: ${errorMessage(error)}`,
 846        )
 847      })
 848    }, [
 849      isStrictMcpConfig,
 850      dynamicMcpConfig,
 851      setAppState,
 852      sessionId,
 853      _pluginReconnectKey,
 854    ])
 855  
 856    // Load MCP configs and connect to servers
 857    // Two-phase loading: Claude Code configs first (fast), then claude.ai configs (may be slow)
 858    useEffect(() => {
 859      let cancelled = false
 860  
 861      async function loadAndConnectMcpConfigs() {
 862        // Clear claude.ai MCP cache so we fetch fresh configs with current auth
 863        // state. This is important when authVersion changes (e.g., after login/
 864        // logout). Kick off the fetch now so it overlaps with loadAllPlugins()
 865        // inside getClaudeCodeMcpConfigs; it's awaited only at the dedup step.
 866        // Phase 2 below awaits the same promise — no second network call.
 867        let claudeaiPromise: Promise<Record<string, ScopedMcpServerConfig>>
 868        if (isStrictMcpConfig || doesEnterpriseMcpConfigExist()) {
 869          claudeaiPromise = Promise.resolve({})
 870        } else {
 871          clearClaudeAIMcpConfigsCache()
 872          claudeaiPromise = fetchClaudeAIMcpConfigsIfEligible()
 873        }
 874  
 875        // Phase 1: Load Claude Code configs. Plugin MCP servers that duplicate a
 876        // --mcp-config entry or a claude.ai connector are suppressed here so they
 877        // don't connect alongside the connector in Phase 2.
 878        const { servers: claudeCodeConfigs, errors: mcpErrors } =
 879          isStrictMcpConfig
 880            ? { servers: {}, errors: [] }
 881            : await getClaudeCodeMcpConfigs(dynamicMcpConfig, claudeaiPromise)
 882        if (cancelled) return
 883  
 884        // Add MCP errors to plugin errors for UI visibility (deduplicated)
 885        addErrorsToAppState(setAppState, mcpErrors)
 886  
 887        const configs = { ...claudeCodeConfigs, ...dynamicMcpConfig }
 888  
 889        // Start connecting to Claude Code servers (don't wait - runs concurrently with Phase 2)
 890        // Filter out disabled servers to avoid unnecessary connection attempts
 891        const enabledConfigs = Object.fromEntries(
 892          Object.entries(configs).filter(([name]) => !isMcpServerDisabled(name)),
 893        )
 894        getMcpToolsCommandsAndResources(
 895          onConnectionAttempt,
 896          enabledConfigs,
 897        ).catch(error => {
 898          logMCPError(
 899            'useManageMcpConnections',
 900            `Failed to get MCP resources: ${errorMessage(error)}`,
 901          )
 902        })
 903  
 904        // Phase 2: Await claude.ai configs (started above; memoized — no second fetch)
 905        let claudeaiConfigs: Record<string, ScopedMcpServerConfig> = {}
 906        if (!isStrictMcpConfig) {
 907          claudeaiConfigs = filterMcpServersByPolicy(
 908            await claudeaiPromise,
 909          ).allowed
 910          if (cancelled) return
 911  
 912          // Suppress claude.ai connectors that duplicate an enabled manual server.
 913          // Keys never collide (`slack` vs `claude.ai Slack`) so the merge below
 914          // won't catch this — need content-based dedup by URL signature.
 915          if (Object.keys(claudeaiConfigs).length > 0) {
 916            const { servers: dedupedClaudeAi } = dedupClaudeAiMcpServers(
 917              claudeaiConfigs,
 918              configs,
 919            )
 920            claudeaiConfigs = dedupedClaudeAi
 921          }
 922  
 923          if (Object.keys(claudeaiConfigs).length > 0) {
 924            // Add claude.ai servers as pending immediately so they show up in UI
 925            setAppState(prevState => {
 926              const existingServerNames = new Set(
 927                prevState.mcp.clients.map(c => c.name),
 928              )
 929              const newClients = Object.entries(claudeaiConfigs)
 930                .filter(([name]) => !existingServerNames.has(name))
 931                .map(([name, config]) => ({
 932                  name,
 933                  type: isMcpServerDisabled(name)
 934                    ? ('disabled' as const)
 935                    : ('pending' as const),
 936                  config,
 937                }))
 938              if (newClients.length === 0) return prevState
 939              return {
 940                ...prevState,
 941                mcp: {
 942                  ...prevState.mcp,
 943                  clients: [...prevState.mcp.clients, ...newClients],
 944                },
 945              }
 946            })
 947  
 948            // Now start connecting (only enabled servers)
 949            const enabledClaudeaiConfigs = Object.fromEntries(
 950              Object.entries(claudeaiConfigs).filter(
 951                ([name]) => !isMcpServerDisabled(name),
 952              ),
 953            )
 954            getMcpToolsCommandsAndResources(
 955              onConnectionAttempt,
 956              enabledClaudeaiConfigs,
 957            ).catch(error => {
 958              logMCPError(
 959                'useManageMcpConnections',
 960                `Failed to get claude.ai MCP resources: ${errorMessage(error)}`,
 961              )
 962            })
 963          }
 964        }
 965  
 966        // Log server counts after both phases complete
 967        const allConfigs = { ...configs, ...claudeaiConfigs }
 968        const counts = {
 969          enterprise: 0,
 970          global: 0,
 971          project: 0,
 972          user: 0,
 973          plugin: 0,
 974          claudeai: 0,
 975        }
 976        // Ant-only: collect stdio command basenames to correlate with RSS/FPS
 977        // metrics. Stdio servers like rust-analyzer can be heavy and we want to
 978        // know which ones correlate with poor session performance.
 979        const stdioCommands: string[] = []
 980        for (const [name, serverConfig] of Object.entries(allConfigs)) {
 981          if (serverConfig.scope === 'enterprise') counts.enterprise++
 982          else if (serverConfig.scope === 'user') counts.global++
 983          else if (serverConfig.scope === 'project') counts.project++
 984          else if (serverConfig.scope === 'local') counts.user++
 985          else if (serverConfig.scope === 'dynamic') counts.plugin++
 986          else if (serverConfig.scope === 'claudeai') counts.claudeai++
 987  
 988          if (
 989            process.env.USER_TYPE === 'ant' &&
 990            !isMcpServerDisabled(name) &&
 991            (serverConfig.type === undefined || serverConfig.type === 'stdio') &&
 992            'command' in serverConfig
 993          ) {
 994            stdioCommands.push(basename(serverConfig.command))
 995          }
 996        }
 997        logEvent('tengu_mcp_servers', {
 998          ...counts,
 999          ...(process.env.USER_TYPE === 'ant' && stdioCommands.length > 0
1000            ? {
1001                stdio_commands: stdioCommands
1002                  .sort()
1003                  .join(
1004                    ',',
1005                  ) as AnalyticsMetadata_I_VERIFIED_THIS_IS_NOT_CODE_OR_FILEPATHS,
1006              }
1007            : {}),
1008        })
1009      }
1010  
1011      void loadAndConnectMcpConfigs()
1012  
1013      return () => {
1014        cancelled = true
1015      }
1016    }, [
1017      isStrictMcpConfig,
1018      dynamicMcpConfig,
1019      onConnectionAttempt,
1020      setAppState,
1021      _authVersion,
1022      sessionId,
1023      _pluginReconnectKey,
1024    ])
1025  
1026    // Cleanup all timers on unmount
1027    useEffect(() => {
1028      const timers = reconnectTimersRef.current
1029      return () => {
1030        for (const timer of timers.values()) {
1031          clearTimeout(timer)
1032        }
1033        timers.clear()
1034        // Flush any pending batched MCP updates before unmount
1035        if (flushTimerRef.current !== null) {
1036          clearTimeout(flushTimerRef.current)
1037          flushTimerRef.current = null
1038          flushPendingUpdates()
1039        }
1040      }
1041    }, [flushPendingUpdates])
1042  
1043    // Expose reconnectMcpServer function for components to use.
1044    // Reads mcp.clients via store.getState() so this callback stays stable
1045    // across client state transitions (no need to re-create on every connect).
1046    const reconnectMcpServer = useCallback(
1047      async (serverName: string) => {
1048        const client = store
1049          .getState()
1050          .mcp.clients.find(c => c.name === serverName)
1051        if (!client) {
1052          throw new Error(`MCP server ${serverName} not found`)
1053        }
1054  
1055        // Cancel any pending automatic reconnection attempt
1056        const existingTimer = reconnectTimersRef.current.get(serverName)
1057        if (existingTimer) {
1058          clearTimeout(existingTimer)
1059          reconnectTimersRef.current.delete(serverName)
1060        }
1061  
1062        const result = await reconnectMcpServerImpl(serverName, client.config)
1063  
1064        onConnectionAttempt(result)
1065  
1066        // Don't throw, just let UI handle the client type in case the reconnect failed
1067        // (Detailed logs are within the reconnectMcpServerImpl via --debug)
1068        return result
1069      },
1070      [store, onConnectionAttempt],
1071    )
1072  
1073    // Expose function to toggle server enabled/disabled state
1074    const toggleMcpServer = useCallback(
1075      async (serverName: string): Promise<void> => {
1076        const client = store
1077          .getState()
1078          .mcp.clients.find(c => c.name === serverName)
1079        if (!client) {
1080          throw new Error(`MCP server ${serverName} not found`)
1081        }
1082  
1083        const isCurrentlyDisabled = client.type === 'disabled'
1084  
1085        if (!isCurrentlyDisabled) {
1086          // Cancel any pending automatic reconnection attempt
1087          const existingTimer = reconnectTimersRef.current.get(serverName)
1088          if (existingTimer) {
1089            clearTimeout(existingTimer)
1090            reconnectTimersRef.current.delete(serverName)
1091          }
1092  
1093          // Persist disabled state to disk FIRST before clearing cache
1094          // This is important because the onclose handler checks disk state
1095          setMcpServerEnabled(serverName, false)
1096  
1097          // Disabling: disconnect and clean up if currently connected
1098          if (client.type === 'connected') {
1099            await clearServerCache(serverName, client.config)
1100          }
1101  
1102          // Update to disabled state (tools/commands/resources auto-cleared)
1103          updateServer({
1104            name: serverName,
1105            type: 'disabled',
1106            config: client.config,
1107          })
1108        } else {
1109          // Enabling: persist enabled state to disk first
1110          setMcpServerEnabled(serverName, true)
1111  
1112          // Mark as pending and reconnect
1113          updateServer({
1114            name: serverName,
1115            type: 'pending',
1116            config: client.config,
1117          })
1118  
1119          // Reconnect the server
1120          const result = await reconnectMcpServerImpl(serverName, client.config)
1121  
1122          onConnectionAttempt(result)
1123        }
1124      },
1125      [store, updateServer, onConnectionAttempt],
1126    )
1127  
1128    return { reconnectMcpServer, toggleMcpServer }
1129  }
1130  
/**
 * Maps a transport type identifier to the label shown to users.
 *
 * @param type - Transport identifier; matching is exact and case-sensitive.
 * @returns 'HTTP' for 'http', 'WebSocket' for 'ws' and 'ws-ide', and 'SSE'
 *   for every other value (including unrecognized ones).
 */
function getTransportDisplayName(type: string): string {
  if (type === 'http') {
    return 'HTTP'
  }
  if (type === 'ws' || type === 'ws-ide') {
    return 'WebSocket'
  }
  // Everything else falls back to the SSE label.
  return 'SSE'
}