/ entrypoints / init.ts
init.ts
  1  import { profileCheckpoint } from '../utils/startupProfiler.js'
  2  import '../bootstrap/state.js'
  3  import '../utils/config.js'
  4  import type { Attributes, MetricOptions } from '@opentelemetry/api'
  5  import memoize from 'lodash-es/memoize.js'
  6  import { getIsNonInteractiveSession } from 'src/bootstrap/state.js'
  7  import type { AttributedCounter } from '../bootstrap/state.js'
  8  import { getSessionCounter, setMeter } from '../bootstrap/state.js'
  9  import { shutdownLspServerManager } from '../services/lsp/manager.js'
 10  import { populateOAuthAccountInfoIfNeeded } from '../services/oauth/client.js'
 11  import {
 12    initializePolicyLimitsLoadingPromise,
 13    isPolicyLimitsEligible,
 14  } from '../services/policyLimits/index.js'
 15  import {
 16    initializeRemoteManagedSettingsLoadingPromise,
 17    isEligibleForRemoteManagedSettings,
 18    waitForRemoteManagedSettingsToLoad,
 19  } from '../services/remoteManagedSettings/index.js'
 20  import { preconnectAnthropicApi } from '../utils/apiPreconnect.js'
 21  import { applyExtraCACertsFromConfig } from '../utils/caCertsConfig.js'
 22  import { registerCleanup } from '../utils/cleanupRegistry.js'
 23  import { enableConfigs, recordFirstStartTime } from '../utils/config.js'
 24  import { logForDebugging } from '../utils/debug.js'
 25  import { detectCurrentRepository } from '../utils/detectRepository.js'
 26  import { logForDiagnosticsNoPII } from '../utils/diagLogs.js'
 27  import { initJetBrainsDetection } from '../utils/envDynamic.js'
 28  import { isEnvTruthy } from '../utils/envUtils.js'
 29  import { ConfigParseError, errorMessage } from '../utils/errors.js'
 30  // showInvalidConfigDialog is dynamically imported in the error path to avoid loading React at init
 31  import {
 32    gracefulShutdownSync,
 33    setupGracefulShutdown,
 34  } from '../utils/gracefulShutdown.js'
 35  import {
 36    applyConfigEnvironmentVariables,
 37    applySafeConfigEnvironmentVariables,
 38  } from '../utils/managedEnv.js'
 39  import { configureGlobalMTLS } from '../utils/mtls.js'
 40  import {
 41    ensureScratchpadDir,
 42    isScratchpadEnabled,
 43  } from '../utils/permissions/filesystem.js'
 44  // initializeTelemetry is loaded lazily via import() in setMeterState() to defer
 45  // ~400KB of OpenTelemetry + protobuf modules until telemetry is actually initialized.
 46  // gRPC exporters (~700KB via @grpc/grpc-js) are further lazy-loaded within instrumentation.ts.
 47  import { configureGlobalAgents } from '../utils/proxy.js'
 48  import { isBetaTracingEnabled } from '../utils/telemetry/betaSessionTracing.js'
 49  import { getTelemetryAttributes } from '../utils/telemetryAttributes.js'
 50  import { setShellIfWindows } from '../utils/windowsPaths.js'
 51  
 52  // initialize1PEventLogging is dynamically imported to defer OpenTelemetry sdk-logs/resources
 53  
// Guards against double telemetry initialization. doInitializeTelemetry()
// sets this to true before it starts the meter setup and resets it to false
// if that setup throws, so repeat calls are no-ops unless the previous
// attempt failed.
let telemetryInitialized = false
 56  
 57  export const init = memoize(async (): Promise<void> => {
 58    const initStartTime = Date.now()
 59    logForDiagnosticsNoPII('info', 'init_started')
 60    profileCheckpoint('init_function_start')
 61  
 62    // Validate configs are valid and enable configuration system
 63    try {
 64      const configsStart = Date.now()
 65      enableConfigs()
 66      logForDiagnosticsNoPII('info', 'init_configs_enabled', {
 67        duration_ms: Date.now() - configsStart,
 68      })
 69      profileCheckpoint('init_configs_enabled')
 70  
 71      // Apply only safe environment variables before trust dialog
 72      // Full environment variables are applied after trust is established
 73      const envVarsStart = Date.now()
 74      applySafeConfigEnvironmentVariables()
 75  
 76      // Apply NODE_EXTRA_CA_CERTS from settings.json to process.env early,
 77      // before any TLS connections. Bun caches the TLS cert store at boot
 78      // via BoringSSL, so this must happen before the first TLS handshake.
 79      applyExtraCACertsFromConfig()
 80  
 81      logForDiagnosticsNoPII('info', 'init_safe_env_vars_applied', {
 82        duration_ms: Date.now() - envVarsStart,
 83      })
 84      profileCheckpoint('init_safe_env_vars_applied')
 85  
 86      // Make sure things get flushed on exit
 87      setupGracefulShutdown()
 88      profileCheckpoint('init_after_graceful_shutdown')
 89  
 90      // Initialize 1P event logging (no security concerns, but deferred to avoid
 91      // loading OpenTelemetry sdk-logs at startup). growthbook.js is already in
 92      // the module cache by this point (firstPartyEventLogger imports it), so the
 93      // second dynamic import adds no load cost.
 94      void Promise.all([
 95        import('../services/analytics/firstPartyEventLogger.js'),
 96        import('../services/analytics/growthbook.js'),
 97      ]).then(([fp, gb]) => {
 98        fp.initialize1PEventLogging()
 99        // Rebuild the logger provider if tengu_1p_event_batch_config changes
100        // mid-session. Change detection (isEqual) is inside the handler so
101        // unchanged refreshes are no-ops.
102        gb.onGrowthBookRefresh(() => {
103          void fp.reinitialize1PEventLoggingIfConfigChanged()
104        })
105      })
106      profileCheckpoint('init_after_1p_event_logging')
107  
108      // Populate OAuth account info if it is not already cached in config. This is needed since the
109      // OAuth account info may not be populated when logging in through the VSCode extension.
110      void populateOAuthAccountInfoIfNeeded()
111      profileCheckpoint('init_after_oauth_populate')
112  
113      // Initialize JetBrains IDE detection asynchronously (populates cache for later sync access)
114      void initJetBrainsDetection()
115      profileCheckpoint('init_after_jetbrains_detection')
116  
117      // Detect GitHub repository asynchronously (populates cache for gitDiff PR linking)
118      void detectCurrentRepository()
119  
120      // Initialize the loading promise early so that other systems (like plugin hooks)
121      // can await remote settings loading. The promise includes a timeout to prevent
122      // deadlocks if loadRemoteManagedSettings() is never called (e.g., Agent SDK tests).
123      if (isEligibleForRemoteManagedSettings()) {
124        initializeRemoteManagedSettingsLoadingPromise()
125      }
126      if (isPolicyLimitsEligible()) {
127        initializePolicyLimitsLoadingPromise()
128      }
129      profileCheckpoint('init_after_remote_settings_check')
130  
131      // Record the first start time
132      recordFirstStartTime()
133  
134      // Configure global mTLS settings
135      const mtlsStart = Date.now()
136      logForDebugging('[init] configureGlobalMTLS starting')
137      configureGlobalMTLS()
138      logForDiagnosticsNoPII('info', 'init_mtls_configured', {
139        duration_ms: Date.now() - mtlsStart,
140      })
141      logForDebugging('[init] configureGlobalMTLS complete')
142  
143      // Configure global HTTP agents (proxy and/or mTLS)
144      const proxyStart = Date.now()
145      logForDebugging('[init] configureGlobalAgents starting')
146      configureGlobalAgents()
147      logForDiagnosticsNoPII('info', 'init_proxy_configured', {
148        duration_ms: Date.now() - proxyStart,
149      })
150      logForDebugging('[init] configureGlobalAgents complete')
151      profileCheckpoint('init_network_configured')
152  
153      // Preconnect to the Anthropic API — overlap TCP+TLS handshake
154      // (~100-200ms) with the ~100ms of action-handler work before the API
155      // request. After CA certs + proxy agents are configured so the warmed
156      // connection uses the right transport. Fire-and-forget; skipped for
157      // proxy/mTLS/unix/cloud-provider where the SDK's dispatcher wouldn't
158      // reuse the global pool.
159      preconnectAnthropicApi()
160  
161      // CCR upstreamproxy: start the local CONNECT relay so agent subprocesses
162      // can reach org-configured upstreams with credential injection. Gated on
163      // CLAUDE_CODE_REMOTE + GrowthBook; fail-open on any error. Lazy import so
164      // non-CCR startups don't pay the module load. The getUpstreamProxyEnv
165      // function is registered with subprocessEnv.ts so subprocess spawning can
166      // inject proxy vars without a static import of the upstreamproxy module.
167      if (isEnvTruthy(process.env.CLAUDE_CODE_REMOTE)) {
168        try {
169          const { initUpstreamProxy, getUpstreamProxyEnv } = await import(
170            '../upstreamproxy/upstreamproxy.js'
171          )
172          const { registerUpstreamProxyEnvFn } = await import(
173            '../utils/subprocessEnv.js'
174          )
175          registerUpstreamProxyEnvFn(getUpstreamProxyEnv)
176          await initUpstreamProxy()
177        } catch (err) {
178          logForDebugging(
179            `[init] upstreamproxy init failed: ${err instanceof Error ? err.message : String(err)}; continuing without proxy`,
180            { level: 'warn' },
181          )
182        }
183      }
184  
185      // Set up git-bash if relevant
186      setShellIfWindows()
187  
188      // Register LSP manager cleanup (initialization happens in main.tsx after --plugin-dir is processed)
189      registerCleanup(shutdownLspServerManager)
190  
191      // gh-32730: teams created by subagents (or main agent without
192      // explicit TeamDelete) were left on disk forever. Register cleanup
193      // for all teams created this session. Lazy import: swarm code is
194      // behind feature gate and most sessions never create teams.
195      registerCleanup(async () => {
196        const { cleanupSessionTeams } = await import(
197          '../utils/swarm/teamHelpers.js'
198        )
199        await cleanupSessionTeams()
200      })
201  
202      // Initialize scratchpad directory if enabled
203      if (isScratchpadEnabled()) {
204        const scratchpadStart = Date.now()
205        await ensureScratchpadDir()
206        logForDiagnosticsNoPII('info', 'init_scratchpad_created', {
207          duration_ms: Date.now() - scratchpadStart,
208        })
209      }
210  
211      logForDiagnosticsNoPII('info', 'init_completed', {
212        duration_ms: Date.now() - initStartTime,
213      })
214      profileCheckpoint('init_function_end')
215    } catch (error) {
216      if (error instanceof ConfigParseError) {
217        // Skip the interactive Ink dialog when we can't safely render it.
218        // The dialog breaks JSON consumers (e.g. desktop marketplace plugin
219        // manager running `plugin marketplace list --json` in a VM sandbox).
220        if (getIsNonInteractiveSession()) {
221          process.stderr.write(
222            `Configuration error in ${error.filePath}: ${error.message}\n`,
223          )
224          gracefulShutdownSync(1)
225          return
226        }
227  
228        // Show the invalid config dialog with the error object and wait for it to complete
229        return import('../components/InvalidConfigDialog.js').then(m =>
230          m.showInvalidConfigDialog({ error }),
231        )
232        // Dialog itself handles process.exit, so we don't need additional cleanup here
233      } else {
234        // For non-config errors, rethrow them
235        throw error
236      }
237    }
238  })
239  
/**
 * Starts telemetry initialization once the trust dialog has been accepted.
 *
 * Returns immediately: all work runs in the background and failures are only
 * reported through debug logging.
 *
 * - Not eligible for remote managed settings: telemetry init starts right away.
 * - Eligible: init is deferred until remote managed settings have loaded, and
 *   config environment variables are re-applied first so those settings are
 *   picked up. Non-interactive sessions with beta tracing enabled also start
 *   an eager init up front so the tracer is ready before the first query;
 *   doInitializeTelemetry() turns the later call into a no-op unless the
 *   eager attempt failed.
 *
 * Intended to be called a single time, after trust has been granted.
 */
export function initializeTelemetryAfterTrust(): void {
  // Nothing to wait for when remote managed settings do not apply.
  if (!isEligibleForRemoteManagedSettings()) {
    void doInitializeTelemetry().catch(error => {
      logForDebugging(
        `[3P telemetry] Telemetry init failed: ${errorMessage(error)}`,
        { level: 'error' },
      )
    })
    return
  }

  // SDK/headless mode with beta tracing needs the tracer before the first
  // query runs, so start one attempt now without waiting for remote settings.
  const wantsEagerInit = getIsNonInteractiveSession() && isBetaTracingEnabled()
  if (wantsEagerInit) {
    void doInitializeTelemetry().catch(error => {
      logForDebugging(
        `[3P telemetry] Eager telemetry init failed (beta tracing): ${errorMessage(error)}`,
        { level: 'error' },
      )
    })
  }

  // Continuation that runs once the remote managed settings have loaded.
  const initWithRemoteSettings = async (): Promise<void> => {
    logForDebugging(
      '[3P telemetry] Remote managed settings loaded, initializing telemetry',
    )
    // Re-apply env vars so values from remote settings are in place before
    // telemetry reads them.
    applyConfigEnvironmentVariables()
    await doInitializeTelemetry()
  }

  logForDebugging(
    '[3P telemetry] Waiting for remote managed settings before telemetry init',
  )
  void waitForRemoteManagedSettingsToLoad()
    .then(initWithRemoteSettings)
    .catch(error => {
      logForDebugging(
        `[3P telemetry] Telemetry init failed (remote settings path): ${errorMessage(error)}`,
        { level: 'error' },
      )
    })
}
287  
/**
 * Runs setMeterState() unless an earlier call already started it.
 *
 * The module-level flag is raised before the async work begins, so a call
 * that arrives while the first one is still in flight returns immediately.
 * If setMeterState() throws, the flag is lowered again so a later call can
 * retry, and the error propagates to the caller.
 */
async function doInitializeTelemetry(): Promise<void> {
  if (telemetryInitialized) {
    return
  }

  // Claim the slot up front to block concurrent or repeated initialization.
  telemetryInitialized = true
  let completed = false
  try {
    await setMeterState()
    completed = true
  } finally {
    // Only reached with completed === false when setMeterState() threw;
    // release the slot so a subsequent call can try again.
    if (!completed) {
      telemetryInitialized = false
    }
  }
}
304  
/**
 * Loads the telemetry instrumentation module on demand and initializes it.
 * When initialization yields a meter, registers that meter together with an
 * attributed-counter factory via setMeter() and increments the session
 * counter. When no meter is returned, nothing else happens.
 */
async function setMeterState(): Promise<void> {
  // Dynamic import keeps ~400KB of OpenTelemetry + protobuf off the startup path.
  const { initializeTelemetry } = await import(
    '../utils/telemetry/instrumentation.js'
  )

  // Customer OTLP telemetry (metrics, logs, traces).
  const meter = await initializeTelemetry()
  if (!meter) {
    return
  }

  // Factory for counters whose add() merges the telemetry attributes that are
  // current at call time with the caller's attributes; the caller's values
  // win when keys collide.
  const createAttributedCounter = (
    name: string,
    options: MetricOptions,
  ): AttributedCounter => {
    const counter = meter.createCounter(name, options)

    return {
      add(value: number, additionalAttributes: Attributes = {}) {
        // Read the attributes on every call rather than caching them.
        const baseAttributes = getTelemetryAttributes()
        const attributes = { ...baseAttributes, ...additionalAttributes }
        counter?.add(value, attributes)
      },
    }
  }

  setMeter(meter, createAttributedCounter)

  // The startup telemetry path runs before this async initialization
  // completes, when the session counter would still be null, so the session
  // is counted here instead.
  getSessionCounter()?.add(1)
}