/ utils / telemetry / instrumentation.ts
instrumentation.ts
  1  import { DiagLogLevel, diag, trace } from '@opentelemetry/api'
  2  import { logs } from '@opentelemetry/api-logs'
  3  // OTLP/Prometheus exporters are dynamically imported inside the protocol
  4  // switch statements below. A process uses at most one protocol variant per
  5  // signal, but static imports would load all 6 (~1.2MB) on every startup.
  6  import {
  7    envDetector,
  8    hostDetector,
  9    osDetector,
 10    resourceFromAttributes,
 11  } from '@opentelemetry/resources'
 12  import {
 13    BatchLogRecordProcessor,
 14    ConsoleLogRecordExporter,
 15    LoggerProvider,
 16  } from '@opentelemetry/sdk-logs'
 17  import {
 18    ConsoleMetricExporter,
 19    MeterProvider,
 20    PeriodicExportingMetricReader,
 21  } from '@opentelemetry/sdk-metrics'
 22  import {
 23    BasicTracerProvider,
 24    BatchSpanProcessor,
 25    ConsoleSpanExporter,
 26  } from '@opentelemetry/sdk-trace-base'
 27  import {
 28    ATTR_SERVICE_NAME,
 29    ATTR_SERVICE_VERSION,
 30    SEMRESATTRS_HOST_ARCH,
 31  } from '@opentelemetry/semantic-conventions'
 32  import { HttpsProxyAgent } from 'https-proxy-agent'
 33  import {
 34    getLoggerProvider,
 35    getMeterProvider,
 36    getTracerProvider,
 37    setEventLogger,
 38    setLoggerProvider,
 39    setMeterProvider,
 40    setTracerProvider,
 41  } from 'src/bootstrap/state.js'
 42  import {
 43    getOtelHeadersFromHelper,
 44    getSubscriptionType,
 45    is1PApiCustomer,
 46    isClaudeAISubscriber,
 47  } from 'src/utils/auth.js'
 48  import { getPlatform, getWslVersion } from 'src/utils/platform.js'
 49  
 50  import { getCACertificates } from '../caCerts.js'
 51  import { registerCleanup } from '../cleanupRegistry.js'
 52  import { getHasFormattedOutput, logForDebugging } from '../debug.js'
 53  import { isEnvTruthy } from '../envUtils.js'
 54  import { errorMessage } from '../errors.js'
 55  import { getMTLSConfig } from '../mtls.js'
 56  import { getProxyUrl, shouldBypassProxy } from '../proxy.js'
 57  import { getSettings_DEPRECATED } from '../settings/settings.js'
 58  import { jsonStringify } from '../slowOperations.js'
 59  import { profileCheckpoint } from '../startupProfiler.js'
 60  import { isBetaTracingEnabled } from './betaSessionTracing.js'
 61  import { BigQueryMetricsExporter } from './bigqueryExporter.js'
 62  import { ClaudeCodeDiagLogger } from './logger.js'
 63  import { initializePerfettoTracing } from './perfettoTracing.js'
 64  import {
 65    endInteractionSpan,
 66    isEnhancedTelemetryEnabled,
 67  } from './sessionTracing.js'
 68  
 69  const DEFAULT_METRICS_EXPORT_INTERVAL_MS = 60000
 70  const DEFAULT_LOGS_EXPORT_INTERVAL_MS = 5000
 71  const DEFAULT_TRACES_EXPORT_INTERVAL_MS = 5000
 72  
 73  class TelemetryTimeoutError extends Error {}
 74  
 75  function telemetryTimeout(ms: number, message: string): Promise<never> {
 76    return new Promise((_, reject) => {
 77      setTimeout(
 78        (rej: (e: Error) => void, msg: string) =>
 79          rej(new TelemetryTimeoutError(msg)),
 80        ms,
 81        reject,
 82        message,
 83      ).unref()
 84    })
 85  }
 86  
 87  export function bootstrapTelemetry() {
 88    if (process.env.USER_TYPE === 'ant') {
 89      // Read from ANT_ prefixed variables that are defined at build time
 90      if (process.env.ANT_OTEL_METRICS_EXPORTER) {
 91        process.env.OTEL_METRICS_EXPORTER = process.env.ANT_OTEL_METRICS_EXPORTER
 92      }
 93      if (process.env.ANT_OTEL_LOGS_EXPORTER) {
 94        process.env.OTEL_LOGS_EXPORTER = process.env.ANT_OTEL_LOGS_EXPORTER
 95      }
 96      if (process.env.ANT_OTEL_TRACES_EXPORTER) {
 97        process.env.OTEL_TRACES_EXPORTER = process.env.ANT_OTEL_TRACES_EXPORTER
 98      }
 99      if (process.env.ANT_OTEL_EXPORTER_OTLP_PROTOCOL) {
100        process.env.OTEL_EXPORTER_OTLP_PROTOCOL =
101          process.env.ANT_OTEL_EXPORTER_OTLP_PROTOCOL
102      }
103      if (process.env.ANT_OTEL_EXPORTER_OTLP_ENDPOINT) {
104        process.env.OTEL_EXPORTER_OTLP_ENDPOINT =
105          process.env.ANT_OTEL_EXPORTER_OTLP_ENDPOINT
106      }
107      if (process.env.ANT_OTEL_EXPORTER_OTLP_HEADERS) {
108        process.env.OTEL_EXPORTER_OTLP_HEADERS =
109          process.env.ANT_OTEL_EXPORTER_OTLP_HEADERS
110      }
111    }
112  
113    // Set default tempoality to 'delta' because it's the more sane default
114    if (!process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE) {
115      process.env.OTEL_EXPORTER_OTLP_METRICS_TEMPORALITY_PREFERENCE = 'delta'
116    }
117  }
118  
/**
 * Parse a comma-separated exporter list such as the value of
 * OTEL_METRICS_EXPORTER, OTEL_LOGS_EXPORTER or OTEL_TRACES_EXPORTER.
 *
 * Entries are trimmed first and only then filtered, so blank or
 * whitespace-only entries (e.g. the middle one in "otlp, ,console") are
 * dropped instead of surviving as an empty exporter name.
 *
 * Per OTEL spec, "none" means "no automatically configured exporter for this signal".
 * https://opentelemetry.io/docs/specs/otel/configuration/sdk-environment-variables/#exporter-selection
 *
 * @param value - Raw environment variable value; may be undefined.
 * @returns The requested exporter names in their original order, without
 *   empty entries and without "none".
 */
export function parseExporterTypes(value: string | undefined): string[] {
  return (value ?? '')
    .split(',')
    .map(t => t.trim())
    .filter(t => t !== '' && t !== 'none')
}
129  
/**
 * Build the metric readers requested through OTEL_METRICS_EXPORTER.
 *
 * Recognised exporter types are `console`, `otlp` and `prometheus`. For
 * `otlp` the protocol comes from OTEL_EXPORTER_OTLP_METRICS_PROTOCOL, falling
 * back to OTEL_EXPORTER_OTLP_PROTOCOL; the matching exporter package is
 * imported lazily so only the selected variant is loaded.
 *
 * The export interval is read from OTEL_METRIC_EXPORT_INTERVAL. An unset,
 * empty or non-numeric value falls back to DEFAULT_METRICS_EXPORT_INTERVAL_MS
 * rather than passing NaN on to the reader.
 *
 * @returns One entry per requested exporter, in the order listed. Objects
 *   exposing `export` are wrapped in a PeriodicExportingMetricReader; anything
 *   else (the Prometheus exporter) is returned as-is.
 * @throws Error when an exporter type or OTLP protocol is not recognised.
 */
async function getOtlpReaders() {
  const exporterTypes = parseExporterTypes(process.env.OTEL_METRICS_EXPORTER)

  const parsedInterval = parseInt(
    process.env.OTEL_METRIC_EXPORT_INTERVAL || '',
    10,
  )
  const exportInterval = Number.isNaN(parsedInterval)
    ? DEFAULT_METRICS_EXPORT_INTERVAL_MS
    : parsedInterval

  const exporters = []
  for (const exporterType of exporterTypes) {
    if (exporterType === 'console') {
      // Custom console exporter that shows resource attributes
      const consoleExporter = new ConsoleMetricExporter()
      const originalExport = consoleExporter.export.bind(consoleExporter)

      consoleExporter.export = (metrics, callback) => {
        // Emit the resource attributes through the debug log ahead of every
        // export, then delegate to the stock console exporter.
        if (metrics.resource && metrics.resource.attributes) {
          logForDebugging('\n=== Resource Attributes ===')
          logForDebugging(jsonStringify(metrics.resource.attributes))
          logForDebugging('===========================\n')
        }

        return originalExport(metrics, callback)
      }

      exporters.push(consoleExporter)
    } else if (exporterType === 'otlp') {
      const protocol =
        process.env.OTEL_EXPORTER_OTLP_METRICS_PROTOCOL?.trim() ||
        process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()

      const httpConfig = getOTLPExporterConfig()

      switch (protocol) {
        case 'grpc': {
          // Lazy-import to keep @grpc/grpc-js (~700KB) out of the telemetry chunk
          // when the protocol is http/protobuf (ant default) or http/json.
          const { OTLPMetricExporter } = await import(
            '@opentelemetry/exporter-metrics-otlp-grpc'
          )
          exporters.push(new OTLPMetricExporter())
          break
        }
        case 'http/json': {
          const { OTLPMetricExporter } = await import(
            '@opentelemetry/exporter-metrics-otlp-http'
          )
          exporters.push(new OTLPMetricExporter(httpConfig))
          break
        }
        case 'http/protobuf': {
          const { OTLPMetricExporter } = await import(
            '@opentelemetry/exporter-metrics-otlp-proto'
          )
          exporters.push(new OTLPMetricExporter(httpConfig))
          break
        }
        default:
          throw new Error(
            `Unknown protocol set in OTEL_EXPORTER_OTLP_METRICS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
          )
      }
    } else if (exporterType === 'prometheus') {
      const { PrometheusExporter } = await import(
        '@opentelemetry/exporter-prometheus'
      )
      exporters.push(new PrometheusExporter())
    } else {
      // The exporter type comes from OTEL_METRICS_EXPORTER, so name that
      // variable (not the protocol variables) in the error.
      throw new Error(
        `Unknown exporter type set in OTEL_METRICS_EXPORTER env var: ${exporterType}`,
      )
    }
  }

  return exporters.map(exporter => {
    // Push-style exporters need a periodic reader to drive them.
    if ('export' in exporter) {
      return new PeriodicExportingMetricReader({
        exporter,
        exportIntervalMillis: exportInterval,
      })
    }
    return exporter
  })
}
216  
/**
 * Build the log record exporters requested through OTEL_LOGS_EXPORTER.
 *
 * Recognised exporter types are `console` and `otlp`. For `otlp` the protocol
 * comes from OTEL_EXPORTER_OTLP_LOGS_PROTOCOL, falling back to
 * OTEL_EXPORTER_OTLP_PROTOCOL, and the matching exporter package is imported
 * lazily.
 *
 * @returns One exporter per requested type, in the order listed.
 * @throws Error when an exporter type or OTLP protocol is not recognised.
 */
async function getOtlpLogExporters() {
  const requestedTypes = parseExporterTypes(process.env.OTEL_LOGS_EXPORTER)

  // The logs-specific protocol variable wins over the generic one.
  const signalProtocol = process.env.OTEL_EXPORTER_OTLP_LOGS_PROTOCOL?.trim()
  const protocol =
    signalProtocol || process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()
  const endpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT

  logForDebugging(
    `[3P telemetry] getOtlpLogExporters: types=${jsonStringify(requestedTypes)}, protocol=${protocol}, endpoint=${endpoint}`,
  )

  const logExporters = []
  for (const requested of requestedTypes) {
    switch (requested) {
      case 'console':
        logExporters.push(new ConsoleLogRecordExporter())
        break
      case 'otlp': {
        const httpConfig = getOTLPExporterConfig()

        if (protocol === 'grpc') {
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-grpc'
          )
          logExporters.push(new OTLPLogExporter())
        } else if (protocol === 'http/json') {
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-http'
          )
          logExporters.push(new OTLPLogExporter(httpConfig))
        } else if (protocol === 'http/protobuf') {
          const { OTLPLogExporter } = await import(
            '@opentelemetry/exporter-logs-otlp-proto'
          )
          logExporters.push(new OTLPLogExporter(httpConfig))
        } else {
          throw new Error(
            `Unknown protocol set in OTEL_EXPORTER_OTLP_LOGS_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
          )
        }
        break
      }
      default:
        throw new Error(
          `Unknown exporter type set in OTEL_LOGS_EXPORTER env var: ${requested}`,
        )
    }
  }

  return logExporters
}
272  
/**
 * Build the span exporters requested through OTEL_TRACES_EXPORTER.
 *
 * Recognised exporter types are `console` and `otlp`. For `otlp` the protocol
 * comes from OTEL_EXPORTER_OTLP_TRACES_PROTOCOL, falling back to
 * OTEL_EXPORTER_OTLP_PROTOCOL, and the matching exporter package is imported
 * lazily.
 *
 * @returns One exporter per requested type, in the order listed.
 * @throws Error when an exporter type or OTLP protocol is not recognised.
 */
async function getOtlpTraceExporters() {
  const requestedTypes = parseExporterTypes(process.env.OTEL_TRACES_EXPORTER)
  const spanExporters = []

  for (const requested of requestedTypes) {
    if (requested === 'console') {
      spanExporters.push(new ConsoleSpanExporter())
      continue
    }

    if (requested !== 'otlp') {
      throw new Error(
        `Unknown exporter type set in OTEL_TRACES_EXPORTER env var: ${requested}`,
      )
    }

    // The traces-specific protocol variable wins over the generic one.
    const signalProtocol =
      process.env.OTEL_EXPORTER_OTLP_TRACES_PROTOCOL?.trim()
    const protocol =
      signalProtocol || process.env.OTEL_EXPORTER_OTLP_PROTOCOL?.trim()

    const httpConfig = getOTLPExporterConfig()

    if (protocol === 'grpc') {
      const { OTLPTraceExporter } = await import(
        '@opentelemetry/exporter-trace-otlp-grpc'
      )
      spanExporters.push(new OTLPTraceExporter())
    } else if (protocol === 'http/json') {
      const { OTLPTraceExporter } = await import(
        '@opentelemetry/exporter-trace-otlp-http'
      )
      spanExporters.push(new OTLPTraceExporter(httpConfig))
    } else if (protocol === 'http/protobuf') {
      const { OTLPTraceExporter } = await import(
        '@opentelemetry/exporter-trace-otlp-proto'
      )
      spanExporters.push(new OTLPTraceExporter(httpConfig))
    } else {
      throw new Error(
        `Unknown protocol set in OTEL_EXPORTER_OTLP_TRACES_PROTOCOL or OTEL_EXPORTER_OTLP_PROTOCOL env var: ${protocol}`,
      )
    }
  }

  return spanExporters
}
323  
/**
 * Report whether CLAUDE_CODE_ENABLE_TELEMETRY is set to a value that
 * `isEnvTruthy` accepts.
 */
export function isTelemetryEnabled() {
  const optIn = process.env.CLAUDE_CODE_ENABLE_TELEMETRY
  return isEnvTruthy(optIn)
}
327  
/**
 * Create the periodic reader that feeds a fresh BigQueryMetricsExporter.
 */
function getBigQueryExportingReader() {
  // 5mins for BigQuery metrics exporter to reduce load
  const fiveMinutesMs = 5 * 60 * 1000

  return new PeriodicExportingMetricReader({
    exporter: new BigQueryMetricsExporter(),
    exportIntervalMillis: fiveMinutesMs,
  })
}
335  
/**
 * Decide whether the BigQuery metrics exporter should be attached.
 *
 * BigQuery metrics are enabled for:
 * 1. API customers (excluding Claude.ai subscribers and Bedrock/Vertex)
 * 2. Claude for Enterprise (C4E) users
 * 3. Claude for Teams users
 */
function isBigQueryMetricsEnabled() {
  const subscriptionType = getSubscriptionType()

  // C4E / Teams: a Claude.ai subscriber on an enterprise or team plan.
  let isC4EOrTeamUser = false
  if (isClaudeAISubscriber()) {
    isC4EOrTeamUser =
      subscriptionType === 'enterprise' || subscriptionType === 'team'
  }

  return is1PApiCustomer() || isC4EOrTeamUser
}
348  
/**
 * Initialize beta tracing - a separate code path for detailed debugging.
 * Uses BETA_TRACING_ENDPOINT instead of OTEL_EXPORTER_OTLP_ENDPOINT.
 *
 * Does nothing when BETA_TRACING_ENDPOINT is unset or empty. Otherwise it
 * creates OTLP/HTTP trace and log exporters pointing at
 * `<endpoint>/v1/traces` and `<endpoint>/v1/logs`, registers the resulting
 * tracer and logger providers both globally and in bootstrap state, sets the
 * event logger, and installs process exit hooks that flush both providers.
 *
 * @param resource - Resource attached to every span and log record.
 */
async function initializeBetaTracing(
  resource: ReturnType<typeof resourceFromAttributes>,
): Promise<void> {
  const endpoint = process.env.BETA_TRACING_ENDPOINT
  if (!endpoint) {
    return
  }

  // Drop trailing slashes so "http://host:4318/" does not turn into
  // "http://host:4318//v1/traces".
  const baseUrl = endpoint.replace(/\/+$/, '')

  const [{ OTLPTraceExporter }, { OTLPLogExporter }] = await Promise.all([
    import('@opentelemetry/exporter-trace-otlp-http'),
    import('@opentelemetry/exporter-logs-otlp-http'),
  ])

  const httpConfig = {
    url: `${baseUrl}/v1/traces`,
  }

  const logHttpConfig = {
    url: `${baseUrl}/v1/logs`,
  }

  // Initialize trace exporter
  const traceExporter = new OTLPTraceExporter(httpConfig)
  const spanProcessor = new BatchSpanProcessor(traceExporter, {
    scheduledDelayMillis: DEFAULT_TRACES_EXPORT_INTERVAL_MS,
  })

  const tracerProvider = new BasicTracerProvider({
    resource,
    spanProcessors: [spanProcessor],
  })

  trace.setGlobalTracerProvider(tracerProvider)
  setTracerProvider(tracerProvider)

  // Initialize log exporter
  const logExporter = new OTLPLogExporter(logHttpConfig)
  const loggerProvider = new LoggerProvider({
    resource,
    processors: [
      new BatchLogRecordProcessor(logExporter, {
        scheduledDelayMillis: DEFAULT_LOGS_EXPORT_INTERVAL_MS,
      }),
    ],
  })

  logs.setGlobalLoggerProvider(loggerProvider)
  setLoggerProvider(loggerProvider)

  // Initialize event logger
  const eventLogger = logs.getLogger(
    'com.anthropic.claude_code.events',
    MACRO.VERSION,
  )
  setEventLogger(eventLogger)

  // Setup flush handlers - flush both logs AND traces.
  // A rejection inside an async event listener is otherwise unhandled, so a
  // failing flush is caught and logged instead of surfacing at process exit.
  process.on('beforeExit', async () => {
    try {
      await loggerProvider.forceFlush()
      await tracerProvider.forceFlush()
    } catch (e) {
      logForDebugging(
        `Beta tracing flush on exit failed: ${errorMessage(e)}`,
        { level: 'error' },
      )
    }
  })

  // Last-chance attempt; the results are intentionally not awaited.
  process.on('exit', () => {
    void loggerProvider.forceFlush()
    void tracerProvider.forceFlush()
  })
}
420  
/**
 * Configure OpenTelemetry metrics, logs and traces for this process.
 *
 * Steps, in order:
 * 1. Run `bootstrapTelemetry()` and, when output is formatted, strip the
 *    `console` exporter from the OTEL_*_EXPORTER variables.
 * 2. Install the diag logger and start Perfetto tracing.
 * 3. Collect metric readers: customer exporters when `isTelemetryEnabled()`,
 *    plus the BigQuery reader when `isBigQueryMetricsEnabled()`.
 * 4. Build the resource from service attributes, the optional WSL version,
 *    and the OS / host-arch / env detectors.
 * 5. If beta tracing is enabled, start it without awaiting, create the meter
 *    provider, register a shutdown hook and return early (the regular
 *    logs/traces setup is skipped).
 * 6. Otherwise create the meter provider, the logger provider (telemetry
 *    enabled and at least one log exporter) and the tracer provider
 *    (telemetry and enhanced telemetry enabled, at least one trace exporter),
 *    then register a shutdown hook bounded by
 *    CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS (default 2000 ms).
 *
 * @returns The meter named 'com.anthropic.claude_code' from the new provider.
 * @throws Error when exporter configuration names an unknown exporter type
 *   or protocol (propagated from the exporter factory functions).
 */
export async function initializeTelemetry() {
  profileCheckpoint('telemetry_init_start')
  bootstrapTelemetry()

  // Console exporters call console.dir on a timer (5s logs/traces, 60s
  // metrics), writing pretty-printed objects to stdout. In stream-json
  // mode stdout is the SDK message channel; the first line (`{`) breaks
  // the SDK's line reader. Stripped here (not main.tsx) because init.ts
  // re-runs applyConfigEnvironmentVariables() inside initializeTelemetry-
  // AfterTrust for remote-managed-settings users, and bootstrapTelemetry
  // above copies ANT_OTEL_* for ant users — both would undo an earlier strip.
  if (getHasFormattedOutput()) {
    for (const key of [
      'OTEL_METRICS_EXPORTER',
      'OTEL_LOGS_EXPORTER',
      'OTEL_TRACES_EXPORTER',
    ] as const) {
      const v = process.env[key]
      if (v?.includes('console')) {
        process.env[key] = v
          .split(',')
          .map(s => s.trim())
          .filter(s => s !== 'console')
          .join(',')
      }
    }
  }

  diag.setLogger(new ClaudeCodeDiagLogger(), DiagLogLevel.ERROR)

  // Initialize Perfetto tracing (independent of OTEL)
  // Enable via CLAUDE_CODE_PERFETTO_TRACE=1 or CLAUDE_CODE_PERFETTO_TRACE=<path>
  initializePerfettoTracing()

  const readers = []

  // Add customer exporters (if enabled)
  const telemetryEnabled = isTelemetryEnabled()
  logForDebugging(
    `[3P telemetry] isTelemetryEnabled=${telemetryEnabled} (CLAUDE_CODE_ENABLE_TELEMETRY=${process.env.CLAUDE_CODE_ENABLE_TELEMETRY})`,
  )
  if (telemetryEnabled) {
    readers.push(...(await getOtlpReaders()))
  }

  // Add BigQuery exporter (for API customers, C4E users, and internal users)
  if (isBigQueryMetricsEnabled()) {
    readers.push(getBigQueryExportingReader())
  }

  // Create base resource with service attributes
  const platform = getPlatform()
  const baseAttributes: Record<string, string> = {
    [ATTR_SERVICE_NAME]: 'claude-code',
    [ATTR_SERVICE_VERSION]: MACRO.VERSION,
  }

  // Add WSL-specific attributes if running on WSL
  if (platform === 'wsl') {
    const wslVersion = getWslVersion()
    if (wslVersion) {
      baseAttributes['wsl.version'] = wslVersion
    }
  }

  const baseResource = resourceFromAttributes(baseAttributes)

  // Use OpenTelemetry detectors
  const osResource = resourceFromAttributes(
    osDetector.detect().attributes || {},
  )

  // Extract only host.arch from hostDetector
  const hostDetected = hostDetector.detect()
  const hostArchAttributes = hostDetected.attributes?.[SEMRESATTRS_HOST_ARCH]
    ? {
        [SEMRESATTRS_HOST_ARCH]: hostDetected.attributes[SEMRESATTRS_HOST_ARCH],
      }
    : {}
  const hostArchResource = resourceFromAttributes(hostArchAttributes)

  const envResource = resourceFromAttributes(
    envDetector.detect().attributes || {},
  )

  // Merge resources - later resources take precedence
  const resource = baseResource
    .merge(osResource)
    .merge(hostArchResource)
    .merge(envResource)

  // Check if beta tracing is enabled - this is a separate code path
  // Available to all users who set ENABLE_BETA_TRACING_DETAILED=1 and BETA_TRACING_ENDPOINT
  if (isBetaTracingEnabled()) {
    // Fire-and-forget: a failed beta tracing init is logged, never thrown.
    void initializeBetaTracing(resource).catch(e =>
      logForDebugging(`Beta tracing init failed: ${e}`, { level: 'error' }),
    )
    // Still set up meter provider for metrics (but skip regular logs/traces setup)
    const meterProvider = new MeterProvider({
      resource,
      views: [],
      readers,
    })
    setMeterProvider(meterProvider)

    // Register shutdown for beta tracing
    const shutdownTelemetry = async () => {
      const timeoutMs = parseInt(
        process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
        10,
      )
      try {
        endInteractionSpan()

        // Force flush + shutdown together inside the timeout. Previously forceFlush
        // was awaited unbounded BEFORE the race, blocking exit on slow OTLP endpoints.
        // Each provider's flush→shutdown is chained independently so a slow logger
        // flush doesn't delay meterProvider/tracerProvider shutdown (no waterfall).
        const loggerProvider = getLoggerProvider()
        const tracerProvider = getTracerProvider()

        const chains: Promise<void>[] = [meterProvider.shutdown()]
        if (loggerProvider) {
          chains.push(
            loggerProvider.forceFlush().then(() => loggerProvider.shutdown()),
          )
        }
        if (tracerProvider) {
          chains.push(
            tracerProvider.forceFlush().then(() => tracerProvider.shutdown()),
          )
        }

        await Promise.race([
          Promise.all(chains),
          telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
        ])
      } catch {
        // Ignore shutdown errors
      }
    }
    registerCleanup(shutdownTelemetry)

    return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
  }

  const meterProvider = new MeterProvider({
    resource,
    views: [],
    readers,
  })

  // Store reference in state for flushing
  setMeterProvider(meterProvider)

  // Initialize logs if telemetry is enabled
  if (telemetryEnabled) {
    const logExporters = await getOtlpLogExporters()
    logForDebugging(
      `[3P telemetry] Created ${logExporters.length} log exporter(s)`,
    )

    if (logExporters.length > 0) {
      const loggerProvider = new LoggerProvider({
        resource,
        // Add batch processors for each exporter
        processors: logExporters.map(
          exporter =>
            new BatchLogRecordProcessor(exporter, {
              scheduledDelayMillis: parseInt(
                process.env.OTEL_LOGS_EXPORT_INTERVAL ||
                  DEFAULT_LOGS_EXPORT_INTERVAL_MS.toString(),
                10,
              ),
            }),
        ),
      })

      // Register the logger provider globally
      logs.setGlobalLoggerProvider(loggerProvider)
      setLoggerProvider(loggerProvider)

      // Initialize event logger
      const eventLogger = logs.getLogger(
        'com.anthropic.claude_code.events',
        MACRO.VERSION,
      )
      setEventLogger(eventLogger)
      logForDebugging('[3P telemetry] Event logger set successfully')

      // 'beforeExit' is emitted when Node.js empties its event loop and has no additional work to schedule.
      // Unlike 'exit', it allows us to perform async operations, so it works well for letting
      // network requests complete before the process exits naturally.
      // A rejection inside an async event listener is otherwise unhandled,
      // so a failing flush is caught and logged here.
      process.on('beforeExit', async () => {
        try {
          await loggerProvider.forceFlush()
          // Also flush traces - they use BatchSpanProcessor which needs explicit flush
          const tracerProvider = getTracerProvider()
          await tracerProvider?.forceFlush()
        } catch (e) {
          logForDebugging(
            `Telemetry flush on exit failed: ${errorMessage(e)}`,
            { level: 'error' },
          )
        }
      })

      process.on('exit', () => {
        // Final attempt to flush logs and traces
        void loggerProvider.forceFlush()
        void getTracerProvider()?.forceFlush()
      })
    }
  }

  // Initialize tracing if enhanced telemetry is enabled (BETA)
  if (telemetryEnabled && isEnhancedTelemetryEnabled()) {
    const traceExporters = await getOtlpTraceExporters()
    if (traceExporters.length > 0) {
      // Create span processors for each exporter
      const spanProcessors = traceExporters.map(
        exporter =>
          new BatchSpanProcessor(exporter, {
            scheduledDelayMillis: parseInt(
              process.env.OTEL_TRACES_EXPORT_INTERVAL ||
                DEFAULT_TRACES_EXPORT_INTERVAL_MS.toString(),
              10,
            ),
          }),
      )

      const tracerProvider = new BasicTracerProvider({
        resource,
        spanProcessors,
      })

      // Register the tracer provider globally
      trace.setGlobalTracerProvider(tracerProvider)
      setTracerProvider(tracerProvider)
    }
  }

  // Shutdown metrics and logs on exit (flushes and closes exporters)
  const shutdownTelemetry = async () => {
    const timeoutMs = parseInt(
      process.env.CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS || '2000',
      10,
    )

    try {
      // End any active interaction span before shutdown
      endInteractionSpan()

      const shutdownPromises = [meterProvider.shutdown()]
      const loggerProvider = getLoggerProvider()
      if (loggerProvider) {
        shutdownPromises.push(loggerProvider.shutdown())
      }
      const tracerProvider = getTracerProvider()
      if (tracerProvider) {
        shutdownPromises.push(tracerProvider.shutdown())
      }

      await Promise.race([
        Promise.all(shutdownPromises),
        telemetryTimeout(timeoutMs, 'OpenTelemetry shutdown timeout'),
      ])
    } catch (error) {
      // Only our own deadline gets the timeout guidance; matching on the
      // error class (as flushTelemetry does) avoids mislabelling unrelated
      // errors whose message merely contains the word "timeout".
      if (error instanceof TelemetryTimeoutError) {
        logForDebugging(
          `
OpenTelemetry telemetry flush timed out after ${timeoutMs}ms

To resolve this issue, you can:
1. Increase the timeout by setting CLAUDE_CODE_OTEL_SHUTDOWN_TIMEOUT_MS env var (e.g., 5000 for 5 seconds)
2. Check if your OpenTelemetry backend is experiencing scalability issues
3. Disable OpenTelemetry by unsetting CLAUDE_CODE_ENABLE_TELEMETRY env var

Current timeout: ${timeoutMs}ms
`,
          { level: 'error' },
        )
      }
      throw error
    }
  }

  // Always register shutdown (internal metrics are always enabled)
  registerCleanup(shutdownTelemetry)

  return meterProvider.getMeter('com.anthropic.claude_code', MACRO.VERSION)
}
702  
/**
 * Flush all pending telemetry data immediately.
 * This should be called before logout or org switching to prevent data leakage.
 *
 * Metrics, logs and traces are flushed concurrently and the whole operation
 * is bounded by CLAUDE_CODE_OTEL_FLUSH_TIMEOUT_MS (default 5000 ms). Returns
 * immediately when no meter provider has been registered. Never throws:
 * timeouts and flush failures are only written to the debug log.
 */
export async function flushTelemetry(): Promise<void> {
  const meterProvider = getMeterProvider()
  if (!meterProvider) {
    return
  }

  const timeoutMs = parseInt(
    process.env.CLAUDE_CODE_OTEL_FLUSH_TIMEOUT_MS || '5000',
  )

  try {
    const pending = [meterProvider.forceFlush()]

    // Logger and tracer providers exist only when those signals were set up.
    const logsFlush = getLoggerProvider()?.forceFlush()
    if (logsFlush) {
      pending.push(logsFlush)
    }
    const tracesFlush = getTracerProvider()?.forceFlush()
    if (tracesFlush) {
      pending.push(tracesFlush)
    }

    const deadline = telemetryTimeout(timeoutMs, 'OpenTelemetry flush timeout')
    await Promise.race([Promise.all(pending), deadline])

    logForDebugging('Telemetry flushed successfully')
  } catch (error) {
    // Don't throw - allow logout to continue even if flush fails
    const timedOut = error instanceof TelemetryTimeoutError
    if (!timedOut) {
      logForDebugging(`Telemetry flush failed: ${errorMessage(error)}`, {
        level: 'error',
      })
      return
    }
    logForDebugging(
      `Telemetry flush timed out after ${timeoutMs}ms. Some metrics may not be exported.`,
      { level: 'warn' },
    )
  }
}
748  
/**
 * Parse OTEL_EXPORTER_OTLP_HEADERS ("k1=v1,k2=v2") into a header map.
 *
 * Each comma-separated entry is split at its first '='; key and value are
 * trimmed, and any further '=' characters stay part of the value. Entries
 * without '=' are skipped, as are entries whose key is empty after trimming,
 * so " =value" can no longer produce a header with an empty name.
 *
 * @returns The parsed headers; an empty object when the variable is unset
 *   or empty.
 */
function parseOtelHeadersEnvVar(): Record<string, string> {
  const headers: Record<string, string> = {}
  const envHeaders = process.env.OTEL_EXPORTER_OTLP_HEADERS
  if (!envHeaders) {
    return headers
  }

  for (const pair of envHeaders.split(',')) {
    const separatorIndex = pair.indexOf('=')
    if (separatorIndex === -1) {
      continue
    }
    const key = pair.slice(0, separatorIndex).trim()
    if (!key) {
      continue
    }
    headers[key] = pair.slice(separatorIndex + 1).trim()
  }
  return headers
}
762  
/**
 * Assemble the options object handed to the OTLP HTTP exporters.
 *
 * - `headers`: an async function. With `otelHeadersHelper` configured it
 *   merges the static OTEL_EXPORTER_OTLP_HEADERS values with the helper's
 *   output on every call (helper values win); otherwise it returns the static
 *   headers, and is omitted entirely when there are none.
 * - `httpAgentOptions`: without a proxy (or when the OTLP endpoint bypasses
 *   it) a plain options object carrying mTLS settings and CA certificates,
 *   omitted when neither exists; with a proxy, a factory function that
 *   creates an HttpsProxyAgent carrying the same settings.
 */
function getOTLPExporterConfig() {
  const proxyUrl = getProxyUrl()
  const mtlsConfig = getMTLSConfig()
  const settings = getSettings_DEPRECATED()

  const exporterConfig: Record<string, unknown> = {}

  // Static headers are parsed once; they do not change at runtime.
  const envHeaders = parseOtelHeadersEnvVar()

  if (settings?.otelHeadersHelper) {
    // Helper configured: re-evaluate it on every call so headers can refresh.
    exporterConfig.headers = async (): Promise<Record<string, string>> => {
      const helperHeaders = getOtelHeadersFromHelper()
      return { ...envHeaders, ...helperHeaders }
    }
  } else if (Object.keys(envHeaders).length > 0) {
    exporterConfig.headers = async (): Promise<Record<string, string>> =>
      envHeaders
  }

  // Direct connection: no proxy configured, or the endpoint bypasses it.
  const otlpEndpoint = process.env.OTEL_EXPORTER_OTLP_ENDPOINT
  if (!proxyUrl || (otlpEndpoint && shouldBypassProxy(otlpEndpoint))) {
    const trustedCerts = getCACertificates()
    if (mtlsConfig || trustedCerts) {
      exporterConfig.httpAgentOptions = trustedCerts
        ? { ...mtlsConfig, ca: trustedCerts }
        : { ...mtlsConfig }
    }
    return exporterConfig
  }

  // Proxied connection: hand the exporter a factory that builds our agent.
  const trustedCerts = getCACertificates()
  exporterConfig.httpAgentOptions = (_protocol: string) => {
    if (!mtlsConfig && !trustedCerts) {
      return new HttpsProxyAgent(proxyUrl)
    }
    return new HttpsProxyAgent(proxyUrl, {
      ...(mtlsConfig && {
        cert: mtlsConfig.cert,
        key: mtlsConfig.key,
        passphrase: mtlsConfig.passphrase,
      }),
      ...(trustedCerts && { ca: trustedCerts }),
    })
  }
  return exporterConfig
}