/ src / instrumentation.ts
instrumentation.ts
 1  import { hmrSingleton } from '@/lib/shared-utils'
 2  
 3  const TAG = 'instrumentation'
 4  
 5  export async function register() {
 6    if (process.env.NEXT_RUNTIME === 'nodejs') {
 7      const { log } = await import('@/lib/server/logger')
 8      const { ensureOpenTelemetryStarted, shutdownOpenTelemetry } = await import('@/lib/server/observability/otel')
 9      const isWorkerOnly = process.env.SWARMCLAW_WORKER_ONLY === '1'
10      const { initWsServer, closeWsServer } = await import('./lib/server/ws-hub')
11      const { ensureDaemonStarted } = await import('@/lib/server/runtime/daemon-state')
12      await ensureOpenTelemetryStarted()
13  
14      // Defer migrations, WS init, and daemon startup so the HTTP listener can bind
15      // and /api/healthz can respond immediately. Heavy per-install work (session
16      // migrations on large data dirs, daemon recovery) no longer gates first boot.
17      setImmediate(() => {
18        void (async () => {
19          try {
20            const { backfillAllKnownPeerIds, pruneThreadConnectorMirrors } = await import('@/lib/server/connectors/session-consolidation')
21            backfillAllKnownPeerIds()
22            pruneThreadConnectorMirrors()
23          } catch (err) {
24            log.error(TAG, 'connector session consolidation failed:', err)
25          }
26  
27          if (isWorkerOnly) {
28            log.info(TAG, 'Booting in WORKER ONLY mode')
29            ensureDaemonStarted('worker-boot')
30          } else {
31            initWsServer()
32            ensureDaemonStarted('instrumentation')
33          }
34        })()
35      })
36  
37      // Graceful shutdown: stop background services and close WS connections
38      const shutdownState = hmrSingleton('__swarmclaw_shutdown_state__', () => ({
39        registered: false,
40        shuttingDown: false,
41      }))
42  
43      const shutdown = async (signal: string) => {
44        if (shutdownState.shuttingDown) return
45        shutdownState.shuttingDown = true
46        log.info(TAG, `${signal} received, shutting down gracefully...`)
47        try {
48          const { stopDaemon } = await import('@/lib/server/runtime/daemon-state')
49          await stopDaemon({ source: signal })
50        } catch (err) {
51          log.error(TAG, 'Failed to stop daemon during shutdown:', err)
52        }
53        try {
54          await shutdownOpenTelemetry()
55        } catch (err) {
56          log.error(TAG, 'Failed to stop OpenTelemetry during shutdown:', err)
57        }
58        if (!isWorkerOnly) {
59          await closeWsServer()
60        }
61        process.exit(0)
62      }
63      if (!shutdownState.registered) {
64        process.on('SIGTERM', () => { void shutdown('SIGTERM') })
65        process.on('SIGINT', () => { void shutdown('SIGINT') })
66  
67        // Gracefully handle EPIPE errors from child processes (e.g. Playwright MCP proxy)
68        // that occur during dev server restarts when stdio pipes break
69        process.on('uncaughtException', (err: NodeJS.ErrnoException) => {
70          if (err.code === 'EPIPE') {
71            log.warn(TAG, 'Ignoring EPIPE (expected during dev server restart)')
72            return
73          }
74          log.error(TAG, 'Uncaught exception:', err)
75          process.exit(1)
76        })
77  
78        // LangGraph's streamEvents leaves dangling internal promises when the
79        // for-await loop exits early. Suppress expected LangGraph rejections;
80        // log all others so they're not silently dropped.
81        process.on('unhandledRejection', (err: unknown) => {
82          if (
83            err && typeof err === 'object'
84            && ('pregelTaskId' in err
85              || (err instanceof Error && (err.name === 'AbortError' || err.name === 'GraphRecursionError'))
86              || (err as Record<string, unknown>).lc_error_code === 'GRAPH_RECURSION_LIMIT')
87          ) {
88            return
89          }
90          log.error(TAG, 'Unhandled rejection:', err)
91        })
92  
93        shutdownState.registered = true
94      }
95    }
96  }