instrumentation.ts
import { hmrSingleton } from '@/lib/shared-utils'

const TAG = 'instrumentation'

/**
 * Server startup hook (presumably invoked by Next.js as its instrumentation
 * `register` entry point — confirm against the framework version in use).
 *
 * Does nothing unless `process.env.NEXT_RUNTIME === 'nodejs'`. In the Node.js
 * runtime it:
 *  1. awaits OpenTelemetry startup,
 *  2. schedules connector-session consolidation, WebSocket server init and
 *     daemon startup on the next `setImmediate` tick, and
 *  3. installs SIGTERM/SIGINT, `uncaughtException` and `unhandledRejection`
 *     handlers, at most once per shared shutdown-state object.
 *
 * When `SWARMCLAW_WORKER_ONLY` is `'1'`, the WebSocket server is neither
 * started nor closed.
 */
export async function register(): Promise<void> {
  if (process.env.NEXT_RUNTIME === 'nodejs') {
    const { log } = await import('@/lib/server/logger')
    const { ensureOpenTelemetryStarted, shutdownOpenTelemetry } = await import('@/lib/server/observability/otel')
    const isWorkerOnly = process.env.SWARMCLAW_WORKER_ONLY === '1'
    const { initWsServer, closeWsServer } = await import('./lib/server/ws-hub')
    const { ensureDaemonStarted } = await import('@/lib/server/runtime/daemon-state')
    await ensureOpenTelemetryStarted()

    // Defer migrations, WS init, and daemon startup so the HTTP listener can bind
    // and /api/healthz can respond immediately. Heavy per-install work (session
    // migrations on large data dirs, daemon recovery) no longer gates first boot.
    setImmediate(() => {
      void (async () => {
        // Consolidation is best-effort: a failure is logged and boot continues.
        try {
          const { backfillAllKnownPeerIds, pruneThreadConnectorMirrors } = await import('@/lib/server/connectors/session-consolidation')
          backfillAllKnownPeerIds()
          pruneThreadConnectorMirrors()
        } catch (err) {
          log.error(TAG, 'connector session consolidation failed:', err)
        }

        if (isWorkerOnly) {
          log.info(TAG, 'Booting in WORKER ONLY mode')
          ensureDaemonStarted('worker-boot')
        } else {
          initWsServer()
          ensureDaemonStarted('instrumentation')
        }
      })()
    })

    // Graceful shutdown: stop background services and close WS connections.
    // The state object comes from hmrSingleton under a fixed key; its
    // `registered` flag prevents installing the process handlers twice.
    const shutdownState = hmrSingleton('__swarmclaw_shutdown_state__', () => ({
      registered: false,
      shuttingDown: false,
    }))

    /**
     * Stops the daemon, OpenTelemetry and (unless worker-only) the WebSocket
     * server, then exits with code 0. Each step is guarded independently so a
     * failure in one cannot skip the others or prevent the final exit.
     *
     * @param signal Name of the signal that triggered shutdown; logged and
     *   forwarded to `stopDaemon` as `source`.
     */
    const shutdown = async (signal: string): Promise<void> => {
      // Latch: a second signal while shutdown is in flight is ignored.
      if (shutdownState.shuttingDown) return
      shutdownState.shuttingDown = true
      log.info(TAG, `${signal} received, shutting down gracefully...`)
      try {
        const { stopDaemon } = await import('@/lib/server/runtime/daemon-state')
        await stopDaemon({ source: signal })
      } catch (err) {
        log.error(TAG, 'Failed to stop daemon during shutdown:', err)
      }
      try {
        await shutdownOpenTelemetry()
      } catch (err) {
        log.error(TAG, 'Failed to stop OpenTelemetry during shutdown:', err)
      }
      if (!isWorkerOnly) {
        // Guarded like the steps above: with custom signal listeners installed
        // the process no longer terminates on its own, so a rejection here
        // would otherwise skip process.exit and leave the process running.
        try {
          await closeWsServer()
        } catch (err) {
          log.error(TAG, 'Failed to close WebSocket server during shutdown:', err)
        }
      }
      process.exit(0)
    }

    if (!shutdownState.registered) {
      process.on('SIGTERM', () => { void shutdown('SIGTERM') })
      process.on('SIGINT', () => { void shutdown('SIGINT') })

      // Gracefully handle EPIPE errors from child processes (e.g. Playwright MCP proxy)
      // that occur during dev server restarts when stdio pipes break
      process.on('uncaughtException', (err: NodeJS.ErrnoException) => {
        if (err.code === 'EPIPE') {
          log.warn(TAG, 'Ignoring EPIPE (expected during dev server restart)')
          return
        }
        // Any other uncaught exception is logged and terminates the process.
        log.error(TAG, 'Uncaught exception:', err)
        process.exit(1)
      })

      // LangGraph's streamEvents leaves dangling internal promises when the
      // for-await loop exits early. Suppress expected LangGraph rejections;
      // log all others so they're not silently dropped.
      process.on('unhandledRejection', (err: unknown) => {
        if (
          err && typeof err === 'object'
          && ('pregelTaskId' in err
            || (err instanceof Error && (err.name === 'AbortError' || err.name === 'GraphRecursionError'))
            || (err as Record<string, unknown>).lc_error_code === 'GRAPH_RECURSION_LIMIT')
        ) {
          return
        }
        log.error(TAG, 'Unhandled rejection:', err)
      })

      shutdownState.registered = true
    }
  }
}