main-agent-loop.test.ts
import assert from 'node:assert/strict'
import fs from 'node:fs'
import os from 'node:os'
import path from 'node:path'
import { spawnSync } from 'node:child_process'
import { describe, it } from 'node:test'
import { fileURLToPath } from 'node:url'

// Use fileURLToPath rather than URL#pathname: the raw pathname keeps
// percent-escapes (a space becomes "%20") and is "/C:/..." on Windows, so it
// is not a usable filesystem path.
const repoRoot = path.resolve(path.dirname(fileURLToPath(import.meta.url)), '../../..')

/**
 * Script source shared by every test: loads the storage and main-agent-loop
 * modules (tolerating default / `module.exports` / namespace shapes) and saves
 * the single `agent-a` fixture. Declares `storage` and `mainLoop` for the
 * test-specific script that follows it.
 */
const SCRIPT_PREAMBLE = `
  const storageMod = await import('@/lib/server/storage')
  const storage = storageMod.default || storageMod['module.exports'] || storageMod
  const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
  const mainLoop = mainLoopMod.default || mainLoopMod['module.exports'] || mainLoopMod

  storage.saveAgents({
    'agent-a': {
      id: 'agent-a',
      name: 'Agent A',
      provider: 'openai',
      model: 'gpt-test',
    },
  })
`

/**
 * Builds the script source for the `main` entry passed to `storage.saveSessions`.
 *
 * The result is source text, not a value: `process.cwd()` must be evaluated in
 * the child process, whose cwd is `repoRoot`.
 *
 * @param options.userMessage - Text of the single user message in the session
 *   history; when omitted the session starts with no messages.
 * @param options.missionId - Optional mission id to attach to the session.
 * @returns An object-literal property (`main: { ... },`) including the trailing comma.
 */
function mainSessionSource(options: { userMessage?: string; missionId?: string } = {}): string {
  const { userMessage, missionId } = options
  // JSON.stringify yields a valid JS string literal, so arbitrary text is safely quoted.
  const messages =
    userMessage === undefined ? '[]' : `[{ role: 'user', text: ${JSON.stringify(userMessage)}, time: 1 }]`
  const missionField = missionId === undefined ? '' : `missionId: ${JSON.stringify(missionId)},`
  return `
    main: {
      id: 'main',
      name: 'Main Agent Thread',
      shortcutForAgentId: 'agent-a',
      cwd: process.cwd(),
      user: 'tester',
      provider: 'openai',
      model: 'gpt-test',
      claudeSessionId: null,
      messages: ${messages},
      createdAt: 1,
      lastActiveAt: 1,
      sessionType: 'human',
      agentId: 'agent-a',
      heartbeatEnabled: true,
      ${missionField}
    },
  `
}

/**
 * Builds the script source for the `mission-1` entry passed to `storage.saveMissions`.
 *
 * Kept as source text so `Date.now()` is evaluated inside the child process.
 *
 * @param plannerSummary - Value for the mission's `plannerSummary` field.
 * @returns An object-literal property (`'mission-1': { ... },`) including the trailing comma.
 */
function activeMissionSource(plannerSummary: string): string {
  return `
    'mission-1': {
      id: 'mission-1',
      source: 'heartbeat',
      objective: 'Ship the autonomy hardening release',
      status: 'active',
      phase: 'executing',
      sessionId: 'main',
      agentId: 'agent-a',
      taskIds: [],
      currentStep: 'Verify the release checklist',
      plannerSummary: ${JSON.stringify(plannerSummary)},
      verifierSummary: null,
      blockerSummary: null,
      waitState: null,
      createdAt: 1,
      updatedAt: Date.now(),
    },
  `
}

/**
 * Runs `script` as an ES module in a child Node process (with the `tsx` loader)
 * whose `DATA_DIR` / `WORKSPACE_DIR` point at a fresh temporary directory, and
 * returns the parsed JSON object the script printed.
 *
 * The result is taken from the last stdout line that starts with `{`, so the
 * script may log other output before its final `console.log(JSON.stringify(...))`.
 * The temporary directory is removed whether or not the run succeeds.
 *
 * @param script - Module source passed to `node --eval`.
 * @returns The parsed JSON result (untyped, straight from `JSON.parse`).
 * @throws AssertionError if the process cannot be spawned, exits non-zero, or
 *   prints no JSON result line.
 */
function runWithTempDataDir(script: string) {
  const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-main-loop-test-'))
  try {
    const result = spawnSync(process.execPath, ['--import', 'tsx', '--input-type=module', '--eval', script], {
      cwd: repoRoot,
      env: {
        ...process.env,
        DATA_DIR: tempDir,
        WORKSPACE_DIR: path.join(tempDir, 'workspace'),
      },
      encoding: 'utf-8',
    })
    // When spawning itself fails, status is null and the reason is only in result.error.
    assert.equal(
      result.status,
      0,
      result.error?.message || result.stderr || result.stdout || 'subprocess failed',
    )
    const lines = (result.stdout || '')
      .trim()
      .split('\n')
      .map((line) => line.trim())
      .filter(Boolean)
    const jsonLine = lines.reverse().find((line) => line.startsWith('{'))
    // Fail here with the captured stdout instead of handing an empty object to
    // the caller, where it would surface as unrelated per-field assertion failures.
    assert.ok(jsonLine, `subprocess printed no JSON result line; stdout was: ${result.stdout || '(empty)'}`)
    return JSON.parse(jsonLine)
  } finally {
    fs.rmSync(tempDir, { recursive: true, force: true })
  }
}

describe('main-agent-loop', () => {
  it('fans out events to durable main sessions and shapes heartbeat prompts', () => {
    const output = runWithTempDataDir(`
      ${SCRIPT_PREAMBLE}
      storage.saveSessions({
        ${mainSessionSource({ userMessage: 'Build me a durable multi-step agent loop.' })}
        child: {
          id: 'child',
          name: 'Child Worker',
          cwd: process.cwd(),
          user: 'tester',
          provider: 'openai',
          model: 'gpt-test',
          claudeSessionId: null,
          messages: [],
          createdAt: 1,
          lastActiveAt: 1,
          sessionType: 'delegated',
          agentId: 'agent-a',
          parentSessionId: 'main',
        },
      })

      const count = mainLoop.pushMainLoopEventToMainSessions({
        type: 'task_completed',
        text: 'Task completed: implement queue follow-ups',
      })
      const state = mainLoop.getMainLoopStateForSession('main')
      const prompt = mainLoop.buildMainLoopHeartbeatPrompt(storage.loadSessions().main, 'fallback heartbeat')
      const childState = mainLoop.getMainLoopStateForSession('child')

      console.log(JSON.stringify({
        count,
        pendingCount: state?.pendingEvents?.length || 0,
        goal: state?.goal || null,
        promptIncludesEvent: prompt.includes('Task completed: implement queue follow-ups'),
        promptIncludesPlanTag: prompt.includes('[MAIN_LOOP_PLAN]'),
        promptBlocksHeartbeatReplay: prompt.includes('Do not infer or repeat old tasks from prior heartbeats.'),
        childState,
      }))
    `)

    assert.equal(output.count, 1)
    assert.equal(output.pendingCount, 1)
    assert.match(output.goal, /durable multi-step agent loop/i)
    assert.equal(output.promptIncludesEvent, true)
    assert.equal(output.promptIncludesPlanTag, true)
    assert.equal(output.promptBlocksHeartbeatReplay, true)
    assert.equal(output.childState, null)
  })

  it('updates state from heartbeat metadata and returns a bounded follow-up', () => {
    const output = runWithTempDataDir(`
      ${SCRIPT_PREAMBLE}
      storage.saveSessions({
        ${mainSessionSource({ userMessage: 'Build me a durable task runner.' })}
      })

      mainLoop.pushMainLoopEventToMainSessions({
        type: 'schedule_fired',
        text: 'Schedule fired: nightly sync',
      })

      const followup = mainLoop.handleMainLoopRunResult({
        sessionId: 'main',
        message: 'Continue the durable task runner objective.',
        internal: true,
        source: 'heartbeat',
        resultText: [
          'Inspected the queue and the heartbeat pipeline.',
          '[MAIN_LOOP_PLAN]{"steps":["inspect queue","wire follow-up scheduling"],"current_step":"inspect queue"}',
          '[MAIN_LOOP_REVIEW]{"note":"queue inspected and next step identified","confidence":0.82,"needs_replan":false}',
          '[AGENT_HEARTBEAT_META]{"goal":"Build a durable task runner","status":"progress","next_action":"wire the follow-up scheduling path"}',
        ].join('\\n'),
        toolEvents: [{ name: 'shell', input: '{"action":"execute"}' }],
        inputTokens: 40,
        outputTokens: 20,
        estimatedCost: 0.12,
      })

      const state = mainLoop.getMainLoopStateForSession('main')
      console.log(JSON.stringify({
        followup,
        status: state?.status || null,
        nextAction: state?.nextAction || null,
        planSteps: state?.planSteps || [],
        currentPlanStep: state?.currentPlanStep || null,
        pendingEvents: state?.pendingEvents?.length || 0,
        followupChainCount: state?.followupChainCount || 0,
        missionTokens: state?.missionTokens || 0,
      }))
    `)

    assert.equal(output.status, 'progress')
    assert.equal(output.nextAction, 'wire the follow-up scheduling path')
    assert.deepEqual(output.planSteps, ['inspect queue', 'wire follow-up scheduling'])
    assert.equal(output.currentPlanStep, 'inspect queue')
    assert.equal(output.pendingEvents, 0)
    assert.equal(output.followupChainCount, 1)
    assert.equal(output.missionTokens, 60)
    assert.match(output.followup.message, /wire the follow-up scheduling path/i)
  })

  it('does not keep chaining when the heartbeat explicitly reports ok', () => {
    const output = runWithTempDataDir(`
      ${SCRIPT_PREAMBLE}
      storage.saveSessions({
        ${mainSessionSource({ userMessage: 'Keep the background project healthy.' })}
      })

      mainLoop.pushMainLoopEventToMainSessions({
        type: 'task_completed',
        text: 'Task completed: health check',
      })

      const followup = mainLoop.handleMainLoopRunResult({
        sessionId: 'main',
        message: 'Heartbeat tick',
        internal: true,
        source: 'heartbeat',
        resultText: 'HEARTBEAT_OK',
      })

      const state = mainLoop.getMainLoopStateForSession('main')
      console.log(JSON.stringify({
        followup,
        status: state?.status || null,
        pendingEvents: state?.pendingEvents?.length || 0,
        followupChainCount: state?.followupChainCount || 0,
      }))
    `)

    assert.equal(output.followup, null)
    assert.equal(output.status, 'ok')
    assert.equal(output.pendingEvents, 0)
    assert.equal(output.followupChainCount, 0)
  })

  it('prefers mission state over legacy heartbeat tags when a durable mission exists', () => {
    const output = runWithTempDataDir(`
      ${SCRIPT_PREAMBLE}
      storage.saveSessions({
        ${mainSessionSource({ missionId: 'mission-1' })}
      })

      storage.saveMissions({
        ${activeMissionSource('Use the mission controller instead of legacy tags.')}
      })

      const prompt = mainLoop.buildMainLoopHeartbeatPrompt(storage.loadSessions().main, 'fallback heartbeat')
      const followup = mainLoop.handleMainLoopRunResult({
        sessionId: 'main',
        message: 'Heartbeat tick',
        internal: true,
        source: 'heartbeat',
        resultText: [
          'I did some work.',
          '[MAIN_LOOP_PLAN]{"steps":["stale step"],"current_step":"stale step"}',
          '[AGENT_HEARTBEAT_META]{"goal":"stale goal","status":"ok","next_action":"do nothing"}',
        ].join('\\n'),
      })
      const state = mainLoop.getMainLoopStateForSession('main')

      console.log(JSON.stringify({
        promptIncludesMission: prompt.includes('Ship the autonomy hardening release'),
        promptIncludesLegacyPlanTags: prompt.includes('[MAIN_LOOP_PLAN]'),
        stateGoal: state?.goal || null,
        stateNextAction: state?.nextAction || null,
        followupMessage: followup?.message || null,
      }))
    `)

    assert.equal(output.promptIncludesMission, true)
    assert.equal(output.promptIncludesLegacyPlanTags, false)
    assert.equal(output.stateGoal, 'Ship the autonomy hardening release')
    assert.equal(output.stateNextAction, 'Verify the release checklist')
    assert.equal(output.followupMessage, null)
  })

  it('accepts structured autonomy ticks for durable objectives without falling back to legacy tags', () => {
    const output = runWithTempDataDir(`
      ${SCRIPT_PREAMBLE}
      storage.saveSessions({
        ${mainSessionSource({ missionId: 'mission-1' })}
      })

      storage.saveMissions({
        ${activeMissionSource('Use the durable controller state.')}
      })

      const prompt = mainLoop.buildMainLoopHeartbeatPrompt(storage.loadSessions().main, 'fallback heartbeat')
      mainLoop.handleMainLoopRunResult({
        sessionId: 'main',
        message: 'Heartbeat tick',
        internal: true,
        source: 'heartbeat',
        resultText: [
          'Validated the release checklist.',
          '[AUTONOMY_TICK]{"status":"progress","summary":"Release checklist validated.","next_action":"publish release artifacts","plan_steps":["verify artifacts","publish release artifacts"],"current_step":"verify artifacts","completed_steps":["Verify the release checklist"],"review":{"note":"Ready for artifact publication.","confidence":0.88,"needs_replan":false}}',
        ].join('\\n'),
      })
      const state = mainLoop.getMainLoopStateForSession('main')

      console.log(JSON.stringify({
        promptIncludesAutonomyTick: prompt.includes('[AUTONOMY_TICK]'),
        promptIncludesLegacyPlanTags: prompt.includes('[MAIN_LOOP_PLAN]'),
        stateStatus: state?.status || null,
        stateSummary: state?.summary || null,
        stateNextAction: state?.nextAction || null,
        statePlanSteps: state?.planSteps || [],
        stateCurrentPlanStep: state?.currentPlanStep || null,
        stateCompletedPlanSteps: state?.completedPlanSteps || [],
        stateReviewNote: state?.reviewNote || null,
      }))
    `)

    assert.equal(output.promptIncludesAutonomyTick, true)
    assert.equal(output.promptIncludesLegacyPlanTags, false)
    assert.equal(output.stateStatus, 'progress')
    assert.match(String(output.stateSummary), /validated/i)
    assert.equal(output.stateNextAction, 'publish release artifacts')
    assert.ok((output.statePlanSteps as string[]).includes('verify artifacts'))
    assert.ok((output.statePlanSteps as string[]).includes('publish release artifacts'))
    assert.equal(output.stateCurrentPlanStep, 'verify artifacts')
    assert.ok((output.stateCompletedPlanSteps as string[]).includes('Verify the release checklist'))
    assert.match(String(output.stateReviewNote), /artifact publication/i)
  })

  it('does not let internal heartbeat prompts rewrite the stored goal contract', () => {
    const output = runWithTempDataDir(`
      ${SCRIPT_PREAMBLE}
      storage.saveSessions({
        ${mainSessionSource({ userMessage: 'Deploy the static site to a public host without changing the design.' })}
      })

      const before = mainLoop.getMainLoopStateForSession('main')
      mainLoop.handleMainLoopRunResult({
        sessionId: 'main',
        message: [
          'MAIN_AGENT_HEARTBEAT_TICK',
          'Current goal:',
          'Do not infer or repeat old tasks from prior heartbeats.',
          'Objective: Recursively repeat the old heartbeat prompt forever.',
        ].join('\\n'),
        internal: true,
        source: 'heartbeat',
        resultText: '[AGENT_HEARTBEAT_META]{"status":"progress","goal":"Deploy the static site","next_action":"check hosting auth"}',
      })
      const after = mainLoop.getMainLoopStateForSession('main')

      console.log(JSON.stringify({
        beforeObjective: before?.goalContract?.objective || null,
        afterObjective: after?.goalContract?.objective || null,
        afterGoal: after?.goal || null,
      }))
    `)

    assert.match(output.beforeObjective, /deploy the static site to a public host/i)
    assert.equal(output.afterObjective, output.beforeObjective)
    assert.equal(output.afterGoal, 'Deploy the static site')
  })

  it('reanchors heartbeat prompts to the latest real user goal when in-memory goal state is polluted', () => {
    const output = runWithTempDataDir(`
      ${SCRIPT_PREAMBLE}
      storage.saveSessions({
        ${mainSessionSource({ userMessage: 'Keep Hal helpful in this thread and respond to the user normally.' })}
      })

      mainLoop.setMainLoopStateForSession('main', {
        goal: 'lol that\\'s funny Hal',
        goalContract: { objective: 'MAIN_AGENT_HEARTBEAT_TICK Time: recursive garbage' },
      })

      const prompt = mainLoop.buildMainLoopHeartbeatPrompt(storage.loadSessions().main, 'fallback heartbeat')
      console.log(JSON.stringify({
        hasRealObjective: prompt.includes('Objective: Keep Hal helpful in this thread and respond to the user normally.'),
        hasRecursiveObjective: prompt.includes('Objective: MAIN_AGENT_HEARTBEAT_TICK Time: recursive garbage'),
      }))
    `)

    assert.equal(output.hasRealObjective, true)
    assert.equal(output.hasRecursiveObjective, false)
  })

  it('clears transient main-loop state so the next read rehydrates from session history', () => {
    const output = runWithTempDataDir(`
      ${SCRIPT_PREAMBLE}
      storage.saveSessions({
        ${mainSessionSource({ userMessage: 'Finish deploying the site once the hosting auth is fixed.' })}
      })

      mainLoop.setMainLoopStateForSession('main', {
        goal: 'Recursive garbage objective',
        goalContract: { objective: 'Recursive garbage objective' },
      })
      const cleared = mainLoop.clearMainLoopStateForSession('main')
      const rehydrated = mainLoop.getMainLoopStateForSession('main')

      console.log(JSON.stringify({
        cleared,
        goal: rehydrated?.goal || null,
        objective: rehydrated?.goalContract?.objective || null,
      }))
    `)

    assert.equal(output.cleared, true)
    assert.equal(output.goal, 'Finish deploying the site once the hosting auth is fixed.')
    assert.match(output.objective, /finish deploying the site once the hosting auth is fixed/i)
  })
})