// src/lib/server/agents/main-agent-loop.test.ts
import assert from 'node:assert/strict'
import fs from 'node:fs'
import os from 'node:os'
import path from 'node:path'
import { spawnSync } from 'node:child_process'
import { describe, it } from 'node:test'
import { fileURLToPath } from 'node:url'
  7  
  8  const repoRoot = path.resolve(path.dirname(new URL(import.meta.url).pathname), '../../..')
  9  
 10  function runWithTempDataDir(script: string) {
 11    const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'swarmclaw-main-loop-test-'))
 12    try {
 13      const result = spawnSync(process.execPath, ['--import', 'tsx', '--input-type=module', '--eval', script], {
 14        cwd: repoRoot,
 15        env: {
 16          ...process.env,
 17          DATA_DIR: tempDir,
 18          WORKSPACE_DIR: path.join(tempDir, 'workspace'),
 19        },
 20        encoding: 'utf-8',
 21      })
 22      assert.equal(result.status, 0, result.stderr || result.stdout || 'subprocess failed')
 23      const lines = (result.stdout || '')
 24        .trim()
 25        .split('\n')
 26        .map((line) => line.trim())
 27        .filter(Boolean)
 28      const jsonLine = [...lines].reverse().find((line) => line.startsWith('{'))
 29      return JSON.parse(jsonLine || '{}')
 30    } finally {
 31      fs.rmSync(tempDir, { recursive: true, force: true })
 32    }
 33  }
 34  
 35  describe('main-agent-loop', () => {
 36    it('fans out events to durable main sessions and shapes heartbeat prompts', () => {
 37      const output = runWithTempDataDir(`
 38        const storageMod = await import('@/lib/server/storage')
 39        const storage = storageMod.default || storageMod['module.exports'] || storageMod
 40        const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
 41        const mainLoop = mainLoopMod.default || mainLoopMod['module.exports'] || mainLoopMod
 42  
 43        storage.saveAgents({
 44          'agent-a': {
 45            id: 'agent-a',
 46            name: 'Agent A',
 47            provider: 'openai',
 48            model: 'gpt-test',
 49          },
 50        })
 51  
 52        storage.saveSessions({
 53          main: {
 54            id: 'main',
 55            name: 'Main Agent Thread',
 56            shortcutForAgentId: 'agent-a',
 57            cwd: process.cwd(),
 58            user: 'tester',
 59            provider: 'openai',
 60            model: 'gpt-test',
 61            claudeSessionId: null,
 62            messages: [
 63              { role: 'user', text: 'Build me a durable multi-step agent loop.', time: 1 },
 64            ],
 65            createdAt: 1,
 66            lastActiveAt: 1,
 67            sessionType: 'human',
 68            agentId: 'agent-a',
 69            heartbeatEnabled: true,
 70          },
 71          child: {
 72            id: 'child',
 73            name: 'Child Worker',
 74            cwd: process.cwd(),
 75            user: 'tester',
 76            provider: 'openai',
 77            model: 'gpt-test',
 78            claudeSessionId: null,
 79            messages: [],
 80            createdAt: 1,
 81            lastActiveAt: 1,
 82            sessionType: 'delegated',
 83            agentId: 'agent-a',
 84            parentSessionId: 'main',
 85          },
 86        })
 87  
 88        const count = mainLoop.pushMainLoopEventToMainSessions({
 89          type: 'task_completed',
 90          text: 'Task completed: implement queue follow-ups',
 91        })
 92        const state = mainLoop.getMainLoopStateForSession('main')
 93        const prompt = mainLoop.buildMainLoopHeartbeatPrompt(storage.loadSessions().main, 'fallback heartbeat')
 94        const childState = mainLoop.getMainLoopStateForSession('child')
 95  
 96        console.log(JSON.stringify({
 97          count,
 98          pendingCount: state?.pendingEvents?.length || 0,
 99          goal: state?.goal || null,
100          promptIncludesEvent: prompt.includes('Task completed: implement queue follow-ups'),
101          promptIncludesPlanTag: prompt.includes('[MAIN_LOOP_PLAN]'),
102          promptBlocksHeartbeatReplay: prompt.includes('Do not infer or repeat old tasks from prior heartbeats.'),
103          childState,
104        }))
105      `)
106  
107      assert.equal(output.count, 1)
108      assert.equal(output.pendingCount, 1)
109      assert.match(output.goal, /durable multi-step agent loop/i)
110      assert.equal(output.promptIncludesEvent, true)
111      assert.equal(output.promptIncludesPlanTag, true)
112      assert.equal(output.promptBlocksHeartbeatReplay, true)
113      assert.equal(output.childState, null)
114    })
115  
116    it('updates state from heartbeat metadata and returns a bounded follow-up', () => {
117      const output = runWithTempDataDir(`
118        const storageMod = await import('@/lib/server/storage')
119        const storage = storageMod.default || storageMod['module.exports'] || storageMod
120        const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
121        const mainLoop = mainLoopMod.default || mainLoopMod['module.exports'] || mainLoopMod
122  
123        storage.saveAgents({
124          'agent-a': {
125            id: 'agent-a',
126            name: 'Agent A',
127            provider: 'openai',
128            model: 'gpt-test',
129          },
130        })
131  
132        storage.saveSessions({
133          main: {
134            id: 'main',
135            name: 'Main Agent Thread',
136            shortcutForAgentId: 'agent-a',
137            cwd: process.cwd(),
138            user: 'tester',
139            provider: 'openai',
140            model: 'gpt-test',
141            claudeSessionId: null,
142            messages: [
143              { role: 'user', text: 'Build me a durable task runner.', time: 1 },
144            ],
145            createdAt: 1,
146            lastActiveAt: 1,
147            sessionType: 'human',
148            agentId: 'agent-a',
149            heartbeatEnabled: true,
150          },
151        })
152  
153        mainLoop.pushMainLoopEventToMainSessions({
154          type: 'schedule_fired',
155          text: 'Schedule fired: nightly sync',
156        })
157  
158        const followup = mainLoop.handleMainLoopRunResult({
159          sessionId: 'main',
160          message: 'Continue the durable task runner objective.',
161          internal: true,
162          source: 'heartbeat',
163          resultText: [
164            'Inspected the queue and the heartbeat pipeline.',
165            '[MAIN_LOOP_PLAN]{"steps":["inspect queue","wire follow-up scheduling"],"current_step":"inspect queue"}',
166            '[MAIN_LOOP_REVIEW]{"note":"queue inspected and next step identified","confidence":0.82,"needs_replan":false}',
167            '[AGENT_HEARTBEAT_META]{"goal":"Build a durable task runner","status":"progress","next_action":"wire the follow-up scheduling path"}',
168          ].join('\\n'),
169          toolEvents: [{ name: 'shell', input: '{"action":"execute"}' }],
170          inputTokens: 40,
171          outputTokens: 20,
172          estimatedCost: 0.12,
173        })
174  
175        const state = mainLoop.getMainLoopStateForSession('main')
176        console.log(JSON.stringify({
177          followup,
178          status: state?.status || null,
179          nextAction: state?.nextAction || null,
180          planSteps: state?.planSteps || [],
181          currentPlanStep: state?.currentPlanStep || null,
182          pendingEvents: state?.pendingEvents?.length || 0,
183          followupChainCount: state?.followupChainCount || 0,
184          missionTokens: state?.missionTokens || 0,
185        }))
186      `)
187  
188      assert.equal(output.status, 'progress')
189      assert.equal(output.nextAction, 'wire the follow-up scheduling path')
190      assert.deepEqual(output.planSteps, ['inspect queue', 'wire follow-up scheduling'])
191      assert.equal(output.currentPlanStep, 'inspect queue')
192      assert.equal(output.pendingEvents, 0)
193      assert.equal(output.followupChainCount, 1)
194      assert.equal(output.missionTokens, 60)
195      assert.match(output.followup.message, /wire the follow-up scheduling path/i)
196    })
197  
198    it('does not keep chaining when the heartbeat explicitly reports ok', () => {
199      const output = runWithTempDataDir(`
200        const storageMod = await import('@/lib/server/storage')
201        const storage = storageMod.default || storageMod['module.exports'] || storageMod
202        const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
203        const mainLoop = mainLoopMod.default || mainLoopMod['module.exports'] || mainLoopMod
204  
205        storage.saveAgents({
206          'agent-a': {
207            id: 'agent-a',
208            name: 'Agent A',
209            provider: 'openai',
210            model: 'gpt-test',
211          },
212        })
213  
214        storage.saveSessions({
215          main: {
216            id: 'main',
217            name: 'Main Agent Thread',
218            shortcutForAgentId: 'agent-a',
219            cwd: process.cwd(),
220            user: 'tester',
221            provider: 'openai',
222            model: 'gpt-test',
223            claudeSessionId: null,
224            messages: [
225              { role: 'user', text: 'Keep the background project healthy.', time: 1 },
226            ],
227            createdAt: 1,
228            lastActiveAt: 1,
229            sessionType: 'human',
230            agentId: 'agent-a',
231            heartbeatEnabled: true,
232          },
233        })
234  
235        mainLoop.pushMainLoopEventToMainSessions({
236          type: 'task_completed',
237          text: 'Task completed: health check',
238        })
239  
240        const followup = mainLoop.handleMainLoopRunResult({
241          sessionId: 'main',
242          message: 'Heartbeat tick',
243          internal: true,
244          source: 'heartbeat',
245          resultText: 'HEARTBEAT_OK',
246        })
247  
248        const state = mainLoop.getMainLoopStateForSession('main')
249        console.log(JSON.stringify({
250          followup,
251          status: state?.status || null,
252          pendingEvents: state?.pendingEvents?.length || 0,
253          followupChainCount: state?.followupChainCount || 0,
254        }))
255      `)
256  
257      assert.equal(output.followup, null)
258      assert.equal(output.status, 'ok')
259      assert.equal(output.pendingEvents, 0)
260      assert.equal(output.followupChainCount, 0)
261    })
262  
263    it('prefers mission state over legacy heartbeat tags when a durable mission exists', () => {
264      const output = runWithTempDataDir(`
265        const storageMod = await import('@/lib/server/storage')
266        const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
267        const storage = storageMod.default || storageMod
268        const mainLoop = mainLoopMod.default || mainLoopMod
269  
270        storage.saveAgents({
271          'agent-a': {
272            id: 'agent-a',
273            name: 'Agent A',
274            provider: 'openai',
275            model: 'gpt-test',
276          },
277        })
278  
279        storage.saveSessions({
280          main: {
281            id: 'main',
282            name: 'Main Agent Thread',
283            shortcutForAgentId: 'agent-a',
284            cwd: process.cwd(),
285            user: 'tester',
286            provider: 'openai',
287            model: 'gpt-test',
288            claudeSessionId: null,
289            messages: [],
290            createdAt: 1,
291            lastActiveAt: 1,
292            sessionType: 'human',
293            agentId: 'agent-a',
294            heartbeatEnabled: true,
295            missionId: 'mission-1',
296          },
297        })
298  
299        storage.saveMissions({
300          'mission-1': {
301            id: 'mission-1',
302            source: 'heartbeat',
303            objective: 'Ship the autonomy hardening release',
304            status: 'active',
305            phase: 'executing',
306            sessionId: 'main',
307            agentId: 'agent-a',
308            taskIds: [],
309            currentStep: 'Verify the release checklist',
310            plannerSummary: 'Use the mission controller instead of legacy tags.',
311            verifierSummary: null,
312            blockerSummary: null,
313            waitState: null,
314            createdAt: 1,
315            updatedAt: Date.now(),
316          },
317        })
318  
319        const prompt = mainLoop.buildMainLoopHeartbeatPrompt(storage.loadSessions().main, 'fallback heartbeat')
320        const followup = mainLoop.handleMainLoopRunResult({
321          sessionId: 'main',
322          message: 'Heartbeat tick',
323          internal: true,
324          source: 'heartbeat',
325          resultText: [
326            'I did some work.',
327            '[MAIN_LOOP_PLAN]{"steps":["stale step"],"current_step":"stale step"}',
328            '[AGENT_HEARTBEAT_META]{"goal":"stale goal","status":"ok","next_action":"do nothing"}',
329          ].join('\\n'),
330        })
331        const state = mainLoop.getMainLoopStateForSession('main')
332  
333        console.log(JSON.stringify({
334          promptIncludesMission: prompt.includes('Ship the autonomy hardening release'),
335          promptIncludesLegacyPlanTags: prompt.includes('[MAIN_LOOP_PLAN]'),
336          stateGoal: state?.goal || null,
337          stateNextAction: state?.nextAction || null,
338          followupMessage: followup?.message || null,
339        }))
340      `)
341  
342      assert.equal(output.promptIncludesMission, true)
343      assert.equal(output.promptIncludesLegacyPlanTags, false)
344      assert.equal(output.stateGoal, 'Ship the autonomy hardening release')
345      assert.equal(output.stateNextAction, 'Verify the release checklist')
346      assert.equal(output.followupMessage, null)
347    })
348  
349    it('accepts structured autonomy ticks for durable objectives without falling back to legacy tags', () => {
350      const output = runWithTempDataDir(`
351        const storageMod = await import('@/lib/server/storage')
352        const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
353        const storage = storageMod.default || storageMod
354        const mainLoop = mainLoopMod.default || mainLoopMod
355  
356        storage.saveAgents({
357          'agent-a': {
358            id: 'agent-a',
359            name: 'Agent A',
360            provider: 'openai',
361            model: 'gpt-test',
362          },
363        })
364  
365        storage.saveSessions({
366          main: {
367            id: 'main',
368            name: 'Main Agent Thread',
369            shortcutForAgentId: 'agent-a',
370            cwd: process.cwd(),
371            user: 'tester',
372            provider: 'openai',
373            model: 'gpt-test',
374            claudeSessionId: null,
375            messages: [],
376            createdAt: 1,
377            lastActiveAt: 1,
378            sessionType: 'human',
379            agentId: 'agent-a',
380            heartbeatEnabled: true,
381            missionId: 'mission-1',
382          },
383        })
384  
385        storage.saveMissions({
386          'mission-1': {
387            id: 'mission-1',
388            source: 'heartbeat',
389            objective: 'Ship the autonomy hardening release',
390            status: 'active',
391            phase: 'executing',
392            sessionId: 'main',
393            agentId: 'agent-a',
394            taskIds: [],
395            currentStep: 'Verify the release checklist',
396            plannerSummary: 'Use the durable controller state.',
397            verifierSummary: null,
398            blockerSummary: null,
399            waitState: null,
400            createdAt: 1,
401            updatedAt: Date.now(),
402          },
403        })
404  
405        const prompt = mainLoop.buildMainLoopHeartbeatPrompt(storage.loadSessions().main, 'fallback heartbeat')
406        mainLoop.handleMainLoopRunResult({
407          sessionId: 'main',
408          message: 'Heartbeat tick',
409          internal: true,
410          source: 'heartbeat',
411          resultText: [
412            'Validated the release checklist.',
413            '[AUTONOMY_TICK]{"status":"progress","summary":"Release checklist validated.","next_action":"publish release artifacts","plan_steps":["verify artifacts","publish release artifacts"],"current_step":"verify artifacts","completed_steps":["Verify the release checklist"],"review":{"note":"Ready for artifact publication.","confidence":0.88,"needs_replan":false}}',
414          ].join('\\n'),
415        })
416        const state = mainLoop.getMainLoopStateForSession('main')
417  
418        console.log(JSON.stringify({
419          promptIncludesAutonomyTick: prompt.includes('[AUTONOMY_TICK]'),
420          promptIncludesLegacyPlanTags: prompt.includes('[MAIN_LOOP_PLAN]'),
421          stateStatus: state?.status || null,
422          stateSummary: state?.summary || null,
423          stateNextAction: state?.nextAction || null,
424          statePlanSteps: state?.planSteps || [],
425          stateCurrentPlanStep: state?.currentPlanStep || null,
426          stateCompletedPlanSteps: state?.completedPlanSteps || [],
427          stateReviewNote: state?.reviewNote || null,
428        }))
429      `)
430  
431      assert.equal(output.promptIncludesAutonomyTick, true)
432      assert.equal(output.promptIncludesLegacyPlanTags, false)
433      assert.equal(output.stateStatus, 'progress')
434      assert.match(String(output.stateSummary), /validated/i)
435      assert.equal(output.stateNextAction, 'publish release artifacts')
436      assert.ok((output.statePlanSteps as string[]).includes('verify artifacts'))
437      assert.ok((output.statePlanSteps as string[]).includes('publish release artifacts'))
438      assert.equal(output.stateCurrentPlanStep, 'verify artifacts')
439      assert.ok((output.stateCompletedPlanSteps as string[]).includes('Verify the release checklist'))
440      assert.match(String(output.stateReviewNote), /artifact publication/i)
441    })
442  
443    it('does not let internal heartbeat prompts rewrite the stored goal contract', () => {
444      const output = runWithTempDataDir(`
445        const storageMod = await import('@/lib/server/storage')
446        const storage = storageMod.default || storageMod['module.exports'] || storageMod
447        const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
448        const mainLoop = mainLoopMod.default || mainLoopMod['module.exports'] || mainLoopMod
449  
450        storage.saveAgents({
451          'agent-a': {
452            id: 'agent-a',
453            name: 'Agent A',
454            provider: 'openai',
455            model: 'gpt-test',
456          },
457        })
458  
459        storage.saveSessions({
460          main: {
461            id: 'main',
462            name: 'Main Agent Thread',
463            shortcutForAgentId: 'agent-a',
464            cwd: process.cwd(),
465            user: 'tester',
466            provider: 'openai',
467            model: 'gpt-test',
468            claudeSessionId: null,
469            messages: [
470              { role: 'user', text: 'Deploy the static site to a public host without changing the design.', time: 1 },
471            ],
472            createdAt: 1,
473            lastActiveAt: 1,
474            sessionType: 'human',
475            agentId: 'agent-a',
476            heartbeatEnabled: true,
477          },
478        })
479  
480        const before = mainLoop.getMainLoopStateForSession('main')
481        mainLoop.handleMainLoopRunResult({
482          sessionId: 'main',
483          message: [
484            'MAIN_AGENT_HEARTBEAT_TICK',
485            'Current goal:',
486            'Do not infer or repeat old tasks from prior heartbeats.',
487            'Objective: Recursively repeat the old heartbeat prompt forever.',
488          ].join('\\n'),
489          internal: true,
490          source: 'heartbeat',
491          resultText: '[AGENT_HEARTBEAT_META]{"status":"progress","goal":"Deploy the static site","next_action":"check hosting auth"}',
492        })
493        const after = mainLoop.getMainLoopStateForSession('main')
494  
495        console.log(JSON.stringify({
496          beforeObjective: before?.goalContract?.objective || null,
497          afterObjective: after?.goalContract?.objective || null,
498          afterGoal: after?.goal || null,
499        }))
500      `)
501  
502      assert.match(output.beforeObjective, /deploy the static site to a public host/i)
503      assert.equal(output.afterObjective, output.beforeObjective)
504      assert.equal(output.afterGoal, 'Deploy the static site')
505    })
506  
507    it('reanchors heartbeat prompts to the latest real user goal when in-memory goal state is polluted', () => {
508      const output = runWithTempDataDir(`
509        const storageMod = await import('@/lib/server/storage')
510        const storage = storageMod.default || storageMod['module.exports'] || storageMod
511        const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
512        const mainLoop = mainLoopMod.default || mainLoopMod['module.exports'] || mainLoopMod
513  
514        storage.saveAgents({
515          'agent-a': {
516            id: 'agent-a',
517            name: 'Agent A',
518            provider: 'openai',
519            model: 'gpt-test',
520          },
521        })
522  
523        storage.saveSessions({
524          main: {
525            id: 'main',
526            name: 'Main Agent Thread',
527            shortcutForAgentId: 'agent-a',
528            cwd: process.cwd(),
529            user: 'tester',
530            provider: 'openai',
531            model: 'gpt-test',
532            claudeSessionId: null,
533            messages: [
534              { role: 'user', text: 'Keep Hal helpful in this thread and respond to the user normally.', time: 1 },
535            ],
536            createdAt: 1,
537            lastActiveAt: 1,
538            sessionType: 'human',
539            agentId: 'agent-a',
540            heartbeatEnabled: true,
541          },
542        })
543  
544        mainLoop.setMainLoopStateForSession('main', {
545          goal: 'lol that\\'s funny Hal',
546          goalContract: { objective: 'MAIN_AGENT_HEARTBEAT_TICK Time: recursive garbage' },
547        })
548  
549        const prompt = mainLoop.buildMainLoopHeartbeatPrompt(storage.loadSessions().main, 'fallback heartbeat')
550        console.log(JSON.stringify({
551          hasRealObjective: prompt.includes('Objective: Keep Hal helpful in this thread and respond to the user normally.'),
552          hasRecursiveObjective: prompt.includes('Objective: MAIN_AGENT_HEARTBEAT_TICK Time: recursive garbage'),
553        }))
554      `)
555  
556      assert.equal(output.hasRealObjective, true)
557      assert.equal(output.hasRecursiveObjective, false)
558    })
559  
560    it('clears transient main-loop state so the next read rehydrates from session history', () => {
561      const output = runWithTempDataDir(`
562        const storageMod = await import('@/lib/server/storage')
563        const storage = storageMod.default || storageMod['module.exports'] || storageMod
564        const mainLoopMod = await import('@/lib/server/agents/main-agent-loop')
565        const mainLoop = mainLoopMod.default || mainLoopMod['module.exports'] || mainLoopMod
566  
567        storage.saveAgents({
568          'agent-a': {
569            id: 'agent-a',
570            name: 'Agent A',
571            provider: 'openai',
572            model: 'gpt-test',
573          },
574        })
575  
576        storage.saveSessions({
577          main: {
578            id: 'main',
579            name: 'Main Agent Thread',
580            shortcutForAgentId: 'agent-a',
581            cwd: process.cwd(),
582            user: 'tester',
583            provider: 'openai',
584            model: 'gpt-test',
585            claudeSessionId: null,
586            messages: [
587              { role: 'user', text: 'Finish deploying the site once the hosting auth is fixed.', time: 1 },
588            ],
589            createdAt: 1,
590            lastActiveAt: 1,
591            sessionType: 'human',
592            agentId: 'agent-a',
593            heartbeatEnabled: true,
594          },
595        })
596  
597        mainLoop.setMainLoopStateForSession('main', {
598          goal: 'Recursive garbage objective',
599          goalContract: { objective: 'Recursive garbage objective' },
600        })
601        const cleared = mainLoop.clearMainLoopStateForSession('main')
602        const rehydrated = mainLoop.getMainLoopStateForSession('main')
603  
604        console.log(JSON.stringify({
605          cleared,
606          goal: rehydrated?.goal || null,
607          objective: rehydrated?.goalContract?.objective || null,
608        }))
609      `)
610  
611      assert.equal(output.cleared, true)
612      assert.equal(output.goal, 'Finish deploying the site once the hosting auth is fixed.')
613      assert.match(output.objective, /finish deploying the site once the hosting auth is fixed/i)
614    })
615  })