/ src / lib / server / tool-loop-detection.test.ts
tool-loop-detection.test.ts
  1  import assert from 'node:assert/strict'
  2  import { describe, it } from 'node:test'
  3  import { ToolLoopTracker, hashToolInput, hashToolOutput } from './tool-loop-detection'
  4  
  5  describe('ToolLoopTracker', () => {
  // Baseline: three calls that differ in input and output must each come back
  // as null, and the tracker must report all three via `size`.
  it('returns null for normal non-repeating tool calls', () => {
    const loopTracker = new ToolLoopTracker()

    const londonSearch = loopTracker.record('web_search', { query: 'weather london' }, 'Sunny, 20C')
    assert.strictEqual(londonSearch, null)

    const fileWrite = loopTracker.record('files', { action: 'write', path: '/tmp/test.json' }, 'OK')
    assert.strictEqual(fileWrite, null)

    const parisSearch = loopTracker.record('web_search', { query: 'weather paris' }, 'Cloudy, 15C')
    assert.strictEqual(parisSearch, null)

    assert.strictEqual(loopTracker.size, 3)
  })
 13  
 14    it('detects generic repeat at warning threshold', () => {
 15      const tracker = new ToolLoopTracker({ repeatWarn: 3, repeatCritical: 6 })
 16      for (let i = 0; i < 2; i++) {
 17        assert.equal(tracker.record('web_search', { query: 'same query' }, `result ${i}`), null)
 18      }
 19      const result = tracker.record('web_search', { query: 'same query' }, 'result 2')
 20      assert.ok(result)
 21      assert.equal(result.severity, 'warning')
 22      assert.equal(result.detector, 'generic_repeat')
 23    })
 24  
 25    it('detects generic repeat at critical threshold', () => {
 26      const tracker = new ToolLoopTracker({ repeatWarn: 3, repeatCritical: 5, toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
 27      for (let i = 0; i < 4; i++) {
 28        tracker.record('web_search', { query: 'same' }, `result ${i}`)
 29      }
 30      const result = tracker.record('web_search', { query: 'same' }, 'result 4')
 31      assert.ok(result)
 32      assert.equal(result.severity, 'critical')
 33      assert.equal(result.detector, 'generic_repeat')
 34    })
 35  
 36    it('detects polling stall when same tool returns identical output', () => {
 37      const tracker = new ToolLoopTracker({ pollWarn: 3, pollCritical: 5 })
 38      // Different inputs but same output = polling stall
 39      for (let i = 0; i < 2; i++) {
 40        assert.equal(tracker.record('process', { action: 'poll', id: `run-${i}` }, 'status: running'), null)
 41      }
 42      const result = tracker.record('process', { action: 'poll', id: 'run-2' }, 'status: running')
 43      assert.ok(result)
 44      assert.equal(result.severity, 'warning')
 45      assert.equal(result.detector, 'polling_stall')
 46    })
 47  
 48    it('detects ping-pong between two tools', () => {
 49      const tracker = new ToolLoopTracker({ pingPongWarn: 2, pingPongCritical: 4, repeatWarn: 100, repeatCritical: 100, pollWarn: 100, pollCritical: 100 })
 50      // Simulate A-B-A-B with identical outputs
 51      for (let i = 0; i < 2; i++) {
 52        tracker.record('web_search', { query: 'find it' }, 'no results found')
 53        tracker.record('web_fetch', { url: 'https://example.com' }, '404 not found')
 54      }
 55      // One more A to complete the 3rd pair-start
 56      const result = tracker.record('web_search', { query: 'find it' }, 'no results found')
 57      // The ping-pong detector checks the last pair against previous pairs
 58      // After 4 calls (A-B-A-B) + 1 more A, we have 2 full A-B cycles with identical results
 59      if (result) {
 60        assert.equal(result.detector, 'ping_pong')
 61      }
 62    })
 63  
 64    it('circuit breaker fires at absolute cap', () => {
 65      const tracker = new ToolLoopTracker({ circuitBreaker: 5, repeatWarn: 100, repeatCritical: 100, toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
 66      for (let i = 0; i < 4; i++) {
 67        tracker.record('shell', { command: 'curl http://stuck.com' }, `err ${i}`)
 68      }
 69      const result = tracker.record('shell', { command: 'curl http://stuck.com' }, 'err 4')
 70      assert.ok(result)
 71      assert.equal(result.severity, 'critical')
 72      assert.equal(result.detector, 'circuit_breaker')
 73    })
 74  
 75    it('does not fire for varied tool calls even with many total calls', () => {
 76      const tracker = new ToolLoopTracker({ toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
 77      for (let i = 0; i < 20; i++) {
 78        const result = tracker.record('web_search', { query: `query ${i}` }, `result ${i}`)
 79        assert.equal(result, null, `Unexpected detection at call ${i}`)
 80      }
 81      assert.equal(tracker.size, 20)
 82    })
 83  
 84    it('detects tool frequency when same tool is called too many times (any input)', () => {
 85      const tracker = new ToolLoopTracker({ toolFrequencyWarn: 3, toolFrequencyCritical: 5 })
 86      for (let i = 0; i < 2; i++) {
 87        assert.equal(tracker.record('web_search', { query: `q${i}` }, `r${i}`), null)
 88      }
 89      const warn = tracker.record('web_search', { query: 'q2' }, 'r2')
 90      assert.ok(warn)
 91      assert.equal(warn.severity, 'warning')
 92      assert.equal(warn.detector, 'tool_frequency')
 93    })
 94  
 95    it('previews critical repeats before another identical tool call executes', () => {
 96      const tracker = new ToolLoopTracker({ repeatWarn: 2, repeatCritical: 3, toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
 97      tracker.record('web_search', { query: 'same' }, 'result 1')
 98      tracker.record('web_search', { query: 'same' }, 'result 2')
 99  
100      const preview = tracker.preview('web_search', { query: 'same' })
101      assert.ok(preview)
102      assert.equal(preview?.severity, 'critical')
103      assert.equal(preview?.detector, 'generic_repeat')
104    })
105  
106    it('previews tool overuse by frequency before the next call executes', () => {
107      const tracker = new ToolLoopTracker({ toolFrequencyWarn: 2, toolFrequencyCritical: 4 })
108      tracker.record('browser', { action: 'open', url: 'https://a.example' }, 'ok')
109  
110      const preview = tracker.preview('browser', { action: 'open', url: 'https://b.example' })
111      assert.ok(preview)
112      assert.equal(preview?.severity, 'warning')
113      assert.equal(preview?.detector, 'tool_frequency')
114    })
115  
116    it('detects output stagnation when many calls produce identical output', () => {
117      const tracker = new ToolLoopTracker({ repeatWarn: 100, repeatCritical: 100, toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
118      for (let i = 0; i < 7; i++) {
119        assert.equal(tracker.record(`tool_${i}`, { input: `arg_${i}` }, 'Connection refused'), null)
120      }
121      const result = tracker.record('tool_7', { input: 'arg_7' }, 'Connection refused')
122      assert.ok(result)
123      assert.equal(result.detector, 'output_stagnation')
124      assert.equal(result.severity, 'critical')
125    })
126  
127    it('detects output stagnation warning when 6 of 8 calls match', () => {
128      const tracker = new ToolLoopTracker({ repeatWarn: 100, repeatCritical: 100, toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
129      for (let i = 0; i < 6; i++) {
130        tracker.record(`tool_${i}`, { input: `arg_${i}` }, 'same error output')
131      }
132      tracker.record('tool_6', { input: 'arg_6' }, 'different output A')
133      const result = tracker.record('tool_7', { input: 'arg_7' }, 'different output B')
134      assert.ok(result)
135      assert.equal(result.detector, 'output_stagnation')
136      assert.equal(result.severity, 'warning')
137    })
138  
139    it('detects error convergence when most calls return errors', () => {
140      const tracker = new ToolLoopTracker({ repeatWarn: 100, repeatCritical: 100, toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
141      tracker.record('shell', { cmd: 'test1' }, 'ok result')
142      for (let i = 0; i < 5; i++) {
143        tracker.record(`tool_${i}`, { input: `arg_${i}` }, `Error: ECONNREFUSED ${i}`)
144      }
145      const result = tracker.record('tool_5', { input: 'arg_5' }, 'Error: timeout on request')
146      if (result) {
147        assert.equal(result.detector, 'error_convergence')
148      }
149    })
150  
151    it('does not fire stagnation for varied outputs', () => {
152      const tracker = new ToolLoopTracker({ repeatWarn: 100, repeatCritical: 100, toolFrequencyWarn: 100, toolFrequencyCritical: 100 })
153      for (let i = 0; i < 10; i++) {
154        const result = tracker.record(`tool_${i}`, { input: `arg_${i}` }, `unique result ${i}`)
155        assert.equal(result, null)
156      }
157    })
158  })
159  
160  describe('hash helpers', () => {
161    it('produces consistent hashes for same input', () => {
162      assert.equal(hashToolInput({ query: 'test' }), hashToolInput({ query: 'test' }))
163      assert.equal(hashToolOutput('hello world'), hashToolOutput('hello world'))
164    })
165  
166    it('produces different hashes for different input', () => {
167      assert.notEqual(hashToolInput({ query: 'a' }), hashToolInput({ query: 'b' }))
168    })
169  })