tool-loop-detection.test.ts
import assert from 'node:assert/strict'
import { describe, it } from 'node:test'
import { ToolLoopTracker, hashToolInput, hashToolOutput } from './tool-loop-detection'

/** Calls `fn` with each index from 0 up to (but excluding) `count`, in ascending order. */
function times(count: number, fn: (index: number) => void): void {
  for (let index = 0; index < count; index += 1) {
    fn(index)
  }
}

/**
 * Builds a tracker whose repeat and tool-frequency thresholds are all set to 100.
 * Presumably this keeps those detectors from pre-empting the output-based
 * detectors under test — confirm against the tracker implementation.
 */
function newTrackerWithHighRepeatAndFrequencyLimits(): ToolLoopTracker {
  return new ToolLoopTracker({
    repeatWarn: 100,
    repeatCritical: 100,
    toolFrequencyWarn: 100,
    toolFrequencyCritical: 100,
  })
}

describe('ToolLoopTracker', () => {
  it('returns null for normal non-repeating tool calls', () => {
    const loopTracker = new ToolLoopTracker()

    const londonSearch = loopTracker.record('web_search', { query: 'weather london' }, 'Sunny, 20C')
    assert.equal(londonSearch, null)

    const fileWrite = loopTracker.record('files', { action: 'write', path: '/tmp/test.json' }, 'OK')
    assert.equal(fileWrite, null)

    const parisSearch = loopTracker.record('web_search', { query: 'weather paris' }, 'Cloudy, 15C')
    assert.equal(parisSearch, null)

    assert.equal(loopTracker.size, 3)
  })

  it('detects generic repeat at warning threshold', () => {
    const loopTracker = new ToolLoopTracker({ repeatWarn: 3, repeatCritical: 6 })

    // The first two identical-input calls are expected to stay below the warning threshold.
    times(2, (call) => {
      const early = loopTracker.record('web_search', { query: 'same query' }, `result ${call}`)
      assert.equal(early, null)
    })

    // The third identical-input call is expected to reach repeatWarn.
    const detection = loopTracker.record('web_search', { query: 'same query' }, 'result 2')
    assert.ok(detection)
    assert.equal(detection.severity, 'warning')
    assert.equal(detection.detector, 'generic_repeat')
  })

  it('detects generic repeat at critical threshold', () => {
    const loopTracker = new ToolLoopTracker({
      repeatWarn: 3,
      repeatCritical: 5,
      toolFrequencyWarn: 100,
      toolFrequencyCritical: 100,
    })

    // Four identical-input calls whose return values are deliberately not inspected.
    times(4, (call) => {
      loopTracker.record('web_search', { query: 'same' }, `result ${call}`)
    })

    // The fifth identical-input call is expected to reach repeatCritical.
    const detection = loopTracker.record('web_search', { query: 'same' }, 'result 4')
    assert.ok(detection)
    assert.equal(detection.severity, 'critical')
    assert.equal(detection.detector, 'generic_repeat')
  })

  it('detects polling stall when same tool returns identical output', () => {
    const loopTracker = new ToolLoopTracker({ pollWarn: 3, pollCritical: 5 })

    // Every call uses a distinct id but yields the same output text.
    times(2, (call) => {
      const early = loopTracker.record('process', { action: 'poll', id: `run-${call}` }, 'status: running')
      assert.equal(early, null)
    })

    const detection = loopTracker.record('process', { action: 'poll', id: 'run-2' }, 'status: running')
    assert.ok(detection)
    assert.equal(detection.severity, 'warning')
    assert.equal(detection.detector, 'polling_stall')
  })

  it('detects ping-pong between two tools', () => {
    const loopTracker = new ToolLoopTracker({
      pingPongWarn: 2,
      pingPongCritical: 4,
      repeatWarn: 100,
      repeatCritical: 100,
      pollWarn: 100,
      pollCritical: 100,
    })

    // Two A-B rounds, each tool repeating its own input and output.
    times(2, () => {
      loopTracker.record('web_search', { query: 'find it' }, 'no results found')
      loopTracker.record('web_fetch', { url: 'https://example.com' }, '404 not found')
    })

    // A fifth call (another A) following the A-B-A-B sequence.
    const detection = loopTracker.record('web_search', { query: 'find it' }, 'no results found')

    // NOTE(review): the detector name is only checked when something is returned,
    // so this test also passes when no loop is detected at all. Confirm whether
    // a detection is guaranteed here and, if so, assert it unconditionally.
    if (!detection) {
      return
    }
    assert.equal(detection.detector, 'ping_pong')
  })

  it('circuit breaker fires at absolute cap', () => {
    const loopTracker = new ToolLoopTracker({
      circuitBreaker: 5,
      repeatWarn: 100,
      repeatCritical: 100,
      toolFrequencyWarn: 100,
      toolFrequencyCritical: 100,
    })

    // Four calls below the cap; their return values are not inspected.
    times(4, (call) => {
      loopTracker.record('shell', { command: 'curl http://stuck.com' }, `err ${call}`)
    })

    // The fifth call matches the configured circuitBreaker value of 5.
    const detection = loopTracker.record('shell', { command: 'curl http://stuck.com' }, 'err 4')
    assert.ok(detection)
    assert.equal(detection.severity, 'critical')
    assert.equal(detection.detector, 'circuit_breaker')
  })

  it('does not fire for varied tool calls even with many total calls', () => {
    const loopTracker = new ToolLoopTracker({ toolFrequencyWarn: 100, toolFrequencyCritical: 100 })

    // Twenty calls to one tool, each with a unique query and a unique result.
    times(20, (call) => {
      const outcome = loopTracker.record('web_search', { query: `query ${call}` }, `result ${call}`)
      assert.equal(outcome, null, `Unexpected detection at call ${call}`)
    })

    assert.equal(loopTracker.size, 20)
  })

  it('detects tool frequency when same tool is called too many times (any input)', () => {
    const loopTracker = new ToolLoopTracker({ toolFrequencyWarn: 3, toolFrequencyCritical: 5 })

    times(2, (call) => {
      const early = loopTracker.record('web_search', { query: `q${call}` }, `r${call}`)
      assert.equal(early, null)
    })

    // Third call to the same tool, with yet another distinct input.
    const warning = loopTracker.record('web_search', { query: 'q2' }, 'r2')
    assert.ok(warning)
    assert.equal(warning.severity, 'warning')
    assert.equal(warning.detector, 'tool_frequency')
  })

  it('previews critical repeats before another identical tool call executes', () => {
    const loopTracker = new ToolLoopTracker({
      repeatWarn: 2,
      repeatCritical: 3,
      toolFrequencyWarn: 100,
      toolFrequencyCritical: 100,
    })
    loopTracker.record('web_search', { query: 'same' }, 'result 1')
    loopTracker.record('web_search', { query: 'same' }, 'result 2')

    // preview() is given only the tool name and input; no output is supplied.
    const upcoming = loopTracker.preview('web_search', { query: 'same' })
    assert.ok(upcoming)
    assert.equal(upcoming.severity, 'critical')
    assert.equal(upcoming.detector, 'generic_repeat')
  })

  it('previews tool overuse by frequency before the next call executes', () => {
    const loopTracker = new ToolLoopTracker({ toolFrequencyWarn: 2, toolFrequencyCritical: 4 })
    loopTracker.record('browser', { action: 'open', url: 'https://a.example' }, 'ok')

    // Same tool as the recorded call, but with a different url.
    const upcoming = loopTracker.preview('browser', { action: 'open', url: 'https://b.example' })
    assert.ok(upcoming)
    assert.equal(upcoming.severity, 'warning')
    assert.equal(upcoming.detector, 'tool_frequency')
  })

  it('detects output stagnation when many calls produce identical output', () => {
    const loopTracker = newTrackerWithHighRepeatAndFrequencyLimits()

    // Seven different tools with different inputs, all yielding the same output.
    times(7, (call) => {
      const early = loopTracker.record(`tool_${call}`, { input: `arg_${call}` }, 'Connection refused')
      assert.equal(early, null)
    })

    // The eighth identical output is expected to be reported as critical.
    const detection = loopTracker.record('tool_7', { input: 'arg_7' }, 'Connection refused')
    assert.ok(detection)
    assert.equal(detection.detector, 'output_stagnation')
    assert.equal(detection.severity, 'critical')
  })

  it('detects output stagnation warning when 6 of 8 calls match', () => {
    const loopTracker = newTrackerWithHighRepeatAndFrequencyLimits()

    // Six calls sharing one output, followed by two calls with distinct outputs.
    times(6, (call) => {
      loopTracker.record(`tool_${call}`, { input: `arg_${call}` }, 'same error output')
    })
    loopTracker.record('tool_6', { input: 'arg_6' }, 'different output A')

    const detection = loopTracker.record('tool_7', { input: 'arg_7' }, 'different output B')
    assert.ok(detection)
    assert.equal(detection.detector, 'output_stagnation')
    assert.equal(detection.severity, 'warning')
  })

  it('detects error convergence when most calls return errors', () => {
    const loopTracker = newTrackerWithHighRepeatAndFrequencyLimits()

    // One successful-looking call, then a run of error-looking outputs.
    loopTracker.record('shell', { cmd: 'test1' }, 'ok result')
    times(5, (call) => {
      loopTracker.record(`tool_${call}`, { input: `arg_${call}` }, `Error: ECONNREFUSED ${call}`)
    })

    const detection = loopTracker.record('tool_5', { input: 'arg_5' }, 'Error: timeout on request')

    // NOTE(review): the detector name is only checked when something is returned,
    // so this test also passes when no convergence is detected. Confirm whether
    // a detection is guaranteed here and, if so, assert it unconditionally.
    if (!detection) {
      return
    }
    assert.equal(detection.detector, 'error_convergence')
  })

  it('does not fire stagnation for varied outputs', () => {
    const loopTracker = newTrackerWithHighRepeatAndFrequencyLimits()

    // Ten calls in which the tool name, input and output are all unique.
    times(10, (call) => {
      const outcome = loopTracker.record(`tool_${call}`, { input: `arg_${call}` }, `unique result ${call}`)
      assert.equal(outcome, null)
    })
  })
})

describe('hash helpers', () => {
  it('produces consistent hashes for same input', () => {
    // Structurally equal inputs must produce equal hashes.
    const inputHashA = hashToolInput({ query: 'test' })
    const inputHashB = hashToolInput({ query: 'test' })
    assert.equal(inputHashA, inputHashB)

    // Equal output strings must produce equal hashes.
    const outputHashA = hashToolOutput('hello world')
    const outputHashB = hashToolOutput('hello world')
    assert.equal(outputHashA, outputHashB)
  })

  it('produces different hashes for different input', () => {
    const hashOfA = hashToolInput({ query: 'a' })
    const hashOfB = hashToolInput({ query: 'b' })
    assert.notEqual(hashOfA, hashOfB)
  })
})