#!/usr/bin/env python3
"""
Tests for the read-loop detection mechanism in file_tools.

Verifies that:
1. Only *consecutive* identical reads trigger warnings/blocks
2. Any other tool call in between resets the consecutive counter
3. Warn on 3rd consecutive, block on 4th+
4. Different regions/files/tasks don't trigger false warnings
5. get_read_files_summary returns accurate history (unaffected by search keys)
6. clear_read_tracker resets state
7. notify_other_tool_call resets consecutive counters
8. Context compression injects file-read history

Run with: python -m pytest tests/tools/test_read_loop_detection.py -v
"""

import json
import unittest
from unittest.mock import patch, MagicMock

from tools.file_tools import (
    read_file_tool,
    search_tool,
    notify_other_tool_call,
    _read_tracker,
)


class _FakeReadResult:
    """Minimal stand-in for FileOperations.read_file return value."""

    def __init__(self, content="line1\nline2\n", total_lines=2):
        self.content = content
        self._total_lines = total_lines

    def to_dict(self):
        return {"content": self.content, "total_lines": self._total_lines}


def _fake_read_file(path, offset=1, limit=500):
    """Fake reader: echoes the path so tests can assert on the content."""
    return _FakeReadResult(content=f"content of {path}", total_lines=10)


class _FakeSearchResult:
    """Minimal stand-in for FileOperations.search return value."""

    def __init__(self):
        # NOTE(review): `matches` attribute is unused by to_dict(); kept for
        # interface parity with the real result object.
        self.matches = []

    def to_dict(self):
        return {"matches": [{"file": "test.py", "line": 1, "text": "match"}]}


def _make_fake_file_ops():
    """Build a MagicMock FileOperations with deterministic read/search fakes."""
    fake = MagicMock()
    fake.read_file = _fake_read_file
    fake.search = lambda **kw: _FakeSearchResult()
    return fake


class TestReadLoopDetection(unittest.TestCase):
    """Verify that read_file_tool detects and warns on consecutive re-reads."""

    def setUp(self):
        _read_tracker.clear()

    def tearDown(self):
        _read_tracker.clear()

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_first_read_has_no_warning(self, _mock_ops):
        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
        self.assertNotIn("_warning", result)
        self.assertIn("content", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_second_consecutive_read_no_warning(self, _mock_ops):
        """2nd consecutive read should NOT warn (threshold is 3)."""
        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
        result = json.loads(
            read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
        )
        self.assertNotIn("_warning", result)
        self.assertIn("content", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_third_consecutive_read_has_warning(self, _mock_ops):
        """3rd consecutive read of the same region triggers a warning."""
        for _ in range(2):
            read_file_tool("/tmp/test.py", task_id="t1")
        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
        self.assertIn("_warning", result)
        self.assertIn("3 times", result["_warning"])
        # Warning still returns content
        self.assertIn("content", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_fourth_consecutive_read_is_blocked(self, _mock_ops):
        """4th consecutive read of the same region is BLOCKED — no content."""
        for _ in range(3):
            read_file_tool("/tmp/test.py", task_id="t1")
        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
        self.assertIn("error", result)
        self.assertIn("BLOCKED", result["error"])
        self.assertIn("4 times", result["error"])
        self.assertNotIn("content", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_fifth_consecutive_read_still_blocked(self, _mock_ops):
        """Subsequent reads remain blocked with incrementing count."""
        for _ in range(4):
            read_file_tool("/tmp/test.py", task_id="t1")
        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
        self.assertIn("BLOCKED", result["error"])
        self.assertIn("5 times", result["error"])

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_region_resets_consecutive(self, _mock_ops):
        """Reading a different region of the same file resets consecutive count."""
        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
        read_file_tool("/tmp/test.py", offset=1, limit=500, task_id="t1")
        # Now read a different region — this resets the consecutive counter
        result = json.loads(
            read_file_tool("/tmp/test.py", offset=501, limit=500, task_id="t1")
        )
        self.assertNotIn("_warning", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_file_resets_consecutive(self, _mock_ops):
        """Reading a different file resets the consecutive counter."""
        read_file_tool("/tmp/a.py", task_id="t1")
        read_file_tool("/tmp/a.py", task_id="t1")
        result = json.loads(read_file_tool("/tmp/b.py", task_id="t1"))
        self.assertNotIn("_warning", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_tasks_isolated(self, _mock_ops):
        """Different task_ids have separate consecutive counters."""
        read_file_tool("/tmp/test.py", task_id="task_a")
        result = json.loads(
            read_file_tool("/tmp/test.py", task_id="task_b")
        )
        self.assertNotIn("_warning", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_warning_still_returns_content(self, _mock_ops):
        """Even with a warning (3rd read), the file content is still returned."""
        for _ in range(2):
            read_file_tool("/tmp/test.py", task_id="t1")
        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
        self.assertIn("_warning", result)
        self.assertIn("content", result)
        self.assertIn("content of /tmp/test.py", result["content"])


class TestNotifyOtherToolCall(unittest.TestCase):
    """Verify that notify_other_tool_call resets the consecutive counter."""

    def setUp(self):
        _read_tracker.clear()

    def tearDown(self):
        _read_tracker.clear()

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_other_tool_resets_consecutive(self, _mock_ops):
        """After another tool runs, re-reading the same file is NOT consecutive."""
        read_file_tool("/tmp/test.py", task_id="t1")
        read_file_tool("/tmp/test.py", task_id="t1")
        # Simulate a different tool being called
        notify_other_tool_call("t1")
        # This should be treated as a fresh read (consecutive reset)
        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
        self.assertNotIn("_warning", result)
        self.assertIn("content", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_other_tool_prevents_block(self, _mock_ops):
        """Agent can keep reading if other tools are used in between."""
        for _ in range(10):
            read_file_tool("/tmp/test.py", task_id="t1")
            notify_other_tool_call("t1")
        # After 10 reads interleaved with other tools, still no warning
        result = json.loads(read_file_tool("/tmp/test.py", task_id="t1"))
        self.assertNotIn("_warning", result)
        self.assertNotIn("error", result)
        self.assertIn("content", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_notify_on_unknown_task_is_safe(self, _mock_ops):
        """notify_other_tool_call on a task that hasn't read anything is a no-op."""
        notify_other_tool_call("nonexistent_task")  # Should not raise


class TestSearchLoopDetection(unittest.TestCase):
    """Verify that search_tool detects and blocks consecutive repeated searches."""

    def setUp(self):
        _read_tracker.clear()

    def tearDown(self):
        _read_tracker.clear()

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_first_search_no_warning(self, _mock_ops):
        result = json.loads(search_tool("def main", task_id="t1"))
        self.assertNotIn("_warning", result)
        self.assertNotIn("error", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_second_consecutive_search_no_warning(self, _mock_ops):
        """2nd consecutive search should NOT warn (threshold is 3)."""
        search_tool("def main", task_id="t1")
        result = json.loads(search_tool("def main", task_id="t1"))
        self.assertNotIn("_warning", result)
        self.assertNotIn("error", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_third_consecutive_search_has_warning(self, _mock_ops):
        """3rd consecutive identical search triggers a warning."""
        for _ in range(2):
            search_tool("def main", task_id="t1")
        result = json.loads(search_tool("def main", task_id="t1"))
        self.assertIn("_warning", result)
        self.assertIn("3 times", result["_warning"])
        # Warning still returns results
        self.assertIn("matches", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_fourth_consecutive_search_is_blocked(self, _mock_ops):
        """4th consecutive identical search is BLOCKED."""
        for _ in range(3):
            search_tool("def main", task_id="t1")
        result = json.loads(search_tool("def main", task_id="t1"))
        self.assertIn("error", result)
        self.assertIn("BLOCKED", result["error"])
        self.assertNotIn("matches", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_pattern_resets_consecutive(self, _mock_ops):
        """A different search pattern resets the consecutive counter."""
        search_tool("def main", task_id="t1")
        search_tool("def main", task_id="t1")
        result = json.loads(search_tool("class Foo", task_id="t1"))
        self.assertNotIn("_warning", result)
        self.assertNotIn("error", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_different_task_isolated(self, _mock_ops):
        """Different tasks have separate consecutive counters."""
        search_tool("def main", task_id="t1")
        result = json.loads(search_tool("def main", task_id="t2"))
        self.assertNotIn("_warning", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_other_tool_resets_search_consecutive(self, _mock_ops):
        """notify_other_tool_call resets search consecutive counter too."""
        search_tool("def main", task_id="t1")
        search_tool("def main", task_id="t1")
        notify_other_tool_call("t1")
        result = json.loads(search_tool("def main", task_id="t1"))
        self.assertNotIn("_warning", result)
        self.assertNotIn("error", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_pagination_offset_does_not_count_as_repeat(self, _mock_ops):
        """Paginating truncated results should not be blocked as a repeat search."""
        for offset in (0, 50, 100, 150):
            result = json.loads(
                search_tool("def main", task_id="t1", offset=offset, limit=50)
            )
            self.assertNotIn("_warning", result)
            self.assertNotIn("error", result)

    @patch("tools.file_tools._get_file_ops", return_value=_make_fake_file_ops())
    def test_read_between_searches_resets_consecutive(self, _mock_ops):
        """A read_file call between searches resets search consecutive counter."""
        search_tool("def main", task_id="t1")
        search_tool("def main", task_id="t1")
        # A read changes the last_key, resetting consecutive for the search
        read_file_tool("/tmp/test.py", task_id="t1")
        result = json.loads(search_tool("def main", task_id="t1"))
        self.assertNotIn("_warning", result)
        self.assertNotIn("error", result)


class TestTodoInjectionFiltering(unittest.TestCase):
    """Verify that format_for_injection filters completed/cancelled todos."""

    def test_filters_completed_and_cancelled(self):
        from tools.todo_tool import TodoStore
        store = TodoStore()
        store.write([
            {"id": "1", "content": "Read codebase", "status": "completed"},
            {"id": "2", "content": "Write fix", "status": "in_progress"},
            {"id": "3", "content": "Run tests", "status": "pending"},
            {"id": "4", "content": "Abandoned", "status": "cancelled"},
        ])
        injection = store.format_for_injection()
        self.assertNotIn("Read codebase", injection)
        self.assertNotIn("Abandoned", injection)
        self.assertIn("Write fix", injection)
        self.assertIn("Run tests", injection)

    def test_all_completed_returns_none(self):
        from tools.todo_tool import TodoStore
        store = TodoStore()
        store.write([
            {"id": "1", "content": "Done", "status": "completed"},
            {"id": "2", "content": "Also done", "status": "cancelled"},
        ])
        self.assertIsNone(store.format_for_injection())

    def test_empty_store_returns_none(self):
        from tools.todo_tool import TodoStore
        store = TodoStore()
        self.assertIsNone(store.format_for_injection())

    def test_all_active_included(self):
        from tools.todo_tool import TodoStore
        store = TodoStore()
        store.write([
            {"id": "1", "content": "Task A", "status": "pending"},
            {"id": "2", "content": "Task B", "status": "in_progress"},
        ])
        injection = store.format_for_injection()
        self.assertIn("Task A", injection)
        self.assertIn("Task B", injection)


if __name__ == "__main__":
    unittest.main()