# test_streaming_tool_call_repair.py
"""Tests for tool call argument repair in the streaming assembly path.

The streaming path (run_agent._call_chat_completions) assembles tool call
deltas into full arguments. When a model truncates or malforms the JSON
(e.g. GLM-5.1 via Ollama), the assembly path used to pass the broken JSON
straight through — setting has_truncated_tool_args but NOT repairing it.
That triggered the truncation handler to kill the session with /new required.

The fix: repair arguments in the streaming assembly path using
_repair_tool_call_arguments() so repairable malformations (trailing commas,
unclosed brackets, Python None) don't kill the session.
"""

import json

import pytest

from run_agent import _repair_tool_call_arguments


class TestStreamingAssemblyRepair:
    """Exercise _repair_tool_call_arguments on streaming-style inputs.

    Each test feeds the repair function a raw argument string of the kind
    that shows up while tool call deltas are being assembled, and checks the
    returned string. Only the REPAIR FUNCTION is covered here; integration
    tests that exercise the full streaming path are in
    test_agent_loop_tool_calling.py.
    """

    @staticmethod
    def _repair_and_load(broken, tool_name):
        """Repair *broken* for *tool_name* and decode the result as JSON."""
        return json.loads(_repair_tool_call_arguments(broken, tool_name))

    # -- Truncation cases (most common streaming failure) --

    def test_truncated_object_no_close_brace(self):
        """Output stops before the closing brace (e.g. output length limit)."""
        decoded = self._repair_and_load(
            '{"command": "ls -la", "timeout": 30', "terminal"
        )
        assert decoded["command"] == "ls -la"
        assert decoded["timeout"] == 30

    def test_truncated_nested_object(self):
        """Truncated write_file arguments; only ``path`` is checked.

        NOTE(review): despite the test name, the input below is a flat
        object with no nested structure -- confirm whether a genuinely
        nested payload was intended.
        """
        decoded = self._repair_and_load(
            '{"path": "/tmp/foo", "content": "hello"', "write_file"
        )
        assert decoded["path"] == "/tmp/foo"

    def test_truncated_mid_value(self):
        """Output is cut off in the middle of a string value."""
        # Only validity matters here: decoding must not raise, even if the
        # command value itself is lost by the repair.
        self._repair_and_load('{"command": "git clone ht', "terminal")

    # -- Trailing comma cases (Ollama/GLM common) --

    def test_trailing_comma_before_close_brace(self):
        """A comma directly before ``}`` is dropped."""
        decoded = self._repair_and_load(
            '{"path": "/tmp", "content": "x",}', "write_file"
        )
        assert decoded == {"path": "/tmp", "content": "x"}

    def test_trailing_comma_in_list(self):
        """A comma directly before ``]`` is dropped."""
        decoded = self._repair_and_load('{"items": [1, 2, 3,]}', "test")
        assert decoded == {"items": [1, 2, 3]}

    # -- Python None from model output --

    def test_python_none_literal(self):
        """The literal text ``None`` is replaced by an empty JSON object."""
        repaired = _repair_tool_call_arguments("None", "test")
        assert repaired == "{}"

    # -- Empty arguments (some models emit empty string) --

    def test_empty_string(self):
        """An empty argument string becomes an empty JSON object."""
        repaired = _repair_tool_call_arguments("", "test")
        assert repaired == "{}"

    def test_whitespace_only(self):
        """A whitespace-only argument string becomes an empty JSON object."""
        repaired = _repair_tool_call_arguments(" \n ", "test")
        assert repaired == "{}"

    # -- Already-valid JSON passes through unchanged --

    def test_valid_json_passthrough(self):
        """Well-formed JSON decodes to the same object after repair."""
        decoded = self._repair_and_load(
            '{"path": "/tmp/foo", "content": "hello"}', "write_file"
        )
        assert decoded == {"path": "/tmp/foo", "content": "hello"}

    # -- Extra closing brackets (rare but happens) --

    def test_extra_closing_brace(self):
        """A surplus closing brace is discarded."""
        decoded = self._repair_and_load('{"key": "value"}}', "test")
        assert decoded == {"key": "value"}

    # -- Real-world GLM-5.1 truncation pattern --

    def test_glm_truncation_pattern(self):
        """GLM-5.1 via Ollama commonly truncates like this.

        The input ends on a hanging colon ("background":), which makes it
        unrepairable, so the last-resort empty object {} is the safest
        result. What matters is that the repairable patterns (trailing
        comma, unclosed brace WITHOUT a hanging colon) DO get fixed.
        """
        decoded = self._repair_and_load(
            '{"command": "ls -la /tmp", "timeout": 30, "background":',
            "terminal",
        )
        # Unrepairable input falls back to the empty object.
        assert decoded == {}

    def test_glm_truncation_repairable(self):
        """GLM-5.1 truncation pattern that IS repairable."""
        decoded = self._repair_and_load(
            '{"command": "ls -la /tmp", "timeout": 30', "terminal"
        )
        assert decoded["command"] == "ls -la /tmp"
        assert decoded["timeout"] == 30