Cradicle Explorer

/ tests / run_agent / test_streaming_tool_call_repair.py
test_streaming_tool_call_repair.py
  1  """Tests for tool call argument repair in the streaming assembly path.
  2  
  3  The streaming path (run_agent._call_chat_completions) assembles tool call
  4  deltas into full arguments.  When a model truncates or malforms the JSON
  5  (e.g. GLM-5.1 via Ollama), the assembly path used to pass the broken JSON
  6  straight through — setting has_truncated_tool_args but NOT repairing it.
  7  That triggered the truncation handler to kill the session with /new required.
  8  
  9  The fix: repair arguments in the streaming assembly path using
 10  _repair_tool_call_arguments() so repairable malformations (trailing commas,
 11  unclosed brackets, Python None) don't kill the session.
 12  """
 13  
 14  import json
 15  import pytest
 16  
 17  from run_agent import _repair_tool_call_arguments
 18  
 19  
 20  class TestStreamingAssemblyRepair:
 21      """Verify that _repair_tool_call_arguments is applied to streaming tool
 22      call arguments before they're assembled into mock_tool_calls.
 23  
 24      These tests verify the REPAIR FUNCTION itself works correctly for the
 25      cases that arise during streaming assembly.  Integration tests that
 26      exercise the full streaming path are in test_agent_loop_tool_calling.py.
 27      """
 28  
 29      # -- Truncation cases (most common streaming failure) --
 30  
 31      def test_truncated_object_no_close_brace(self):
 32          """Model stops mid-JSON, common with output length limits."""
 33          raw = '{"command": "ls -la", "timeout": 30'
 34          result = _repair_tool_call_arguments(raw, "terminal")
 35          parsed = json.loads(result)
 36          assert parsed["command"] == "ls -la"
 37          assert parsed["timeout"] == 30
 38  
 39      def test_truncated_nested_object(self):
 40          """Model truncates inside a nested structure."""
 41          raw = '{"path": "/tmp/foo", "content": "hello"'
 42          result = _repair_tool_call_arguments(raw, "write_file")
 43          parsed = json.loads(result)
 44          assert parsed["path"] == "/tmp/foo"
 45  
 46      def test_truncated_mid_value(self):
 47          """Model cuts off mid-string-value."""
 48          raw = '{"command": "git clone ht'
 49          result = _repair_tool_call_arguments(raw, "terminal")
 50          # Should produce valid JSON (even if command value is lost)
 51          json.loads(result)
 52  
 53      # -- Trailing comma cases (Ollama/GLM common) --
 54  
 55      def test_trailing_comma_before_close_brace(self):
 56          raw = '{"path": "/tmp", "content": "x",}'
 57          result = _repair_tool_call_arguments(raw, "write_file")
 58          assert json.loads(result) == {"path": "/tmp", "content": "x"}
 59  
 60      def test_trailing_comma_in_list(self):
 61          raw = '{"items": [1, 2, 3,]}'
 62          result = _repair_tool_call_arguments(raw, "test")
 63          assert json.loads(result) == {"items": [1, 2, 3]}
 64  
 65      # -- Python None from model output --
 66  
 67      def test_python_none_literal(self):
 68          raw = "None"
 69          result = _repair_tool_call_arguments(raw, "test")
 70          assert result == "{}"
 71  
 72      # -- Empty arguments (some models emit empty string) --
 73  
 74      def test_empty_string(self):
 75          assert _repair_tool_call_arguments("", "test") == "{}"
 76  
 77      def test_whitespace_only(self):
 78          assert _repair_tool_call_arguments("   \n  ", "test") == "{}"
 79  
 80      # -- Already-valid JSON passes through unchanged --
 81  
 82      def test_valid_json_passthrough(self):
 83          raw = '{"path": "/tmp/foo", "content": "hello"}'
 84          result = _repair_tool_call_arguments(raw, "write_file")
 85          assert json.loads(result) == {"path": "/tmp/foo", "content": "hello"}
 86  
 87      # -- Extra closing brackets (rare but happens) --
 88  
 89      def test_extra_closing_brace(self):
 90          raw = '{"key": "value"}}'
 91          result = _repair_tool_call_arguments(raw, "test")
 92          assert json.loads(result) == {"key": "value"}
 93  
 94      # -- Real-world GLM-5.1 truncation pattern --
 95  
 96      def test_glm_truncation_pattern(self):
 97          """GLM-5.1 via Ollama commonly truncates like this.
 98  
 99          This pattern has an unclosed colon at the end ("background":) which
100          makes it unrepairable — the last-resort empty object {} is the
101          safest option.  The important thing is that repairable patterns
102          (trailing comma, unclosed brace WITHOUT hanging colon) DO get fixed.
103          """
104          raw = '{"command": "ls -la /tmp", "timeout": 30, "background":'
105          result = _repair_tool_call_arguments(raw, "terminal")
106          # Unrepairable — returns empty object (hanging colon can't be fixed)
107          parsed = json.loads(result)
108          assert parsed == {}
109  
110      def test_glm_truncation_repairable(self):
111          """GLM-5.1 truncation pattern that IS repairable."""
112          raw = '{"command": "ls -la /tmp", "timeout": 30'
113          result = _repair_tool_call_arguments(raw, "terminal")
114          parsed = json.loads(result)
115          assert parsed["command"] == "ls -la /tmp"
116          assert parsed["timeout"] == 30