Cradicle Explorer

/ tests / run_agent / test_thinking_only_sanitizer.py
test_thinking_only_sanitizer.py
  1  """Tests for the thinking-only assistant message sanitizer.
  2  
  3  Covers _is_thinking_only_assistant() + _drop_thinking_only_and_merge_users()
  4  in run_agent.py. The sanitizer runs on the per-call api_messages copy and
  5  drops assistant turns that contain only reasoning (no visible content, no
  6  tool_calls). Adjacent user messages left behind are merged so role
  7  alternation is preserved for the provider.
  8  
  9  Claude Code uses this exact pattern (filterOrphanedThinkingOnlyMessages +
 10  mergeAdjacentUserMessages in src/utils/messages.ts). See #16823 for the
 11  backstory on why the alternative — fabricating "." stub text — was rejected.
 12  """
 13  
 14  from run_agent import AIAgent
 15  
 16  
 17  # ---------------------------------------------------------------------------
 18  # _is_thinking_only_assistant — detection
 19  # ---------------------------------------------------------------------------
 20  
 21  
 22  class TestIsThinkingOnlyAssistant:
 23  
 24      def test_plain_assistant_reply_is_not_thinking_only(self):
 25          msg = {"role": "assistant", "content": "Hello there"}
 26          assert not AIAgent._is_thinking_only_assistant(msg)
 27  
 28      def test_assistant_with_tool_calls_is_not_thinking_only(self):
 29          msg = {
 30              "role": "assistant",
 31              "content": "",
 32              "reasoning": "let me use a tool",
 33              "tool_calls": [{"id": "c1", "function": {"name": "terminal", "arguments": "{}"}}],
 34          }
 35          assert not AIAgent._is_thinking_only_assistant(msg)
 36  
 37      def test_empty_content_plus_reasoning_is_thinking_only(self):
 38          msg = {"role": "assistant", "content": "", "reasoning": "thinking..."}
 39          assert AIAgent._is_thinking_only_assistant(msg)
 40  
 41      def test_none_content_plus_reasoning_content_is_thinking_only(self):
 42          msg = {"role": "assistant", "content": None, "reasoning_content": "thinking..."}
 43          assert AIAgent._is_thinking_only_assistant(msg)
 44  
 45      def test_whitespace_only_content_plus_reasoning_is_thinking_only(self):
 46          msg = {"role": "assistant", "content": "   \n\n  ", "reasoning": "r"}
 47          assert AIAgent._is_thinking_only_assistant(msg)
 48  
 49      def test_empty_content_no_reasoning_is_not_thinking_only(self):
 50          # If there's no reasoning either, this is just an empty turn — let
 51          # other sanitizers handle it (orphan-tool-pair, etc.). We only care
 52          # about the specific thinking-only case.
 53          msg = {"role": "assistant", "content": ""}
 54          assert not AIAgent._is_thinking_only_assistant(msg)
 55  
 56      def test_list_content_all_thinking_blocks_is_thinking_only(self):
 57          # Anthropic-native shape
 58          msg = {
 59              "role": "assistant",
 60              "content": [
 61                  {"type": "thinking", "thinking": "...", "signature": "sig"},
 62              ],
 63              "reasoning": "...",
 64          }
 65          assert AIAgent._is_thinking_only_assistant(msg)
 66  
 67      def test_list_content_with_real_text_is_not_thinking_only(self):
 68          msg = {
 69              "role": "assistant",
 70              "content": [
 71                  {"type": "thinking", "thinking": "..."},
 72                  {"type": "text", "text": "Hi there"},
 73              ],
 74              "reasoning": "...",
 75          }
 76          assert not AIAgent._is_thinking_only_assistant(msg)
 77  
 78      def test_list_content_with_tool_use_block_is_not_thinking_only(self):
 79          msg = {
 80              "role": "assistant",
 81              "content": [
 82                  {"type": "thinking", "thinking": "..."},
 83                  {"type": "tool_use", "id": "tu1", "name": "terminal", "input": {}},
 84              ],
 85          }
 86          assert not AIAgent._is_thinking_only_assistant(msg)
 87  
 88      def test_list_content_thinking_plus_whitespace_text_is_thinking_only(self):
 89          msg = {
 90              "role": "assistant",
 91              "content": [
 92                  {"type": "thinking", "thinking": "..."},
 93                  {"type": "text", "text": "   "},
 94              ],
 95              "reasoning": "...",
 96          }
 97          assert AIAgent._is_thinking_only_assistant(msg)
 98  
 99      def test_reasoning_details_list_form_detected(self):
100          msg = {
101              "role": "assistant",
102              "content": "",
103              "reasoning_details": [{"type": "thinking", "text": "..."}],
104          }
105          assert AIAgent._is_thinking_only_assistant(msg)
106  
107      def test_user_message_never_thinking_only(self):
108          assert not AIAgent._is_thinking_only_assistant({"role": "user", "content": ""})
109  
110      def test_tool_message_never_thinking_only(self):
111          assert not AIAgent._is_thinking_only_assistant(
112              {"role": "tool", "content": "", "tool_call_id": "x"}
113          )
114  
115      def test_non_dict_returns_false(self):
116          assert not AIAgent._is_thinking_only_assistant(None)
117          assert not AIAgent._is_thinking_only_assistant("hello")
118  
119  
120  # ---------------------------------------------------------------------------
121  # _drop_thinking_only_and_merge_users — the full pass
122  # ---------------------------------------------------------------------------
123  
124  
class TestDropThinkingOnlyAndMergeUsers:
    """End-to-end cases for ``AIAgent._drop_thinking_only_and_merge_users``.

    Each case feeds a message list through the pass and checks which
    turns survive and how neighbouring user messages are combined.
    """

    @staticmethod
    def _sanitize(history):
        """Run the pass under test on *history* and return its result."""
        return AIAgent._drop_thinking_only_and_merge_users(history)

    def test_empty_list_passthrough(self):
        """An empty conversation comes back as an empty list."""
        sanitized = self._sanitize([])
        assert sanitized == []

    def test_no_thinking_only_messages_is_noop_identity(self):
        """With nothing to drop, the very same list object is returned."""
        history = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        sanitized = self._sanitize(history)
        # Identity, not just equality: no changes means the input list itself.
        assert sanitized is history

    def test_drops_thinking_only_between_user_messages_and_merges(self):
        """Dropping the middle turn merges the two user messages."""
        history = [
            {"role": "user", "content": "help me with X"},
            {"role": "assistant", "content": "", "reasoning": "let me think"},
            {"role": "user", "content": "ok continue"},
        ]
        sanitized = self._sanitize(history)
        assert len(sanitized) == 1
        merged = sanitized[0]
        assert merged["role"] == "user"
        assert merged["content"] == "help me with X\n\nok continue"

    def test_preserves_alternation_after_drop(self):
        """User/assistant alternation holds once the drop and merge are done."""
        history = [
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": "u2"},
            {"role": "assistant", "content": "real reply"},
        ]
        sanitized = self._sanitize(history)
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["user", "assistant"]
        assert sanitized[0]["content"] == "u1\n\nu2"
        assert sanitized[1]["content"] == "real reply"

    def test_does_not_merge_when_drop_leaves_non_adjacent_users(self):
        """A trailing thinking-only turn is dropped without any merge."""
        # The thinking-only turn closes the conversation, so no user message
        # follows it and there is nothing to merge.
        history = [
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "reply"},
            {"role": "user", "content": "u2"},
            {"role": "assistant", "content": "", "reasoning": "..."},
        ]
        sanitized = self._sanitize(history)
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["user", "assistant", "user"]

    def test_multiple_thinking_only_in_sequence_collapses(self):
        """Consecutive thinking-only turns are all dropped before merging."""
        history = [
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "", "reasoning": "r1"},
            {"role": "assistant", "content": "", "reasoning": "r2"},
            {"role": "user", "content": "u2"},
        ]
        sanitized = self._sanitize(history)
        assert len(sanitized) == 1
        assert sanitized[0]["content"] == "u1\n\nu2"

    def test_does_not_touch_stored_messages_original_list_unmutated(self):
        """The user dicts handed in keep their original content."""
        first_user = {"role": "user", "content": "u1"}
        thinking_turn = {"role": "assistant", "content": "", "reasoning": "..."}
        second_user = {"role": "user", "content": "u2"}
        history = [first_user, thinking_turn, second_user]
        self._sanitize(history)
        # The caller already supplies a per-call copy, yet the sanitizer must
        # not rewrite the dicts it received on the drop path.
        # (Mutating merged dicts IS allowed; they come from the caller's copy.)
        assert first_user["content"] == "u1"
        assert second_user["content"] == "u2"

    def test_tool_result_between_user_and_thinking_preserved(self):
        """A tool result before the dropped turn stays in place."""
        # A tool result does not prevent the drop, but it does prevent the
        # merge because user and tool are different roles. This should not
        # occur in practice (a thinking-only turn has no tool_calls); if it
        # somehow did, the surrounding tool result must remain where it is.
        tool_request = {
            "role": "assistant",
            "tool_calls": [{"id": "c1", "function": {"name": "t", "arguments": "{}"}}],
        }
        history = [
            {"role": "user", "content": "u1"},
            tool_request,
            {"role": "tool", "tool_call_id": "c1", "content": "ok"},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": "u2"},
        ]
        sanitized = self._sanitize(history)
        role_sequence = [entry["role"] for entry in sanitized]
        assert role_sequence == ["user", "assistant", "tool", "user"]

    def test_merge_concatenates_list_content_user_messages(self):
        """Two list-content user messages merge into one block list."""
        history = [
            {"role": "user", "content": [{"type": "text", "text": "first"}]},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": [{"type": "text", "text": "second"}]},
        ]
        sanitized = self._sanitize(history)
        expected_blocks = [
            {"type": "text", "text": "first"},
            {"type": "text", "text": "second"},
        ]
        assert len(sanitized) == 1
        assert sanitized[0]["content"] == expected_blocks

    def test_merge_mixed_string_and_list_content(self):
        """String content merged with list content yields text blocks."""
        history = [
            {"role": "user", "content": "plain text"},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": [{"type": "text", "text": "block text"}]},
        ]
        sanitized = self._sanitize(history)
        expected_blocks = [
            {"type": "text", "text": "plain text"},
            {"type": "text", "text": "block text"},
        ]
        assert len(sanitized) == 1
        assert sanitized[0]["content"] == expected_blocks

    def test_system_messages_ignored_by_pass(self):
        """A leading system message survives; the users after it merge."""
        history = [
            {"role": "system", "content": "sys prompt"},
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": "u2"},
        ]
        sanitized = self._sanitize(history)
        assert len(sanitized) == 2
        system_turn = sanitized[0]
        merged_user = sanitized[1]
        assert system_turn["role"] == "system"
        assert merged_user["role"] == "user"
        assert merged_user["content"] == "u1\n\nu2"