# test_thinking_only_sanitizer.py
"""Regression tests for the thinking-only assistant message sanitizer.

Two helpers in run_agent.py are exercised here:
``_is_thinking_only_assistant()`` and ``_drop_thinking_only_and_merge_users()``.
The sanitizer runs on the per-call api_messages copy. It drops assistant
turns that contain only reasoning (no visible content, no tool_calls) and
then merges the adjacent user messages left behind, so role alternation is
preserved for the provider.

Claude Code uses this exact pattern (filterOrphanedThinkingOnlyMessages +
mergeAdjacentUserMessages in src/utils/messages.ts). See #16823 for the
backstory on why the alternative — fabricating "." stub text — was rejected.
"""

from run_agent import AIAgent


def _thinking_only(candidate):
    """Return the detector's verdict for *candidate*."""
    return AIAgent._is_thinking_only_assistant(candidate)


def _sanitize(conversation):
    """Run the drop-and-merge pass on *conversation* and return its result."""
    return AIAgent._drop_thinking_only_and_merge_users(conversation)


def _roles(conversation):
    """Return the ``role`` of every message in *conversation*, in order."""
    return [entry["role"] for entry in conversation]


# ===========================================================================
# Detection: _is_thinking_only_assistant
# ===========================================================================


class TestIsThinkingOnlyAssistant:
    """Which message shapes the detector should and should not flag."""

    def test_plain_assistant_reply_is_not_thinking_only(self):
        """An assistant turn with visible text is not flagged."""
        reply = {"role": "assistant", "content": "Hello there"}
        assert not _thinking_only(reply)

    def test_assistant_with_tool_calls_is_not_thinking_only(self):
        """Tool calls count as real output even when content is empty."""
        tool_call = {"id": "c1", "function": {"name": "terminal", "arguments": "{}"}}
        turn = {
            "role": "assistant",
            "content": "",
            "reasoning": "let me use a tool",
            "tool_calls": [tool_call],
        }
        assert not _thinking_only(turn)

    def test_empty_content_plus_reasoning_is_thinking_only(self):
        """Empty string content plus ``reasoning`` is flagged."""
        turn = {"role": "assistant", "content": "", "reasoning": "thinking..."}
        assert _thinking_only(turn)

    def test_none_content_plus_reasoning_content_is_thinking_only(self):
        """``None`` content plus ``reasoning_content`` is flagged."""
        turn = {
            "role": "assistant",
            "content": None,
            "reasoning_content": "thinking...",
        }
        assert _thinking_only(turn)

    def test_whitespace_only_content_plus_reasoning_is_thinking_only(self):
        """Whitespace-only content is treated like empty content."""
        turn = {"role": "assistant", "content": " \n\n ", "reasoning": "r"}
        assert _thinking_only(turn)

    def test_empty_content_no_reasoning_is_not_thinking_only(self):
        """An empty turn without any reasoning is not flagged.

        With no reasoning either, this is just an empty turn; other
        sanitizers (orphan-tool-pair, etc.) are expected to handle it. Only
        the specific thinking-only case matters here.
        """
        turn = {"role": "assistant", "content": ""}
        assert not _thinking_only(turn)

    def test_list_content_all_thinking_blocks_is_thinking_only(self):
        """List content made solely of thinking blocks (Anthropic-native shape)."""
        thinking_block = {"type": "thinking", "thinking": "...", "signature": "sig"}
        turn = {
            "role": "assistant",
            "content": [thinking_block],
            "reasoning": "...",
        }
        assert _thinking_only(turn)

    def test_list_content_with_real_text_is_not_thinking_only(self):
        """A non-empty text block alongside thinking means real output."""
        blocks = [
            {"type": "thinking", "thinking": "..."},
            {"type": "text", "text": "Hi there"},
        ]
        turn = {"role": "assistant", "content": blocks, "reasoning": "..."}
        assert not _thinking_only(turn)

    def test_list_content_with_tool_use_block_is_not_thinking_only(self):
        """A ``tool_use`` block alongside thinking means real output."""
        blocks = [
            {"type": "thinking", "thinking": "..."},
            {"type": "tool_use", "id": "tu1", "name": "terminal", "input": {}},
        ]
        turn = {"role": "assistant", "content": blocks}
        assert not _thinking_only(turn)

    def test_list_content_thinking_plus_whitespace_text_is_thinking_only(self):
        """A whitespace-only text block does not count as visible output."""
        blocks = [
            {"type": "thinking", "thinking": "..."},
            {"type": "text", "text": " "},
        ]
        turn = {"role": "assistant", "content": blocks, "reasoning": "..."}
        assert _thinking_only(turn)

    def test_reasoning_details_list_form_detected(self):
        """Reasoning supplied as a ``reasoning_details`` list is recognised."""
        turn = {
            "role": "assistant",
            "content": "",
            "reasoning_details": [{"type": "thinking", "text": "..."}],
        }
        assert _thinking_only(turn)

    def test_user_message_never_thinking_only(self):
        """User messages are never flagged, even with empty content."""
        user_turn = {"role": "user", "content": ""}
        assert not _thinking_only(user_turn)

    def test_tool_message_never_thinking_only(self):
        """Tool messages are never flagged, even with empty content."""
        tool_turn = {"role": "tool", "content": "", "tool_call_id": "x"}
        assert not _thinking_only(tool_turn)

    def test_non_dict_returns_false(self):
        """Non-dict inputs yield a falsy result."""
        for bogus in (None, "hello"):
            assert not _thinking_only(bogus)


# ===========================================================================
# The full pass: _drop_thinking_only_and_merge_users
# ===========================================================================


class TestDropThinkingOnlyAndMergeUsers:
    """End-to-end expectations for the drop-then-merge pass."""

    def test_empty_list_passthrough(self):
        """An empty conversation comes back as an empty list."""
        assert _sanitize([]) == []

    def test_no_thinking_only_messages_is_noop_identity(self):
        """With nothing to drop, the very same list object is returned."""
        history = [
            {"role": "user", "content": "hi"},
            {"role": "assistant", "content": "hello"},
        ]
        result = _sanitize(history)
        # Identity, not just equality: the input list is handed back untouched.
        assert result is history

    def test_drops_thinking_only_between_user_messages_and_merges(self):
        """Dropping the middle turn merges the two surrounding user turns."""
        history = [
            {"role": "user", "content": "help me with X"},
            {"role": "assistant", "content": "", "reasoning": "let me think"},
            {"role": "user", "content": "ok continue"},
        ]
        result = _sanitize(history)
        # A one-element role list covers both "length is 1" and "it is a user".
        assert _roles(result) == ["user"]
        assert result[0]["content"] == "help me with X\n\nok continue"

    def test_preserves_alternation_after_drop(self):
        """After drop and merge the roles alternate user / assistant."""
        history = [
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": "u2"},
            {"role": "assistant", "content": "real reply"},
        ]
        result = _sanitize(history)
        assert _roles(result) == ["user", "assistant"]
        merged_user, reply = result[0], result[1]
        assert merged_user["content"] == "u1\n\nu2"
        assert reply["content"] == "real reply"

    def test_does_not_merge_when_drop_leaves_non_adjacent_users(self):
        """A trailing thinking-only turn is dropped without any merge.

        The thinking-only turn sits at the end of the conversation, so there
        is no trailing user message to merge with.
        """
        history = [
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "reply"},
            {"role": "user", "content": "u2"},
            {"role": "assistant", "content": "", "reasoning": "..."},
        ]
        result = _sanitize(history)
        assert _roles(result) == ["user", "assistant", "user"]

    def test_multiple_thinking_only_in_sequence_collapses(self):
        """Consecutive thinking-only turns are all dropped before merging."""
        history = [
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "", "reasoning": "r1"},
            {"role": "assistant", "content": "", "reasoning": "r2"},
            {"role": "user", "content": "u2"},
        ]
        result = _sanitize(history)
        assert len(result) == 1
        assert result[0]["content"] == "u1\n\nu2"

    def test_does_not_touch_stored_messages_original_list_unmutated(self):
        """The user dicts handed in keep their original content.

        The caller passes in a per-call copy already, but the sanitizer
        itself must not rewrite the dicts it was handed on the drop path.
        (It CAN mutate merged dicts — those come from the caller's copy.)
        """
        original_first_user = {"role": "user", "content": "u1"}
        original_assistant = {"role": "assistant", "content": "", "reasoning": "..."}
        original_second_user = {"role": "user", "content": "u2"}
        history = [original_first_user, original_assistant, original_second_user]

        # Return value deliberately ignored; only the inputs are inspected.
        _sanitize(history)

        assert original_first_user["content"] == "u1"
        assert original_second_user["content"] == "u2"

    def test_tool_result_between_user_and_thinking_preserved(self):
        """A tool result stays put when a later thinking-only turn is dropped.

        Tool results shouldn't block a drop — but they do block the merge
        (user/tool are different roles). This scenario shouldn't happen in
        practice because a thinking-only turn won't have tool_calls, but if
        it did somehow, the surrounding tool result stays put.
        """
        tool_call = {"id": "c1", "function": {"name": "t", "arguments": "{}"}}
        history = [
            {"role": "user", "content": "u1"},
            {"role": "assistant", "tool_calls": [tool_call]},
            {"role": "tool", "tool_call_id": "c1", "content": "ok"},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": "u2"},
        ]
        result = _sanitize(history)
        assert _roles(result) == ["user", "assistant", "tool", "user"]

    def test_merge_concatenates_list_content_user_messages(self):
        """Two list-content user turns merge into one concatenated block list."""
        first_block = {"type": "text", "text": "first"}
        second_block = {"type": "text", "text": "second"}
        history = [
            {"role": "user", "content": [{"type": "text", "text": "first"}]},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": [{"type": "text", "text": "second"}]},
        ]
        result = _sanitize(history)
        assert len(result) == 1
        assert result[0]["content"] == [first_block, second_block]

    def test_merge_mixed_string_and_list_content(self):
        """String content becomes a text block when merged with list content."""
        history = [
            {"role": "user", "content": "plain text"},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": [{"type": "text", "text": "block text"}]},
        ]
        result = _sanitize(history)
        assert len(result) == 1
        expected_blocks = [
            {"type": "text", "text": "plain text"},
            {"type": "text", "text": "block text"},
        ]
        assert result[0]["content"] == expected_blocks

    def test_system_messages_ignored_by_pass(self):
        """A leading system message survives and does not join the merge."""
        history = [
            {"role": "system", "content": "sys prompt"},
            {"role": "user", "content": "u1"},
            {"role": "assistant", "content": "", "reasoning": "..."},
            {"role": "user", "content": "u2"},
        ]
        result = _sanitize(history)
        # A two-element role list covers the length check and both role checks.
        assert _roles(result) == ["system", "user"]
        assert result[1]["content"] == "u1\n\nu2"