# test_deepseek_anthropic_thinking.py
"""Regression guard: preserve thinking blocks on DeepSeek's /anthropic endpoint.

DeepSeek's ``api.deepseek.com/anthropic`` route speaks the Anthropic Messages
protocol but, when thinking mode is enabled, requires ``thinking`` blocks from
prior assistant turns to round-trip on subsequent requests.  The generic
third-party path strips them (signatures are Anthropic-proprietary and other
proxies cannot validate them), so without a DeepSeek-specific carve-out the
next tool-call turn fails with HTTP 400::

    The content[].thinking in the thinking mode must be passed back to the
    API.

DeepSeek's compatibility matrix lists ``thinking`` as supported but
``redacted_thinking`` and ``cache_control`` on thinking blocks as not
supported.  Handling is the same as Kimi's ``/coding`` endpoint: strip
Anthropic-signed blocks (DeepSeek can't validate them) but preserve unsigned
blocks that Hermes synthesises from ``reasoning_content``.

See hermes-agent#16748.
"""

from __future__ import annotations

import pytest

# Canonical DeepSeek Anthropic-compatible base URL used by most tests below.
_DEEPSEEK_ANTHROPIC_URL = "https://api.deepseek.com/anthropic"

# Block types that carry model reasoning in the Anthropic Messages format.
_THINKING_TYPES = ("thinking", "redacted_thinking")


def _tool_call_turn(
    reasoning: str, call_id: str = "call_1", name: str = "f"
) -> list[dict]:
    """Build one assistant tool-call message plus its matching tool result.

    The assistant message carries ``reasoning_content`` (no ``content``) and a
    single function tool call; the tool message answers it with ``"ok"``.

    Args:
        reasoning: Text stored under ``reasoning_content``.
        call_id: Identifier shared by the tool call and the tool result.
        name: Function name placed in the tool call.

    Returns:
        A two-element list: ``[assistant_message, tool_message]``.
    """
    return [
        {
            "role": "assistant",
            "reasoning_content": reasoning,
            "tool_calls": [
                {
                    "id": call_id,
                    "type": "function",
                    "function": {"name": name, "arguments": "{}"},
                }
            ],
        },
        {"role": "tool", "tool_call_id": call_id, "content": "ok"},
    ]


def _blocks_of_type(message: dict, types: tuple = ("thinking",)) -> list[dict]:
    """Return the dict content blocks of *message* whose ``type`` is in *types*.

    ``message["content"]`` is indexed directly, so a message without a
    ``content`` key raises ``KeyError`` rather than being silently treated as
    having no blocks.
    """
    return [
        block
        for block in message["content"]
        if isinstance(block, dict) and block.get("type") in types
    ]


class TestDeepSeekAnthropicPreservesThinking:
    """convert_messages_to_anthropic must replay DeepSeek thinking blocks."""

    @pytest.mark.parametrize(
        "base_url",
        [
            "https://api.deepseek.com/anthropic",
            "https://api.deepseek.com/anthropic/",
            "https://api.deepseek.com/anthropic/v1",
            "https://API.DeepSeek.com/anthropic",
        ],
    )
    def test_unsigned_thinking_block_survives_replay(self, base_url: str) -> None:
        """Unsigned thinking (synthesised from reasoning_content) must be preserved."""
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "hi"},
            *_tool_call_turn("planning the tool call", name="skill_view"),
        ]
        _system, converted = convert_messages_to_anthropic(
            messages, base_url=base_url
        )

        assistant_msg = next(m for m in converted if m["role"] == "assistant")
        thinking_blocks = _blocks_of_type(assistant_msg)
        assert len(thinking_blocks) == 1, (
            f"DeepSeek /anthropic ({base_url}) must preserve unsigned thinking "
            "blocks synthesised from reasoning_content — upstream rejects "
            "replayed tool-call messages without them."
        )
        assert thinking_blocks[0]["thinking"] == "planning the tool call"
        # Synthesised block — never has a signature
        assert "signature" not in thinking_blocks[0]

    def test_unsigned_thinking_preserved_on_non_latest_assistant_turn(self) -> None:
        """DeepSeek validates history across every prior assistant turn, not just last."""
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "q1"},
            *_tool_call_turn("r1", call_id="call_1"),
            {"role": "user", "content": "q2"},
            *_tool_call_turn("r2", call_id="call_2"),
        ]
        _system, converted = convert_messages_to_anthropic(
            messages, base_url=_DEEPSEEK_ANTHROPIC_URL
        )

        assistants = [m for m in converted if m["role"] == "assistant"]
        # Length is pinned first so the zip below cannot silently truncate.
        assert len(assistants) == 2
        for assistant, expected in zip(assistants, ("r1", "r2")):
            thinking = _blocks_of_type(assistant)
            assert len(thinking) == 1
            assert thinking[0]["thinking"] == expected

    def test_signed_anthropic_thinking_block_is_stripped(self) -> None:
        """Anthropic-signed blocks (that leaked through) must still be stripped.

        DeepSeek issues its own signatures and cannot validate Anthropic's —
        the strip-signed / keep-unsigned split matches the Kimi policy.
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "hi"},
            {
                "role": "assistant",
                "content": [
                    {
                        "type": "thinking",
                        "thinking": "anthropic-signed payload",
                        "signature": "anthropic-sig-xyz",
                    },
                    {"type": "text", "text": "hello"},
                ],
            },
            {"role": "user", "content": "again"},
        ]
        _system, converted = convert_messages_to_anthropic(
            messages, base_url=_DEEPSEEK_ANTHROPIC_URL
        )

        assistant_msg = next(m for m in converted if m["role"] == "assistant")
        thinking_blocks = _blocks_of_type(assistant_msg)
        assert thinking_blocks == [], (
            "Signed Anthropic thinking blocks must be stripped on DeepSeek — "
            "DeepSeek cannot validate Anthropic-proprietary signatures."
        )

    def test_cache_control_stripped_from_thinking_block(self) -> None:
        """cache_control must still be stripped even when the block is preserved.

        DeepSeek's compatibility matrix lists cache_control on thinking blocks
        as ignored — cache markers interfere with signature validation on
        upstreams that do check them, so Hermes strips them everywhere.
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "hi"},
            *_tool_call_turn("r1"),
        ]
        # Injecting cache_control onto the synthesised block would require a
        # convert / mutate / re-convert round trip, so this checks the simpler
        # invariant instead: no thinking block in the converted output carries
        # cache_control.
        _system, converted = convert_messages_to_anthropic(
            messages, base_url=_DEEPSEEK_ANTHROPIC_URL
        )

        checked = 0
        for m in converted:
            if not isinstance(m.get("content"), list):
                continue
            for block in _blocks_of_type(m, _THINKING_TYPES):
                assert "cache_control" not in block
                checked += 1

        # Guard against a vacuous pass: if every thinking block were stripped
        # the loop above would assert nothing at all.  The same input shape is
        # required to keep its thinking block by
        # test_unsigned_thinking_block_survives_replay.
        assert checked, (
            "Expected at least one preserved thinking block to inspect for "
            "cache_control — none survived conversion."
        )

    def test_openai_compat_deepseek_base_is_not_matched(self) -> None:
        """The OpenAI-compatible ``api.deepseek.com`` base must NOT trigger the
        DeepSeek /anthropic branch — it never reaches this adapter, but the
        detector should still fail closed so an accidental misuse doesn't
        quietly send signed Anthropic blocks to an OpenAI endpoint.
        """
        from agent.anthropic_adapter import _is_deepseek_anthropic_endpoint

        expectations = (
            ("https://api.deepseek.com", False),
            ("https://api.deepseek.com/v1", False),
            ("https://api.deepseek.com/anthropic", True),
            ("https://api.deepseek.com/anthropic/v1", True),
        )
        for url, expected in expectations:
            # Identity check keeps the original strictness: the detector must
            # return a real bool, not merely a truthy/falsy value.
            assert _is_deepseek_anthropic_endpoint(url) is expected, (
                f"_is_deepseek_anthropic_endpoint({url!r}) should be {expected}"
            )

    def test_non_deepseek_third_party_still_strips_all_thinking(self) -> None:
        """MiniMax and other third-party Anthropic endpoints must keep the
        generic strip-all behaviour (they reject unsigned blocks outright).
        """
        from agent.anthropic_adapter import convert_messages_to_anthropic

        messages = [
            {"role": "user", "content": "hi"},
            *_tool_call_turn("r1"),
        ]
        _system, converted = convert_messages_to_anthropic(
            messages, base_url="https://api.minimax.io/anthropic"
        )

        assistant_msg = next(m for m in converted if m["role"] == "assistant")
        thinking_blocks = _blocks_of_type(assistant_msg)
        assert thinking_blocks == [], (
            "Non-DeepSeek third-party endpoints must keep the generic "
            "strip-all-thinking behaviour — unsigned blocks get rejected."
        )