Cradicle Explorer

/ tests / agent / test_deepseek_anthropic_thinking.py
test_deepseek_anthropic_thinking.py
  1  """Regression guard: preserve thinking blocks on DeepSeek's /anthropic endpoint.
  2  
  3  DeepSeek's ``api.deepseek.com/anthropic`` route speaks the Anthropic Messages
  4  protocol but, when thinking mode is enabled, requires ``thinking`` blocks from
  5  prior assistant turns to round-trip on subsequent requests.  The generic
  6  third-party path strips them (signatures are Anthropic-proprietary and other
  7  proxies cannot validate them), so without a DeepSeek-specific carve-out the
  8  next tool-call turn fails with HTTP 400::
  9  
 10      The content[].thinking in the thinking mode must be passed back to the
 11      API.
 12  
 13  DeepSeek's compatibility matrix lists ``thinking`` as supported but
 14  ``redacted_thinking`` and ``cache_control`` on thinking blocks as not
 15  supported.  Handling is the same as Kimi's ``/coding`` endpoint: strip
 16  Anthropic-signed blocks (DeepSeek can't validate them) but preserve unsigned
 17  blocks that Hermes synthesises from ``reasoning_content``.
 18  
 19  See hermes-agent#16748.
 20  """
 21  
 22  from __future__ import annotations
 23  
 24  import pytest
 25  
 26  
 27  class TestDeepSeekAnthropicPreservesThinking:
 28      """convert_messages_to_anthropic must replay DeepSeek thinking blocks."""
 29  
 30      @pytest.mark.parametrize(
 31          "base_url",
 32          [
 33              "https://api.deepseek.com/anthropic",
 34              "https://api.deepseek.com/anthropic/",
 35              "https://api.deepseek.com/anthropic/v1",
 36              "https://API.DeepSeek.com/anthropic",
 37          ],
 38      )
 39      def test_unsigned_thinking_block_survives_replay(self, base_url: str) -> None:
 40          """Unsigned thinking (synthesised from reasoning_content) must be preserved."""
 41          from agent.anthropic_adapter import convert_messages_to_anthropic
 42  
 43          messages = [
 44              {"role": "user", "content": "hi"},
 45              {
 46                  "role": "assistant",
 47                  "reasoning_content": "planning the tool call",
 48                  "tool_calls": [
 49                      {
 50                          "id": "call_1",
 51                          "type": "function",
 52                          "function": {"name": "skill_view", "arguments": "{}"},
 53                      }
 54                  ],
 55              },
 56              {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
 57          ]
 58          _system, converted = convert_messages_to_anthropic(
 59              messages, base_url=base_url
 60          )
 61  
 62          assistant_msg = next(m for m in converted if m["role"] == "assistant")
 63          thinking_blocks = [
 64              b for b in assistant_msg["content"]
 65              if isinstance(b, dict) and b.get("type") == "thinking"
 66          ]
 67          assert len(thinking_blocks) == 1, (
 68              f"DeepSeek /anthropic ({base_url}) must preserve unsigned thinking "
 69              "blocks synthesised from reasoning_content — upstream rejects "
 70              "replayed tool-call messages without them."
 71          )
 72          assert thinking_blocks[0]["thinking"] == "planning the tool call"
 73          # Synthesised block — never has a signature
 74          assert "signature" not in thinking_blocks[0]
 75  
 76      def test_unsigned_thinking_preserved_on_non_latest_assistant_turn(self) -> None:
 77          """DeepSeek validates history across every prior assistant turn, not just last."""
 78          from agent.anthropic_adapter import convert_messages_to_anthropic
 79  
 80          messages = [
 81              {"role": "user", "content": "q1"},
 82              {
 83                  "role": "assistant",
 84                  "reasoning_content": "r1",
 85                  "tool_calls": [
 86                      {
 87                          "id": "call_1",
 88                          "type": "function",
 89                          "function": {"name": "f", "arguments": "{}"},
 90                      }
 91                  ],
 92              },
 93              {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
 94              {"role": "user", "content": "q2"},
 95              {
 96                  "role": "assistant",
 97                  "reasoning_content": "r2",
 98                  "tool_calls": [
 99                      {
100                          "id": "call_2",
101                          "type": "function",
102                          "function": {"name": "f", "arguments": "{}"},
103                      }
104                  ],
105              },
106              {"role": "tool", "tool_call_id": "call_2", "content": "ok"},
107          ]
108          _system, converted = convert_messages_to_anthropic(
109              messages, base_url="https://api.deepseek.com/anthropic"
110          )
111  
112          assistants = [m for m in converted if m["role"] == "assistant"]
113          assert len(assistants) == 2
114          for assistant, expected in zip(assistants, ("r1", "r2")):
115              thinking = [
116                  b for b in assistant["content"]
117                  if isinstance(b, dict) and b.get("type") == "thinking"
118              ]
119              assert len(thinking) == 1
120              assert thinking[0]["thinking"] == expected
121  
122      def test_signed_anthropic_thinking_block_is_stripped(self) -> None:
123          """Anthropic-signed blocks (that leaked through) must still be stripped.
124  
125          DeepSeek issues its own signatures and cannot validate Anthropic's —
126          the strip-signed / keep-unsigned split matches the Kimi policy.
127          """
128          from agent.anthropic_adapter import convert_messages_to_anthropic
129  
130          messages = [
131              {"role": "user", "content": "hi"},
132              {
133                  "role": "assistant",
134                  "content": [
135                      {
136                          "type": "thinking",
137                          "thinking": "anthropic-signed payload",
138                          "signature": "anthropic-sig-xyz",
139                      },
140                      {"type": "text", "text": "hello"},
141                  ],
142              },
143              {"role": "user", "content": "again"},
144          ]
145          _system, converted = convert_messages_to_anthropic(
146              messages, base_url="https://api.deepseek.com/anthropic"
147          )
148  
149          assistant_msg = next(m for m in converted if m["role"] == "assistant")
150          thinking_blocks = [
151              b for b in assistant_msg["content"]
152              if isinstance(b, dict) and b.get("type") == "thinking"
153          ]
154          assert thinking_blocks == [], (
155              "Signed Anthropic thinking blocks must be stripped on DeepSeek — "
156              "DeepSeek cannot validate Anthropic-proprietary signatures."
157          )
158  
159      def test_cache_control_stripped_from_thinking_block(self) -> None:
160          """cache_control must still be stripped even when the block is preserved.
161  
162          DeepSeek's compatibility matrix lists cache_control on thinking blocks
163          as ignored — cache markers interfere with signature validation on
164          upstreams that do check them, so Hermes strips them everywhere.
165          """
166          from agent.anthropic_adapter import convert_messages_to_anthropic
167  
168          messages = [
169              {"role": "user", "content": "hi"},
170              {
171                  "role": "assistant",
172                  "reasoning_content": "r1",
173                  "tool_calls": [
174                      {
175                          "id": "call_1",
176                          "type": "function",
177                          "function": {"name": "f", "arguments": "{}"},
178                      }
179                  ],
180              },
181              {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
182          ]
183          # Inject cache_control on the synthesised thinking block after-the-fact
184          # by running conversion once, mutating, then re-running would be
185          # indirect.  Instead check the simpler invariant: no thinking block in
186          # the converted output carries cache_control.
187          _system, converted = convert_messages_to_anthropic(
188              messages, base_url="https://api.deepseek.com/anthropic"
189          )
190          for m in converted:
191              if not isinstance(m.get("content"), list):
192                  continue
193              for b in m["content"]:
194                  if isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"):
195                      assert "cache_control" not in b
196  
197      def test_openai_compat_deepseek_base_is_not_matched(self) -> None:
198          """The OpenAI-compatible ``api.deepseek.com`` base must NOT trigger the
199          DeepSeek /anthropic branch — it never reaches this adapter, but the
200          detector should still fail closed so an accidental misuse doesn't
201          quietly send signed Anthropic blocks to an OpenAI endpoint.
202          """
203          from agent.anthropic_adapter import _is_deepseek_anthropic_endpoint
204  
205          assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com") is False
206          assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/v1") is False
207          assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/anthropic") is True
208          assert _is_deepseek_anthropic_endpoint("https://api.deepseek.com/anthropic/v1") is True
209  
210      def test_non_deepseek_third_party_still_strips_all_thinking(self) -> None:
211          """MiniMax and other third-party Anthropic endpoints must keep the
212          generic strip-all behaviour (they reject unsigned blocks outright).
213          """
214          from agent.anthropic_adapter import convert_messages_to_anthropic
215  
216          messages = [
217              {"role": "user", "content": "hi"},
218              {
219                  "role": "assistant",
220                  "reasoning_content": "r1",
221                  "tool_calls": [
222                      {
223                          "id": "call_1",
224                          "type": "function",
225                          "function": {"name": "f", "arguments": "{}"},
226                      }
227                  ],
228              },
229              {"role": "tool", "tool_call_id": "call_1", "content": "ok"},
230          ]
231          _system, converted = convert_messages_to_anthropic(
232              messages, base_url="https://api.minimax.io/anthropic"
233          )
234          assistant_msg = next(m for m in converted if m["role"] == "assistant")
235          thinking_blocks = [
236              b for b in assistant_msg["content"]
237              if isinstance(b, dict) and b.get("type") == "thinking"
238          ]
239          assert thinking_blocks == [], (
240              "Non-DeepSeek third-party endpoints must keep the generic "
241              "strip-all-thinking behaviour — unsigned blocks get rejected."
242          )