anthropic_adapter.py
1 """Anthropic Messages API adapter for Hermes Agent. 2 3 Translates between Hermes's internal OpenAI-style message format and 4 Anthropic's Messages API. Follows the same pattern as the codex_responses 5 adapter — all provider-specific logic is isolated here. 6 7 Auth supports: 8 - Regular API keys (sk-ant-api*) → x-api-key header 9 - OAuth setup-tokens (sk-ant-oat*) → Bearer auth + beta header 10 - Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json) → Bearer auth 11 """ 12 13 import copy 14 import json 15 import logging 16 import os 17 import platform 18 import subprocess 19 from pathlib import Path 20 21 from hermes_constants import get_hermes_home 22 from typing import Any, Dict, List, Optional, Tuple 23 from utils import base_url_host_matches, normalize_proxy_env_vars 24 25 # NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls 26 # ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.) 27 # and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client, 28 # read_claude_code_credentials_from_keychain) are all on cold user-triggered 29 # paths. Access via the `_get_anthropic_sdk()` accessor below, which caches 30 # the module after the first call and returns None on ImportError. 31 _anthropic_sdk: Any = ... # sentinel — None means "tried and missing" 32 33 34 def _get_anthropic_sdk(): 35 """Return the ``anthropic`` SDK module, importing lazily. None if not installed.""" 36 global _anthropic_sdk 37 if _anthropic_sdk is ...: 38 try: 39 import anthropic as _sdk 40 _anthropic_sdk = _sdk 41 except ImportError: 42 _anthropic_sdk = None 43 return _anthropic_sdk 44 45 logger = logging.getLogger(__name__) 46 47 THINKING_BUDGET = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000} 48 # Hermes effort → Anthropic adaptive-thinking effort (output_config.effort). 49 # Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max. 
50 # Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh. 51 # We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/ 52 # agentic work) and downgrade it to max on pre-4.7 adaptive models (which 53 # is the strongest level they accept). "minimal" is a legacy alias that 54 # maps to low on every model. See: 55 # https://platform.claude.com/docs/en/about-claude/models/migration-guide 56 ADAPTIVE_EFFORT_MAP = { 57 "max": "max", 58 "xhigh": "xhigh", 59 "high": "high", 60 "medium": "medium", 61 "low": "low", 62 "minimal": "low", 63 } 64 65 # Models that accept the "xhigh" output_config.effort level. Opus 4.7 added 66 # xhigh as a distinct level between high and max; older adaptive-thinking 67 # models (4.6) reject it with a 400. Keep this substring list in sync with 68 # the Anthropic migration guide as new model families ship. 69 _XHIGH_EFFORT_SUBSTRINGS = ("4-7", "4.7") 70 71 # Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive 72 # is the only supported mode; 4.7 additionally forbids manual thinking entirely 73 # and drops temperature/top_p/top_k). 74 _ADAPTIVE_THINKING_SUBSTRINGS = ("4-6", "4.6", "4-7", "4.7") 75 76 # Models where temperature/top_p/top_k return 400 if set to non-default values. 77 # This is the Opus 4.7 contract; future 4.x+ models are expected to follow it. 78 _NO_SAMPLING_PARAMS_SUBSTRINGS = ("4-7", "4.7") 79 _FAST_MODE_SUPPORTED_SUBSTRINGS = ("opus-4-6", "opus-4.6") 80 81 # ── Max output token limits per Anthropic model ─────────────────────── 82 # Source: Anthropic docs + Cline model catalog. Anthropic's API requires 83 # max_tokens as a mandatory field. Previously we hardcoded 16384, which 84 # starves thinking-enabled models (thinking tokens count toward the limit). 
85 _ANTHROPIC_OUTPUT_LIMITS = { 86 # Claude 4.7 87 "claude-opus-4-7": 128_000, 88 # Claude 4.6 89 "claude-opus-4-6": 128_000, 90 "claude-sonnet-4-6": 64_000, 91 # Claude 4.5 92 "claude-opus-4-5": 64_000, 93 "claude-sonnet-4-5": 64_000, 94 "claude-haiku-4-5": 64_000, 95 # Claude 4 96 "claude-opus-4": 32_000, 97 "claude-sonnet-4": 64_000, 98 # Claude 3.7 99 "claude-3-7-sonnet": 128_000, 100 # Claude 3.5 101 "claude-3-5-sonnet": 8_192, 102 "claude-3-5-haiku": 8_192, 103 # Claude 3 104 "claude-3-opus": 4_096, 105 "claude-3-sonnet": 4_096, 106 "claude-3-haiku": 4_096, 107 # Third-party Anthropic-compatible providers 108 "minimax": 131_072, 109 # Qwen models via DashScope Anthropic-compatible endpoint 110 # DashScope enforces max_tokens ∈ [1, 65536] 111 "qwen3": 65_536, 112 } 113 114 # For any model not in the table, assume the highest current limit. 115 # Future Anthropic models are unlikely to have *less* output capacity. 116 _ANTHROPIC_DEFAULT_OUTPUT_LIMIT = 128_000 117 118 119 def _get_anthropic_max_output(model: str) -> int: 120 """Look up the max output token limit for an Anthropic model. 121 122 Uses substring matching against _ANTHROPIC_OUTPUT_LIMITS so date-stamped 123 model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast) 124 resolve correctly. Longest-prefix match wins to avoid e.g. "claude-3-5" 125 matching before "claude-3-5-sonnet". 126 127 Normalizes dots to hyphens so that model names like 128 ``anthropic/claude-opus-4.6`` match the ``claude-opus-4-6`` table key. 129 """ 130 m = model.lower().replace(".", "-") 131 best_key = "" 132 best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT 133 for key, val in _ANTHROPIC_OUTPUT_LIMITS.items(): 134 if key in m and len(key) > len(best_key): 135 best_key = key 136 best_val = val 137 return best_val 138 139 140 def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]: 141 """Return ``value`` floored to a positive int, or ``None`` if it is not a 142 finite positive number. 
Ported from openclaw/openclaw#66664. 143 144 Anthropic's Messages API rejects ``max_tokens`` values that are 0, 145 negative, non-integer, or non-finite with HTTP 400. Python's ``or`` 146 idiom (``max_tokens or fallback``) correctly catches ``0`` but lets 147 negative ints and fractional floats (``-1``, ``0.5``) through to the 148 API, producing a user-visible failure instead of a local error. 149 """ 150 # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't 151 # silently become 1 and ``False`` doesn't become 0. 152 if isinstance(value, bool): 153 return None 154 if not isinstance(value, (int, float)): 155 return None 156 try: 157 import math 158 if not math.isfinite(value): 159 return None 160 except Exception: 161 return None 162 floored = int(value) # truncates toward zero for floats 163 return floored if floored > 0 else None 164 165 166 def _resolve_anthropic_messages_max_tokens( 167 requested, 168 model: str, 169 context_length: Optional[int] = None, 170 ) -> int: 171 """Resolve the ``max_tokens`` budget for an Anthropic Messages call. 172 173 Prefers ``requested`` when it is a positive finite number; otherwise 174 falls back to the model's output ceiling. Raises ``ValueError`` if no 175 positive budget can be resolved (should not happen with current model 176 table defaults, but guards against a future regression where 177 ``_get_anthropic_max_output`` could return ``0``). 178 179 Separately, callers apply a context-window clamp — this resolver does 180 not, to keep the positive-value contract independent of endpoint 181 specifics. 182 183 Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens). 
184 """ 185 resolved = _resolve_positive_anthropic_max_tokens(requested) 186 if resolved is not None: 187 return resolved 188 fallback = _get_anthropic_max_output(model) 189 if fallback > 0: 190 return fallback 191 raise ValueError( 192 f"Anthropic Messages adapter requires a positive max_tokens value for " 193 f"model {model!r}; got {requested!r} and no model default resolved." 194 ) 195 196 197 def _supports_adaptive_thinking(model: str) -> bool: 198 """Return True for Claude 4.6+ models that support adaptive thinking.""" 199 return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS) 200 201 202 def _supports_xhigh_effort(model: str) -> bool: 203 """Return True for models that accept the 'xhigh' adaptive effort level. 204 205 Opus 4.7 introduced xhigh as a distinct level between high and max. 206 Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max 207 and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max 208 when this returns False. 209 """ 210 return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS) 211 212 213 def _forbids_sampling_params(model: str) -> bool: 214 """Return True for models that 400 on any non-default temperature/top_p/top_k. 215 216 Opus 4.7 explicitly rejects sampling parameters; later Claude releases are 217 expected to follow suit. Callers should omit these fields entirely rather 218 than passing zero/default values (the API rejects anything non-null). 219 """ 220 return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS) 221 222 223 def _supports_fast_mode(model: str) -> bool: 224 """Return True for models that support Anthropic Fast Mode (speed=fast). 225 226 Per Anthropic docs, fast mode is currently supported on Opus 4.6 only. 227 Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7) 228 returns HTTP 400. This guard prevents silently 400'ing when stale config 229 or older callers leave fast mode enabled across a model upgrade. 
230 """ 231 return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS) 232 233 234 # Beta headers for enhanced features (sent with ALL auth types). 235 # As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the 236 # beta headers are still accepted (harmless no-op) but not required. Kept 237 # here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints 238 # that still gate on the headers continue to get the enhanced features. 239 # 240 # ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7 241 # and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on 242 # native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still 243 # gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus 244 # at 200K even though model_metadata.py advertises 1M. The header is a harmless 245 # no-op on endpoints where 1M is GA. 246 # 247 # Migration guide: remove these if you no longer support ≤4.5 models or once 248 # Bedrock/Azure promote 1M to GA. 249 _COMMON_BETAS = [ 250 "interleaved-thinking-2025-05-14", 251 "fine-grained-tool-streaming-2025-05-14", 252 "context-1m-2025-08-07", 253 ] 254 # MiniMax's Anthropic-compatible endpoints fail tool-use requests when 255 # the fine-grained tool streaming beta is present. Omit it so tool calls 256 # fall back to the provider's default response path. 257 _TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14" 258 # 1M context beta — see comment on _COMMON_BETAS above. Stripped for 259 # Bearer-auth (MiniMax) endpoints since they host their own models and 260 # unknown Anthropic beta headers risk request rejection. 261 _CONTEXT_1M_BETA = "context-1m-2025-08-07" 262 263 # Fast mode beta — enables the ``speed: "fast"`` request parameter for 264 # significantly higher output token throughput on Opus 4.6 (~2.5x). 
265 # See https://platform.claude.com/docs/en/build-with-claude/fast-mode 266 _FAST_MODE_BETA = "fast-mode-2026-02-01" 267 268 # Additional beta headers required for OAuth/subscription auth. 269 # Matches what Claude Code (and pi-ai / OpenCode) send. 270 _OAUTH_ONLY_BETAS = [ 271 "claude-code-20250219", 272 "oauth-2025-04-20", 273 ] 274 275 # Claude Code identity — required for OAuth requests to be routed correctly. 276 # Without these, Anthropic's infrastructure intermittently 500s OAuth traffic. 277 # The version must stay reasonably current — Anthropic rejects OAuth requests 278 # when the spoofed user-agent version is too far behind the actual release. 279 _CLAUDE_CODE_VERSION_FALLBACK = "2.1.74" 280 _claude_code_version_cache: Optional[str] = None 281 282 283 def _detect_claude_code_version() -> str: 284 """Detect the installed Claude Code version, fall back to a static constant. 285 286 Anthropic's OAuth infrastructure validates the user-agent version and may 287 reject requests with a version that's too old. Detecting dynamically means 288 users who keep Claude Code updated never hit stale-version 400s. 289 """ 290 import subprocess as _sp 291 292 for cmd in ("claude", "claude-code"): 293 try: 294 result = _sp.run( 295 [cmd, "--version"], 296 capture_output=True, text=True, timeout=5, 297 ) 298 if result.returncode == 0 and result.stdout.strip(): 299 # Output is like "2.1.74 (Claude Code)" or just "2.1.74" 300 version = result.stdout.strip().split()[0] 301 if version and version[0].isdigit(): 302 return version 303 except Exception: 304 pass 305 return _CLAUDE_CODE_VERSION_FALLBACK 306 307 308 _CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude." 
# NOTE(review): presumably prefixes MCP-originated tool names — the usage
# sites are outside this chunk; confirm against the tool-dispatch code.
_MCP_TOOL_PREFIX = "mcp_"


def _get_claude_code_version() -> str:
    """Lazily detect the installed Claude Code version when OAuth headers need it."""
    global _claude_code_version_cache
    if _claude_code_version_cache is None:
        # First call pays the subprocess probe; subsequent calls hit the cache.
        _claude_code_version_cache = _detect_claude_code_version()
    return _claude_code_version_cache


def _is_oauth_token(key: str) -> bool:
    """Check if the key is an Anthropic OAuth/setup token.

    Positively identifies Anthropic OAuth tokens by their key format:
    - ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
    - ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
    - ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)

    Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
    and correctly return False.
    """
    if not key:
        return False
    # Regular Anthropic Console API keys — x-api-key auth, never OAuth.
    # Must be checked before the broader sk-ant- prefix below.
    if key.startswith("sk-ant-api"):
        return False
    # Anthropic-issued tokens (setup-tokens sk-ant-oat-*, managed keys)
    if key.startswith("sk-ant-"):
        return True
    # JWTs from Anthropic OAuth flow
    if key.startswith("eyJ"):
        return True
    # Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
    if key.startswith("cc-"):
        return True
    return False


def _normalize_base_url_text(base_url) -> str:
    """Normalize SDK/base transport URL values to a plain string for inspection.

    Some client objects expose ``base_url`` as an ``httpx.URL`` instead of a raw
    string. Provider/auth detection should accept either shape.

    Returns "" for falsy input so callers can treat "no base_url" uniformly.
    """
    if not base_url:
        return ""
    return str(base_url).strip()


def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
    """Return True for non-Anthropic endpoints using the Anthropic Messages API.

    Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate
    with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
    detection should be skipped for these endpoints.
    """
    normalized = _normalize_base_url_text(base_url)
    if not normalized:
        return False  # No base_url = direct Anthropic API
    normalized = normalized.rstrip("/").lower()
    if "anthropic.com" in normalized:
        return False  # Direct Anthropic API — OAuth applies
    return True  # Any other endpoint is a third-party proxy


def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
    """Return True for Kimi's /coding endpoint that requires claude-code UA."""
    normalized = _normalize_base_url_text(base_url)
    if not normalized:
        return False
    return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")


# Model-name prefixes that identify the Kimi / Moonshot family. Covers
# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
# Matched case-insensitively against the post-``normalize_model_name`` form,
# so a caller's ``provider/vendor/model`` slug is handled the same as a
# bare name.
_KIMI_FAMILY_MODEL_PREFIXES = (
    "kimi-", "kimi_",
    "moonshot-", "moonshot_",
    "k1.", "k1-",
    "k2.", "k2-",
    "k25", "k2.5",
)


def _model_name_is_kimi_family(model: str | None) -> bool:
    """Return True when *model* looks like a Kimi / Moonshot family slug.

    Accepts vendor-prefixed slugs and matches case-insensitively against
    _KIMI_FAMILY_MODEL_PREFIXES; non-string or empty input returns False.
    """
    if not isinstance(model, str):
        return False
    m = model.strip().lower()
    if not m:
        return False
    # Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
    if "/" in m:
        m = m.rsplit("/", 1)[-1]
    return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)


def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
    """Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.

    Broader than ``_is_kimi_coding_endpoint`` — matches:

    - Kimi's official ``/coding`` URL (legacy check, preserved)
    - Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host
    - Custom or proxied endpoints whose *model* name is in the Kimi / Moonshot
      family (``kimi-*``, ``moonshot-*``, ``k1.*``, ``k2.*``, …). Users with
      ``api_mode: anthropic_messages`` on a private gateway fronting Kimi
      fall into this branch — the upstream still enforces Kimi's thinking
      semantics (reasoning_content required on every replayed tool-call
      message) regardless of the gateway's hostname.

    Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
    preserve unsigned reasoning_content-derived thinking blocks on replay.
    See hermes-agent#13848, #17057.
    """
    if _is_kimi_coding_endpoint(base_url):
        return True
    for _domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn"):
        if base_url_host_matches(base_url or "", _domain):
            return True
    if _model_name_is_kimi_family(model):
        return True
    return False


def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
    """Return True for DeepSeek's Anthropic-compatible endpoint.

    DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
    but, when thinking mode is enabled, requires the ``thinking`` blocks
    from prior assistant turns to round-trip on subsequent requests — the
    generic third-party path strips them and triggers HTTP 400::

        The content[].thinking in the thinking mode must be passed back
        to the API.

    Per DeepSeek's published compatibility matrix the blocks are unsigned
    (no Anthropic-proprietary signature, no ``redacted_thinking`` support),
    so this endpoint is handled with the same strip-signed / keep-unsigned
    policy used for Kimi's ``/coding`` endpoint. The match is pinned to
    the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
    base URL (which never reaches this adapter) is not misclassified.
    See hermes-agent#16748.
    """
    if not base_url_host_matches(base_url or "", "api.deepseek.com"):
        return False
    normalized = _normalize_base_url_text(base_url)
    if not normalized:
        return False
    return "/anthropic" in normalized.rstrip("/").lower()


def _requires_bearer_auth(base_url: str | None) -> bool:
    """Return True for Anthropic-compatible providers that require Bearer auth.

    Some third-party /anthropic endpoints implement Anthropic's Messages API but
    require Authorization: Bearer *** instead of Anthropic's native x-api-key
    header. MiniMax's global and China Anthropic-compatible endpoints follow
    this pattern.
    """
    normalized = _normalize_base_url_text(base_url)
    if not normalized:
        return False
    normalized = normalized.rstrip("/").lower()
    return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))


def _common_betas_for_base_url(
    base_url: str | None,
    *,
    drop_context_1m_beta: bool = False,
) -> list[str]:
    """Return the beta headers that are safe for the configured endpoint.

    MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
    that include Anthropic's ``fine-grained-tool-streaming`` beta — every
    tool-use message triggers a connection error. Strip that beta for
    Bearer-auth endpoints while keeping all other betas intact.

    The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
    endpoints — MiniMax hosts its own models, not Claude, so the header is
    irrelevant at best and risks request rejection at worst.

    ``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
    otherwise-unrelated endpoints. The OAuth retry path flips this flag after
    a subscription rejects the beta with
    "The long context beta is not yet available for this subscription" so
    subsequent requests in the same session don't repeat the probe. See the
    reactive recovery loop in ``run_agent.py`` and issue-comment history on
    PR #17680 for the full rationale.
    """
    if _requires_bearer_auth(base_url):
        _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
        return [b for b in _COMMON_BETAS if b not in _stripped]
    if drop_context_1m_beta:
        return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
    return _COMMON_BETAS


def build_anthropic_client(
    api_key: str,
    base_url: Optional[str] = None,
    timeout: Optional[float] = None,
    *,
    drop_context_1m_beta: bool = False,
):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout. The
    connect timeout stays at 10s. Callers pass this from the per-provider /
    per-model ``request_timeout_seconds`` config so Anthropic-native and
    Anthropic-compatible providers respect the same knob as OpenAI-wire
    providers.

    ``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
    client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
    path in ``run_agent.py`` when a subscription rejects the beta; leave at
    its default on fresh clients so 1M-capable subscriptions keep the
    capability.

    Returns an anthropic.Anthropic instance.
    Raises ImportError when the ``anthropic`` SDK is not installed.
    """
    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )

    normalize_proxy_env_vars()

    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
    # Non-numeric / non-positive timeouts fall back to the 900s default.
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
    }
    if normalized_base_url:
        # Azure Anthropic endpoints require an ``api-version`` query parameter.
        # Pass it via default_query so the SDK appends it to every request URL
        # without corrupting the base_url (appending it directly produces
        # malformed paths like /anthropic?api-version=.../v1/messages).
        _is_azure_endpoint = "azure.com" in normalized_base_url.lower()
        if _is_azure_endpoint and "api-version" not in normalized_base_url:
            kwargs["base_url"] = normalized_base_url.rstrip("/")
            kwargs["default_query"] = {"api-version": "2025-04-15"}
        else:
            kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(
        normalized_base_url,
        drop_context_1m_beta=drop_context_1m_beta,
    )

    if _is_kimi_coding_endpoint(base_url):
        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
        # to be recognized as a valid Coding Agent. Without it, returns 403.
        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
        kwargs["api_key"] = api_key
        kwargs["default_headers"] = {
            "User-Agent": "claude-code/0.1.0",
            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
        }
    elif _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
        # Authorization: Bearer *** for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
        # Anthropic OAuth/setup tokens.
        kwargs["auth_token"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_third_party_anthropic_endpoint(base_url):
        # Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
        # own API keys with x-api-key auth. Skip OAuth detection — their keys
        # don't follow Anthropic's sk-ant-* prefix convention and would be
        # misclassified as OAuth tokens.
        kwargs["api_key"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_oauth_token(api_key):
        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
        # Anthropic routes OAuth requests based on user-agent and headers;
        # without Claude Code's fingerprint, requests get intermittent 500s.
        all_betas = common_betas + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
            "x-app": "cli",
        }
    else:
        # Regular API key → x-api-key header + common betas
        kwargs["api_key"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}

    return _anthropic_sdk.Anthropic(**kwargs)


def build_anthropic_bedrock_client(region: str):
    """Create an AnthropicBedrock client for Bedrock Claude models.

    Uses the Anthropic SDK's native Bedrock adapter, which provides full
    Claude feature parity: prompt caching, thinking budgets, adaptive
    thinking, fast mode — features not available via the Converse API.

    Attaches the common Anthropic beta headers as client-level defaults so
    that Bedrock-hosted Claude models get the same enhanced features as
    native Anthropic. The ``context-1m-2025-08-07`` beta in particular
    unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
    it, Bedrock caps these models at 200K even though the Anthropic API
    serves them with 1M natively.

    Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
    Raises ImportError when the SDK is missing or too old for Bedrock.
    """
    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Bedrock provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )
    # Older SDK releases predate the Bedrock adapter — guard explicitly so the
    # user gets an actionable upgrade message instead of an AttributeError.
    if not hasattr(_anthropic_sdk, "AnthropicBedrock"):
        raise ImportError(
            "anthropic.AnthropicBedrock not available. "
            "Upgrade with: pip install 'anthropic>=0.39.0'"
        )
    from httpx import Timeout

    return _anthropic_sdk.AnthropicBedrock(
        aws_region=region,
        timeout=Timeout(timeout=900.0, connect=10.0),
        default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
    )


def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
    """Read Claude Code OAuth credentials from the macOS Keychain.

    Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
    service name "Claude Code-credentials" rather than (or in addition to)
    the JSON file at ~/.claude/.credentials.json.

    The password field contains a JSON string with the same claudeAiOauth
    structure as the JSON file.

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
    """
    if platform.system() != "Darwin":
        return None

    try:
        # Read the "Claude Code-credentials" generic password entry
        # (-w prints only the password field, i.e. the JSON payload).
        result = subprocess.run(
            ["security", "find-generic-password",
             "-s", "Claude Code-credentials",
             "-w"],
            capture_output=True,
            text=True,
            timeout=5,
        )
    except (OSError, subprocess.TimeoutExpired):
        logger.debug("Keychain: security command not available or timed out")
        return None

    if result.returncode != 0:
        logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
        return None

    raw = result.stdout.strip()
    if not raw:
        return None

    try:
        data = json.loads(raw)
    except json.JSONDecodeError:
        logger.debug("Keychain: credentials payload is not valid JSON")
        return None

    oauth_data = data.get("claudeAiOauth")
    if oauth_data and isinstance(oauth_data, dict):
        access_token = oauth_data.get("accessToken", "")
        if access_token:
            return {
                "accessToken": access_token,
                "refreshToken": oauth_data.get("refreshToken", ""),
                "expiresAt": oauth_data.get("expiresAt", 0),
                "source": "macos_keychain",
            }

    return None


def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
    """Read refreshable Claude Code OAuth credentials.

    Checks two sources in order:
    1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
    2. ~/.claude/.credentials.json file

    This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
    subscription flow is OAuth/setup-token based with refreshable credentials,
    and native direct Anthropic provider usage should follow that path rather
    than auto-detecting Claude's first-party managed key.

    Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
719 """ 720 # Try macOS Keychain first (covers Claude Code >=2.1.114) 721 kc_creds = _read_claude_code_credentials_from_keychain() 722 if kc_creds: 723 return kc_creds 724 725 # Fall back to JSON file 726 cred_path = Path.home() / ".claude" / ".credentials.json" 727 if cred_path.exists(): 728 try: 729 data = json.loads(cred_path.read_text(encoding="utf-8")) 730 oauth_data = data.get("claudeAiOauth") 731 if oauth_data and isinstance(oauth_data, dict): 732 access_token = oauth_data.get("accessToken", "") 733 if access_token: 734 return { 735 "accessToken": access_token, 736 "refreshToken": oauth_data.get("refreshToken", ""), 737 "expiresAt": oauth_data.get("expiresAt", 0), 738 "source": "claude_code_credentials_file", 739 } 740 except (json.JSONDecodeError, OSError, IOError) as e: 741 logger.debug("Failed to read ~/.claude/.credentials.json: %s", e) 742 743 return None 744 745 746 def read_claude_managed_key() -> Optional[str]: 747 """Read Claude's native managed key from ~/.claude.json for diagnostics only.""" 748 claude_json = Path.home() / ".claude.json" 749 if claude_json.exists(): 750 try: 751 data = json.loads(claude_json.read_text(encoding="utf-8")) 752 primary_key = data.get("primaryApiKey", "") 753 if isinstance(primary_key, str) and primary_key.strip(): 754 return primary_key.strip() 755 except (json.JSONDecodeError, OSError, IOError) as e: 756 logger.debug("Failed to read ~/.claude.json: %s", e) 757 return None 758 759 760 def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool: 761 """Check if Claude Code credentials have a non-expired access token.""" 762 import time 763 764 expires_at = creds.get("expiresAt", 0) 765 if not expires_at: 766 # No expiry set (managed keys) — valid if token is present 767 return bool(creds.get("accessToken")) 768 769 # expiresAt is in milliseconds since epoch 770 now_ms = int(time.time() * 1000) 771 # Allow 60 seconds of buffer 772 return now_ms < (expires_at - 60_000) 773 774 775 def 
refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]: 776 """Refresh an Anthropic OAuth token without mutating local credential files.""" 777 import time 778 import urllib.parse 779 import urllib.request 780 781 if not refresh_token: 782 raise ValueError("refresh_token is required") 783 784 client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" 785 if use_json: 786 data = json.dumps({ 787 "grant_type": "refresh_token", 788 "refresh_token": refresh_token, 789 "client_id": client_id, 790 }).encode() 791 content_type = "application/json" 792 else: 793 data = urllib.parse.urlencode({ 794 "grant_type": "refresh_token", 795 "refresh_token": refresh_token, 796 "client_id": client_id, 797 }).encode() 798 content_type = "application/x-www-form-urlencoded" 799 800 token_endpoints = [ 801 "https://platform.claude.com/v1/oauth/token", 802 "https://console.anthropic.com/v1/oauth/token", 803 ] 804 last_error = None 805 for endpoint in token_endpoints: 806 req = urllib.request.Request( 807 endpoint, 808 data=data, 809 headers={ 810 "Content-Type": content_type, 811 "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)", 812 }, 813 method="POST", 814 ) 815 try: 816 with urllib.request.urlopen(req, timeout=10) as resp: 817 result = json.loads(resp.read().decode()) 818 except Exception as exc: 819 last_error = exc 820 logger.debug("Anthropic token refresh failed at %s: %s", endpoint, exc) 821 continue 822 823 access_token = result.get("access_token", "") 824 if not access_token: 825 raise ValueError("Anthropic refresh response was missing access_token") 826 next_refresh = result.get("refresh_token", refresh_token) 827 expires_in = result.get("expires_in", 3600) 828 return { 829 "access_token": access_token, 830 "refresh_token": next_refresh, 831 "expires_at_ms": int(time.time() * 1000) + (expires_in * 1000), 832 } 833 834 if last_error is not None: 835 raise last_error 836 raise ValueError("Anthropic token refresh failed") 837 

def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
    """Attempt to refresh an expired Claude Code OAuth token.

    Uses ``creds["refreshToken"]`` and, on success, persists the rotated
    credential set back to ~/.claude/.credentials.json.

    Returns the fresh access token, or None when no refresh token exists
    or the refresh failed (failures are logged at debug level only).
    """
    refresh_token = creds.get("refreshToken", "")
    if not refresh_token:
        logger.debug("No refresh token available — cannot refresh")
        return None

    try:
        refreshed = refresh_anthropic_oauth_pure(refresh_token, use_json=False)
        _write_claude_code_credentials(
            refreshed["access_token"],
            refreshed["refresh_token"],
            refreshed["expires_at_ms"],
        )
        logger.debug("Successfully refreshed Claude Code OAuth token")
        return refreshed["access_token"]
    except Exception as e:
        # Best-effort: callers fall through to other token sources on failure.
        logger.debug("Failed to refresh Claude Code token: %s", e)
        return None


def _write_claude_code_credentials(
    access_token: str,
    refresh_token: str,
    expires_at_ms: int,
    *,
    scopes: Optional[list] = None,
) -> None:
    """Write refreshed credentials back to ~/.claude/.credentials.json.

    The optional *scopes* list (e.g. ``["user:inference", "user:profile", ...]``)
    is persisted so that Claude Code's own auth check recognises the credential
    as valid. Claude Code >=2.1.81 gates on the presence of ``"user:inference"``
    in the stored scopes before it will use the token. When *scopes* is None,
    scopes already stored on disk are preserved.

    The write is atomic (sibling .tmp file + replace). The temp file is
    created with mode 0o600 *before* any secret bytes are written — a plain
    write_text() followed by a later chmod() would leave the token readable
    by other local users for a moment under the default umask.
    """
    cred_path = Path.home() / ".claude" / ".credentials.json"
    try:
        # Read existing file to preserve other fields
        existing = {}
        if cred_path.exists():
            existing = json.loads(cred_path.read_text(encoding="utf-8"))

        oauth_data: Dict[str, Any] = {
            "accessToken": access_token,
            "refreshToken": refresh_token,
            "expiresAt": expires_at_ms,
        }
        if scopes is not None:
            oauth_data["scopes"] = scopes
        elif "claudeAiOauth" in existing and "scopes" in existing["claudeAiOauth"]:
            # Preserve previously-stored scopes when the refresh response
            # does not include a scope field.
            oauth_data["scopes"] = existing["claudeAiOauth"]["scopes"]

        existing["claudeAiOauth"] = oauth_data

        cred_path.parent.mkdir(parents=True, exist_ok=True)
        _tmp_cred = cred_path.with_suffix(".tmp")
        # Remove any stale temp file so O_CREAT applies our 0o600 mode
        # (the mode argument only takes effect at creation time).
        _tmp_cred.unlink(missing_ok=True)
        fd = os.open(str(_tmp_cred), os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w", encoding="utf-8") as fh:
            fh.write(json.dumps(existing, indent=2))
        _tmp_cred.replace(cred_path)
        # replace() carried the temp file's 0o600 mode over; chmod again
        # defensively in case a pre-existing credentials file had looser
        # permissions on some platforms.
        cred_path.chmod(0o600)
    except (OSError, IOError) as e:
        logger.debug("Failed to write refreshed credentials: %s", e)


def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
    """Resolve a token from Claude Code credential files, refreshing if needed.

    Returns a valid access token, a freshly-refreshed one, or None when no
    credentials exist or the refresh failed.
    """
    creds = creds or read_claude_code_credentials()
    if creds and is_claude_code_token_valid(creds):
        logger.debug("Using Claude Code credentials (auto-detected)")
        return creds["accessToken"]
    if creds:
        logger.debug("Claude Code credentials expired — attempting refresh")
        refreshed = _refresh_oauth_token(creds)
        if refreshed:
            return refreshed
        logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
    return None


def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
    """Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.

    Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
    later refresh impossible because the static env token wins before we ever
    inspect Claude Code's refreshable credential file. If we have a refreshable
    Claude Code credential record, prefer it over the static env OAuth token.

    Returns the preferred token, or None when the env token should be used
    as-is (not an OAuth token, no refreshable credentials, or the resolved
    token is identical to the env token).
    """
    if not env_token or not _is_oauth_token(env_token) or not isinstance(creds, dict):
        return None
    if not creds.get("refreshToken"):
        return None

    resolved = _resolve_claude_code_token_from_credentials(creds)
    if resolved and resolved != env_token:
        logger.debug(
            "Preferring Claude Code credential file over static env OAuth token so refresh can proceed"
        )
        return resolved
    return None


def resolve_anthropic_token() -> Optional[str]:
    """Resolve an Anthropic token from all available sources.

    Priority:
    1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
    2. CLAUDE_CODE_OAUTH_TOKEN env var
    3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
       — with automatic refresh if expired and a refresh token is available
    4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)

    Returns the token string or None.
    """
    creds = read_claude_code_credentials()

    # 1. Hermes-managed OAuth/setup token env var
    token = os.getenv("ANTHROPIC_TOKEN", "").strip()
    if token:
        preferred = _prefer_refreshable_claude_code_token(token, creds)
        if preferred:
            return preferred
        return token

    # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
    cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
    if cc_token:
        preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
        if preferred:
            return preferred
        return cc_token

    # 3. Claude Code credential file
    resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
    if resolved_claude_token:
        return resolved_claude_token

    # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
    #    This remains as a compatibility fallback for pre-migration Hermes configs.
    api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
    if api_key:
        return api_key

    return None


def run_oauth_setup_token() -> Optional[str]:
    """Run 'claude setup-token' interactively and return the resulting token.

    Checks multiple sources after the subprocess completes:
    1. Claude Code credential files (may be written by the subprocess)
    2. CLAUDE_CODE_OAUTH_TOKEN / ANTHROPIC_TOKEN env vars

    Returns the token string, or None if no credentials were obtained.
    Raises FileNotFoundError if the 'claude' CLI is not installed.
    """
    import shutil

    claude_path = shutil.which("claude")
    if not claude_path:
        raise FileNotFoundError(
            "The 'claude' CLI is not installed. "
            "Install it with: npm install -g @anthropic-ai/claude-code"
        )

    # Run interactively — stdin/stdout/stderr inherited so user can interact.
    # `subprocess` comes from the module-level import; the previous local
    # re-import merely shadowed it.
    try:
        subprocess.run([claude_path, "setup-token"])
    except (KeyboardInterrupt, EOFError):
        return None

    # Check if credentials were saved to Claude Code's config files
    creds = read_claude_code_credentials()
    if creds and is_claude_code_token_valid(creds):
        return creds["accessToken"]

    # Check env vars that may have been set
    for env_var in ("CLAUDE_CODE_OAUTH_TOKEN", "ANTHROPIC_TOKEN"):
        val = os.getenv(env_var, "").strip()
        if val:
            return val

    return None


# ── Hermes-native PKCE OAuth flow ────────────────────────────────────────
# Mirrors the flow used by Claude Code, pi-ai, and OpenCode.
# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).

# OAuth client ID shared with the Claude Code CLI (public PKCE client).
_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
# Hermes-owned credential store — separate from Claude Code's files.
_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json"


def _generate_pkce() -> tuple:
    """Generate PKCE code_verifier and code_challenge (S256).

    Returns ``(verifier, challenge)``: a 32-byte urlsafe-base64 verifier
    (padding stripped, per RFC 7636) and its SHA-256 challenge encoded the
    same way.
    """
    import base64
    import hashlib
    import secrets

    verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
    challenge = base64.urlsafe_b64encode(
        hashlib.sha256(verifier.encode()).digest()
    ).rstrip(b"=").decode()
    return verifier, challenge


def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
    """Run Hermes-native OAuth PKCE flow and return credential state.

    Interactive: prints an authorization URL (and tries to open a browser),
    then reads the pasted authorization code from stdin and exchanges it at
    the token endpoint.

    Returns a dict with ``access_token``, ``refresh_token`` and
    ``expires_at_ms``, or None on user cancellation / exchange failure.
    Does not persist anything to disk ("pure").
    """
    import time
    import webbrowser

    verifier, challenge = _generate_pkce()

    params = {
        "code": "true",
        "client_id": _OAUTH_CLIENT_ID,
        "response_type": "code",
        "redirect_uri": _OAUTH_REDIRECT_URI,
        "scope": _OAUTH_SCOPES,
        "code_challenge": challenge,
        "code_challenge_method": "S256",
        # The verifier doubles as the opaque state parameter.
        "state": verifier,
    }
    from urllib.parse import urlencode

    auth_url = f"https://claude.ai/oauth/authorize?{urlencode(params)}"

    print()
    print("Authorize Hermes with your Claude Pro/Max subscription.")
    print()
    print("╭─ Claude Pro/Max Authorization ────────────────────╮")
    print("│                                                   │")
    print("│  Open this link in your browser:                  │")
    print("╰───────────────────────────────────────────────────╯")
    print()
    print(f" {auth_url}")
    print()

    try:
        webbrowser.open(auth_url)
        print(" (Browser opened automatically)")
    except Exception:
        # Headless environments — the URL was already printed above.
        pass

    print()
    print("After authorizing, you'll see a code. Paste it below.")
    print()
    try:
        auth_code = input("Authorization code: ").strip()
    except (KeyboardInterrupt, EOFError):
        return None

    if not auth_code:
        print("No code entered.")
        return None

    # The callback page shows the code as "<code>#<state>".
    splits = auth_code.split("#")
    code = splits[0]
    state = splits[1] if len(splits) > 1 else ""

    try:
        import urllib.request

        exchange_data = json.dumps({
            "grant_type": "authorization_code",
            "client_id": _OAUTH_CLIENT_ID,
            "code": code,
            "state": state,
            "redirect_uri": _OAUTH_REDIRECT_URI,
            "code_verifier": verifier,
        }).encode()

        req = urllib.request.Request(
            _OAUTH_TOKEN_URL,
            data=exchange_data,
            headers={
                "Content-Type": "application/json",
                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
            },
            method="POST",
        )

        with urllib.request.urlopen(req, timeout=15) as resp:
            result = json.loads(resp.read().decode())
    except Exception as e:
        print(f"Token exchange failed: {e}")
        return None

    access_token = result.get("access_token", "")
    refresh_token = result.get("refresh_token", "")
    expires_in = result.get("expires_in", 3600)

    if not access_token:
        print("No access token in response.")
        return None

    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
    return {
        "access_token": access_token,
        "refresh_token": refresh_token,
        "expires_at_ms": expires_at_ms,
    }


def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
    """Read Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json.

    Returns the raw stored dict only when it carries a non-empty
    ``accessToken``; None otherwise. Read errors are logged at debug level.
    """
    if _HERMES_OAUTH_FILE.exists():
        try:
            data = json.loads(_HERMES_OAUTH_FILE.read_text(encoding="utf-8"))
            if data.get("accessToken"):
                return data
        except (json.JSONDecodeError, OSError, IOError) as e:
            logger.debug("Failed to read Hermes OAuth credentials: %s", e)
    return None


# ---------------------------------------------------------------------------
# Message / tool / response format conversion
# ---------------------------------------------------------------------------


def _is_bedrock_model_id(model: str) -> bool:
    """Detect AWS Bedrock model IDs that use dots as namespace separators.

    Bedrock model IDs come in two forms:
    - Bare: ``anthropic.claude-opus-4-7``
    - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``

    In both cases the dots separate namespace components, not version
    numbers, and must be preserved verbatim for the Bedrock API.
    """
    lower = model.lower()
    # Regional inference-profile prefixes
    if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
        return True
    # Bare Bedrock model IDs: provider.model-family
    if lower.startswith("anthropic."):
        return True
    return False


def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """Normalize a model name for the Anthropic API.

    - Strips 'anthropic/' prefix (OpenRouter format, case-insensitive)
    - Converts dots to hyphens in version numbers (OpenRouter uses dots,
      Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6), unless
      preserve_dots is True (e.g. for Alibaba/DashScope: qwen3.5-plus).
    - Preserves Bedrock model IDs (``anthropic.claude-opus-4-7``) and
      regional inference profiles (``us.anthropic.claude-*``) whose dots
      are namespace separators, not version separators.
    """
    lower = model.lower()
    if lower.startswith("anthropic/"):
        model = model[len("anthropic/"):]
    if not preserve_dots:
        # Bedrock model IDs use dots as namespace separators
        # (e.g. "anthropic.claude-opus-4-7", "us.anthropic.claude-*").
        # These must not be converted to hyphens. See issue #12295.
        if _is_bedrock_model_id(model):
            return model
        # Only convert dots to hyphens for Anthropic/Claude models.
        # Non-Anthropic models (gpt-5.4, gemini-2.5, etc.) use dots
        # as part of their canonical names. See issue #17171.
        _lower = model.lower()
        if _lower.startswith("claude-") or _lower.startswith("anthropic/"):
            model = model.replace(".", "-")
    return model


def _sanitize_tool_id(tool_id: str) -> str:
    """Sanitize a tool call ID for the Anthropic API.

    Anthropic requires IDs matching [a-zA-Z0-9_-]. Replace invalid
    characters with underscores and ensure non-empty.
    """
    import re
    if not tool_id:
        return "tool_0"
    sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
    return sanitized or "tool_0"


def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
    """Normalize tool schemas before sending them to Anthropic.

    Anthropic's tool schema validator rejects nullable unions such as
    ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
    commonly emits for optional fields. Tool optionality is represented by
    the parent ``required`` array, so we delegate to the shared
    ``strip_nullable_unions`` helper to collapse nullable unions to the
    non-null branch while preserving metadata like description/default.

    ``keep_nullable_hint=False`` because the Anthropic validator does not
    recognize the OpenAPI-style ``nullable: true`` extension and strict
    schema-to-grammar converters may reject unknown keywords.

    Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the
    Anthropic API rejects union keywords at the schema root with a generic
    HTTP 400. Several upstream and plugin tools ship schemas with one of
    these keywords at the top level (commonly for Pydantic discriminated
    unions). If we land here with those keywords still present after
    nullable-union stripping, drop them and fall back to a plain object
    schema so the tool still validates at the Anthropic boundary.
    """
    # Falsy schema (None, {}, "") → minimal valid object schema.
    if not schema:
        return {"type": "object", "properties": {}}

    from tools.schema_sanitizer import strip_nullable_unions

    normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
    if not isinstance(normalized, dict):
        return {"type": "object", "properties": {}}
    # Strip top-level union keywords that Anthropic's validator rejects.
    banned = {"oneOf", "allOf", "anyOf"}
    if banned & normalized.keys():
        normalized = {k: v for k, v in normalized.items() if k not in banned}
        if "type" not in normalized:
            normalized["type"] = "object"
    # Object schemas must carry a dict "properties" field.
    if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
        normalized = {**normalized, "properties": {}}
    return normalized


def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Convert OpenAI tool definitions to Anthropic format.

    Maps each ``{"function": {name, description, parameters}}`` entry to
    Anthropic's ``{name, description, input_schema}`` shape, normalizing
    the schema and dropping duplicate tool names.
    """
    if not tools:
        return []
    result = []
    seen_names: set = set()
    for t in tools:
        fn = t.get("function", {})
        name = fn.get("name", "")
        # Defensive dedup: Anthropic rejects requests with duplicate tool
        # names. Upstream injection paths already dedup, but this guard
        # converts a hard API failure into a warning. See: #18478
        if name and name in seen_names:
            logger.warning(
                "convert_tools_to_anthropic: duplicate tool name '%s' "
                "— dropping second occurrence",
                name,
            )
            continue
        if name:
            seen_names.add(name)
        result.append({
            "name": name,
            "description": fn.get("description", ""),
            "input_schema": _normalize_tool_input_schema(
                fn.get("parameters", {"type": "object", "properties": {}})
            ),
        })
    return result


def _image_source_from_openai_url(url: str) -> Dict[str, str]:
    """Convert an OpenAI-style image URL/data URL into Anthropic image source.

    ``data:`` URLs become base64 sources (media type parsed from the data-URL
    header, defaulting to image/jpeg); anything else is passed through as a
    URL source.
    """
    url = str(url or "").strip()
    if not url:
        return {"type": "url", "url": ""}

    if url.startswith("data:"):
        header, _, data = url.partition(",")
        media_type = "image/jpeg"
        if header.startswith("data:"):
            mime_part = header[len("data:"):].split(";", 1)[0].strip()
            if mime_part.startswith("image/"):
                media_type = mime_part
        return {
            "type": "base64",
            "media_type": media_type,
            "data": data,
        }

    return {"type": "url", "url": url}


def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
    """Convert a single OpenAI-style content part to Anthropic format.

    Strings become text blocks; "input_text" and "image_url"/"input_image"
    parts are translated; any other dict part is passed through shallow-copied.
    ``cache_control`` markers on the source part are carried over. Returns
    None for None input (caller drops it).
    """
    if part is None:
        return None
    if isinstance(part, str):
        return {"type": "text", "text": part}
    if not isinstance(part, dict):
        return {"type": "text", "text": str(part)}

    ptype = part.get("type")

    if ptype == "input_text":
        block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
    elif ptype in {"image_url", "input_image"}:
        image_value = part.get("image_url", {})
        url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
        block = {"type": "image", "source": _image_source_from_openai_url(url)}
    else:
        # Unknown/native block types (e.g. "text") pass through unchanged.
        block = dict(part)

    if isinstance(part.get("cache_control"), dict) and "cache_control" not in block:
        block["cache_control"] = dict(part["cache_control"])
    return block


def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None) -> Any:
    """Recursively convert SDK objects to plain Python data structures.

    Guards against circular references (``_path`` tracks ``id()`` of objects
    on the *current* recursion path) and runaway depth (capped at 20 levels).
    Uses path-based tracking so shared (but non-cyclic) objects referenced by
    multiple siblings are converted correctly rather than being stringified.
    """
    _MAX_DEPTH = 20
    if _depth > _MAX_DEPTH:
        return str(value)

    if _path is None:
        _path = set()

    obj_id = id(value)
    if obj_id in _path:
        # Already on the current recursion path — a true cycle.
        return str(value)

    # Pydantic-style objects expose model_dump(); prefer it over __dict__.
    if hasattr(value, "model_dump"):
        _path.add(obj_id)
        result = _to_plain_data(value.model_dump(), _depth=_depth + 1, _path=_path)
        _path.discard(obj_id)
        return result
    if isinstance(value, dict):
        _path.add(obj_id)
        result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
        _path.discard(obj_id)
        return result
    if isinstance(value, (list, tuple)):
        _path.add(obj_id)
        result = [_to_plain_data(v, _depth=_depth + 1, _path=_path) for v in value]
        _path.discard(obj_id)
        return result
    if hasattr(value, "__dict__"):
        _path.add(obj_id)
        result = {
            k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
            for k, v in vars(value).items()
            if not k.startswith("_")
        }
        _path.discard(obj_id)
        return result
    return value


def _extract_preserved_thinking_blocks(message: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return Anthropic thinking blocks previously preserved on the message.

    Reads ``message["reasoning_details"]`` and deep-copies every dict entry
    whose type is "thinking" or "redacted_thinking"; everything else is
    ignored.
    """
    raw_details = message.get("reasoning_details")
    if not isinstance(raw_details, list):
        return []

    preserved: List[Dict[str, Any]] = []
    for detail in raw_details:
        if not isinstance(detail, dict):
            continue
        block_type = str(detail.get("type", "") or "").strip().lower()
        if block_type not in {"thinking", "redacted_thinking"}:
            continue
        preserved.append(copy.deepcopy(detail))
    return preserved


def _convert_content_to_anthropic(content: Any) -> Any:
    """Convert OpenAI-style multimodal content arrays to Anthropic blocks.

    Non-list content is returned unchanged; list content is converted
    part-by-part, dropping None parts.
    """
    if not isinstance(content, list):
        return content

    converted = []
    for part in content:
        block = _convert_content_part_to_anthropic(part)
        if block is not None:
            converted.append(block)
    return converted


def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
    model: str | None = None,
) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

    Returns (system_prompt, anthropic_messages).
    System messages are extracted since Anthropic takes them as a separate param.
    system_prompt is a string or list of content blocks (when cache_control present).

    When *base_url* is provided and points to a third-party Anthropic-compatible
    endpoint, all thinking block signatures are stripped. Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".

    When *model* is provided and matches the Kimi / Moonshot family (or
    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
    synthesised from ``reasoning_content`` are preserved on replayed
    assistant tool-call messages — Kimi requires the field to exist, even
    if empty.
1443 """ 1444 system = None 1445 result = [] 1446 1447 for m in messages: 1448 role = m.get("role", "user") 1449 content = m.get("content", "") 1450 1451 if role == "system": 1452 if isinstance(content, list): 1453 # Preserve cache_control markers on content blocks 1454 has_cache = any( 1455 p.get("cache_control") for p in content if isinstance(p, dict) 1456 ) 1457 if has_cache: 1458 system = [p for p in content if isinstance(p, dict)] 1459 else: 1460 system = "\n".join( 1461 p["text"] for p in content if p.get("type") == "text" 1462 ) 1463 else: 1464 system = content 1465 continue 1466 1467 if role == "assistant": 1468 blocks = _extract_preserved_thinking_blocks(m) 1469 if content: 1470 if isinstance(content, list): 1471 converted_content = _convert_content_to_anthropic(content) 1472 if isinstance(converted_content, list): 1473 blocks.extend(converted_content) 1474 else: 1475 blocks.append({"type": "text", "text": str(content)}) 1476 for tc in m.get("tool_calls", []): 1477 if not tc or not isinstance(tc, dict): 1478 continue 1479 fn = tc.get("function", {}) 1480 args = fn.get("arguments", "{}") 1481 try: 1482 parsed_args = json.loads(args) if isinstance(args, str) else args 1483 except (json.JSONDecodeError, ValueError): 1484 parsed_args = {} 1485 blocks.append({ 1486 "type": "tool_use", 1487 "id": _sanitize_tool_id(tc.get("id", "")), 1488 "name": fn.get("name", ""), 1489 "input": parsed_args, 1490 }) 1491 # Kimi's /coding endpoint (Anthropic protocol) requires assistant 1492 # tool-call messages to carry reasoning_content when thinking is 1493 # enabled server-side. Preserve it as a thinking block so Kimi 1494 # can validate the message history. See hermes-agent#13848. 1495 # 1496 # Accept empty string "" — _copy_reasoning_content_for_api() 1497 # injects "" as a tier-3 fallback for Kimi tool-call messages 1498 # that had no reasoning. Kimi requires the field to exist, even 1499 # if empty. 
1500 # 1501 # Prepend (not append): Anthropic protocol requires thinking 1502 # blocks before text and tool_use blocks. 1503 # 1504 # Guard: only add when reasoning_details didn't already contribute 1505 # thinking blocks. On native Anthropic, reasoning_details produces 1506 # signed thinking blocks — adding another unsigned one from 1507 # reasoning_content would create a duplicate (same text) that gets 1508 # downgraded to a spurious text block on the last assistant message. 1509 reasoning_content = m.get("reasoning_content") 1510 _already_has_thinking = any( 1511 isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking") 1512 for b in blocks 1513 ) 1514 if isinstance(reasoning_content, str) and not _already_has_thinking: 1515 blocks.insert(0, {"type": "thinking", "thinking": reasoning_content}) 1516 # Anthropic rejects empty assistant content 1517 effective = blocks or content 1518 if not effective or effective == "": 1519 effective = [{"type": "text", "text": "(empty)"}] 1520 result.append({"role": "assistant", "content": effective}) 1521 continue 1522 1523 if role == "tool": 1524 # Sanitize tool_use_id and ensure non-empty content 1525 result_content = content if isinstance(content, str) else json.dumps(content) 1526 if not result_content: 1527 result_content = "(no output)" 1528 tool_result = { 1529 "type": "tool_result", 1530 "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")), 1531 "content": result_content, 1532 } 1533 if isinstance(m.get("cache_control"), dict): 1534 tool_result["cache_control"] = dict(m["cache_control"]) 1535 # Merge consecutive tool results into one user message 1536 if ( 1537 result 1538 and result[-1]["role"] == "user" 1539 and isinstance(result[-1]["content"], list) 1540 and result[-1]["content"] 1541 and result[-1]["content"][0].get("type") == "tool_result" 1542 ): 1543 result[-1]["content"].append(tool_result) 1544 else: 1545 result.append({"role": "user", "content": [tool_result]}) 1546 continue 1547 1548 # 
Regular user message — validate non-empty content (Anthropic rejects empty) 1549 if isinstance(content, list): 1550 converted_blocks = _convert_content_to_anthropic(content) 1551 # Check if all text blocks are empty 1552 if not converted_blocks or all( 1553 b.get("text", "").strip() == "" 1554 for b in converted_blocks 1555 if isinstance(b, dict) and b.get("type") == "text" 1556 ): 1557 converted_blocks = [{"type": "text", "text": "(empty message)"}] 1558 result.append({"role": "user", "content": converted_blocks}) 1559 else: 1560 # Validate string content is non-empty 1561 if not content or (isinstance(content, str) and not content.strip()): 1562 content = "(empty message)" 1563 result.append({"role": "user", "content": content}) 1564 1565 # Strip orphaned tool_use blocks (no matching tool_result follows) 1566 tool_result_ids = set() 1567 for m in result: 1568 if m["role"] == "user" and isinstance(m["content"], list): 1569 for block in m["content"]: 1570 if block.get("type") == "tool_result": 1571 tool_result_ids.add(block.get("tool_use_id")) 1572 for m in result: 1573 if m["role"] == "assistant" and isinstance(m["content"], list): 1574 m["content"] = [ 1575 b 1576 for b in m["content"] 1577 if b.get("type") != "tool_use" or b.get("id") in tool_result_ids 1578 ] 1579 if not m["content"]: 1580 m["content"] = [{"type": "text", "text": "(tool call removed)"}] 1581 1582 # Strip orphaned tool_result blocks (no matching tool_use precedes them). 1583 # This is the mirror of the above: context compression or session truncation 1584 # can remove an assistant message containing a tool_use while leaving the 1585 # subsequent tool_result intact. Anthropic rejects these with a 400. 
1586 tool_use_ids = set() 1587 for m in result: 1588 if m["role"] == "assistant" and isinstance(m["content"], list): 1589 for block in m["content"]: 1590 if block.get("type") == "tool_use": 1591 tool_use_ids.add(block.get("id")) 1592 for m in result: 1593 if m["role"] == "user" and isinstance(m["content"], list): 1594 m["content"] = [ 1595 b 1596 for b in m["content"] 1597 if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids 1598 ] 1599 if not m["content"]: 1600 m["content"] = [{"type": "text", "text": "(tool result removed)"}] 1601 1602 # Enforce strict role alternation (Anthropic rejects consecutive same-role messages) 1603 fixed = [] 1604 for m in result: 1605 if fixed and fixed[-1]["role"] == m["role"]: 1606 if m["role"] == "user": 1607 # Merge consecutive user messages 1608 prev_content = fixed[-1]["content"] 1609 curr_content = m["content"] 1610 if isinstance(prev_content, str) and isinstance(curr_content, str): 1611 fixed[-1]["content"] = prev_content + "\n" + curr_content 1612 elif isinstance(prev_content, list) and isinstance(curr_content, list): 1613 fixed[-1]["content"] = prev_content + curr_content 1614 else: 1615 # Mixed types — wrap string in list 1616 if isinstance(prev_content, str): 1617 prev_content = [{"type": "text", "text": prev_content}] 1618 if isinstance(curr_content, str): 1619 curr_content = [{"type": "text", "text": curr_content}] 1620 fixed[-1]["content"] = prev_content + curr_content 1621 else: 1622 # Consecutive assistant messages — merge text content. 1623 # Drop thinking blocks from the *second* message: their 1624 # signature was computed against a different turn boundary 1625 # and becomes invalid once merged. 
1626 if isinstance(m["content"], list): 1627 m["content"] = [ 1628 b for b in m["content"] 1629 if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")) 1630 ] 1631 prev_blocks = fixed[-1]["content"] 1632 curr_blocks = m["content"] 1633 if isinstance(prev_blocks, list) and isinstance(curr_blocks, list): 1634 fixed[-1]["content"] = prev_blocks + curr_blocks 1635 elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str): 1636 fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks 1637 else: 1638 # Mixed types — normalize both to list and merge 1639 if isinstance(prev_blocks, str): 1640 prev_blocks = [{"type": "text", "text": prev_blocks}] 1641 if isinstance(curr_blocks, str): 1642 curr_blocks = [{"type": "text", "text": curr_blocks}] 1643 fixed[-1]["content"] = prev_blocks + curr_blocks 1644 else: 1645 fixed.append(m) 1646 result = fixed 1647 1648 # ── Thinking block signature management ────────────────────────── 1649 # Anthropic signs thinking blocks against the full turn content. 1650 # Any upstream mutation (context compression, session truncation, 1651 # orphan stripping, message merging) invalidates the signature, 1652 # causing HTTP 400 "Invalid signature in thinking block". 1653 # 1654 # Signatures are Anthropic-proprietary. Third-party endpoints 1655 # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate 1656 # them and will reject them outright. When targeting a third-party 1657 # endpoint, strip ALL thinking/redacted_thinking blocks from every 1658 # assistant message — the third-party will generate its own 1659 # thinking blocks if it supports extended thinking. 1660 # 1661 # For direct Anthropic (strategy following clawdbot/OpenClaw): 1662 # 1. Strip thinking/redacted_thinking from all assistant messages 1663 # EXCEPT the last one — preserves reasoning continuity on the 1664 # current tool-use chain while avoiding stale signature errors. 1665 # 2. 
Downgrade unsigned thinking blocks (no signature) to text — 1666 # Anthropic can't validate them and will reject them. 1667 # 3. Strip cache_control from thinking/redacted_thinking blocks — 1668 # cache markers can interfere with signature validation. 1669 _THINKING_TYPES = frozenset(("thinking", "redacted_thinking")) 1670 _is_third_party = _is_third_party_anthropic_endpoint(base_url) 1671 # Kimi /coding and DeepSeek /anthropic share a contract: both speak the 1672 # Anthropic Messages protocol upstream but require that thinking blocks 1673 # synthesised from reasoning_content round-trip on subsequent turns when 1674 # thinking is enabled. Signed Anthropic blocks still have to be stripped 1675 # (neither endpoint can validate Anthropic's signatures); unsigned blocks 1676 # are preserved. See hermes-agent#13848 (Kimi) and #16748 (DeepSeek). 1677 _preserve_unsigned_thinking = ( 1678 _is_kimi_family_endpoint(base_url, model) 1679 or _is_deepseek_anthropic_endpoint(base_url) 1680 ) 1681 1682 last_assistant_idx = None 1683 for i in range(len(result) - 1, -1, -1): 1684 if result[i].get("role") == "assistant": 1685 last_assistant_idx = i 1686 break 1687 1688 for idx, m in enumerate(result): 1689 if m.get("role") != "assistant" or not isinstance(m.get("content"), list): 1690 continue 1691 1692 if _preserve_unsigned_thinking: 1693 # Kimi's /coding and DeepSeek's /anthropic endpoints both enable 1694 # thinking server-side and require unsigned thinking blocks on 1695 # replayed assistant tool-call messages. Strip signed Anthropic 1696 # blocks (neither upstream can validate Anthropic signatures) but 1697 # preserve the unsigned ones we synthesised from reasoning_content. 
1698 new_content = [] 1699 for b in m["content"]: 1700 if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES: 1701 new_content.append(b) 1702 continue 1703 if b.get("signature") or b.get("data"): 1704 # Anthropic-signed block — upstream can't validate, strip 1705 continue 1706 # Unsigned thinking (synthesised from reasoning_content) — 1707 # keep it: the upstream needs it for message-history validation. 1708 new_content.append(b) 1709 m["content"] = new_content or [{"type": "text", "text": "(empty)"}] 1710 elif _is_third_party or idx != last_assistant_idx: 1711 # Third-party endpoint: strip ALL thinking blocks from every 1712 # assistant message — signatures are Anthropic-proprietary. 1713 # Direct Anthropic: strip from non-latest assistant messages only. 1714 stripped = [ 1715 b for b in m["content"] 1716 if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES) 1717 ] 1718 m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}] 1719 else: 1720 # Latest assistant on direct Anthropic: keep signed thinking 1721 # blocks for reasoning continuity; downgrade unsigned ones to 1722 # plain text. 
1723 new_content = [] 1724 for b in m["content"]: 1725 if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES: 1726 new_content.append(b) 1727 continue 1728 if b.get("type") == "redacted_thinking": 1729 # Redacted blocks use 'data' for the signature payload 1730 if b.get("data"): 1731 new_content.append(b) 1732 # else: drop — no data means it can't be validated 1733 elif b.get("signature"): 1734 # Signed thinking block — keep it 1735 new_content.append(b) 1736 else: 1737 # Unsigned thinking — downgrade to text so it's not lost 1738 thinking_text = b.get("thinking", "") 1739 if thinking_text: 1740 new_content.append({"type": "text", "text": thinking_text}) 1741 m["content"] = new_content or [{"type": "text", "text": "(empty)"}] 1742 1743 # Strip cache_control from any remaining thinking/redacted_thinking 1744 # blocks — cache markers interfere with signature validation. 1745 for b in m["content"]: 1746 if isinstance(b, dict) and b.get("type") in _THINKING_TYPES: 1747 b.pop("cache_control", None) 1748 1749 return system, result 1750 1751 1752 def build_anthropic_kwargs( 1753 model: str, 1754 messages: List[Dict], 1755 tools: Optional[List[Dict]], 1756 max_tokens: Optional[int], 1757 reasoning_config: Optional[Dict[str, Any]], 1758 tool_choice: Optional[str] = None, 1759 is_oauth: bool = False, 1760 preserve_dots: bool = False, 1761 context_length: Optional[int] = None, 1762 base_url: str | None = None, 1763 fast_mode: bool = False, 1764 drop_context_1m_beta: bool = False, 1765 ) -> Dict[str, Any]: 1766 """Build kwargs for anthropic.messages.create(). 1767 1768 Naming note — two distinct concepts, easily confused: 1769 max_tokens = OUTPUT token cap for a single response. 1770 Anthropic's API calls this "max_tokens" but it only 1771 limits the *output*. Anthropic's own native SDK 1772 renamed it "max_output_tokens" for clarity. 1773 context_length = TOTAL context window (input tokens + output tokens). 
1774 The API enforces: input_tokens + max_tokens ≤ context_length. 1775 Stored on the ContextCompressor; reduced on overflow errors. 1776 1777 When *max_tokens* is None the model's native output ceiling is used 1778 (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6). 1779 1780 When *context_length* is provided and the model's native output ceiling 1781 exceeds it (e.g. a local endpoint with an 8K window), the output cap is 1782 clamped to context_length − 1. This only kicks in for unusually small 1783 context windows; for full-size models the native output cap is always 1784 smaller than the context window so no clamping happens. 1785 NOTE: this clamping does not account for prompt size — if the prompt is 1786 large, Anthropic may still reject the request. The caller must detect 1787 "max_tokens too large given prompt" errors and retry with a smaller cap 1788 (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens). 1789 1790 When *is_oauth* is True, applies Claude Code compatibility transforms: 1791 system prompt prefix, tool name prefixing, and prompt sanitization. 1792 1793 When *preserve_dots* is True, model name dots are not converted to hyphens 1794 (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus). 1795 1796 When *base_url* points to a third-party Anthropic-compatible endpoint, 1797 thinking block signatures are stripped (they are Anthropic-proprietary). 1798 1799 When *fast_mode* is True, adds ``extra_body["speed"] = "fast"`` and the 1800 fast-mode beta header for ~2.5x faster output throughput on Opus 4.6. 1801 Currently only supported on native Anthropic endpoints (not third-party 1802 compatible ones). 
1803 """ 1804 system, anthropic_messages = convert_messages_to_anthropic( 1805 messages, base_url=base_url, model=model 1806 ) 1807 anthropic_tools = convert_tools_to_anthropic(tools) if tools else [] 1808 1809 model = normalize_model_name(model, preserve_dots=preserve_dots) 1810 # effective_max_tokens = output cap for this call (≠ total context window) 1811 # Use the resolver helper so non-positive values (negative ints, 1812 # fractional floats, NaN, non-numeric) fail locally with a clear error 1813 # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664. 1814 effective_max_tokens = _resolve_anthropic_messages_max_tokens( 1815 max_tokens, model, context_length=context_length 1816 ) 1817 1818 # Clamp output cap to fit inside the total context window. 1819 # Only matters for small custom endpoints where context_length < native 1820 # output ceiling. For standard Anthropic models context_length (e.g. 1821 # 200K) is always larger than the output ceiling (e.g. 128K), so this 1822 # branch is not taken. 1823 if context_length and effective_max_tokens > context_length: 1824 effective_max_tokens = max(context_length - 1, 1) 1825 1826 # ── OAuth: Claude Code identity ────────────────────────────────── 1827 if is_oauth: 1828 # 1. Prepend Claude Code system prompt identity 1829 cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX} 1830 if isinstance(system, list): 1831 system = [cc_block] + system 1832 elif isinstance(system, str) and system: 1833 system = [cc_block, {"type": "text", "text": system}] 1834 else: 1835 system = [cc_block] 1836 1837 # 2. Sanitize system prompt — replace product name references 1838 # to avoid Anthropic's server-side content filters. 
1839 for block in system: 1840 if isinstance(block, dict) and block.get("type") == "text": 1841 text = block.get("text", "") 1842 text = text.replace("Hermes Agent", "Claude Code") 1843 text = text.replace("Hermes agent", "Claude Code") 1844 text = text.replace("hermes-agent", "claude-code") 1845 text = text.replace("Nous Research", "Anthropic") 1846 block["text"] = text 1847 1848 # 3. Prefix tool names with mcp_ (Claude Code convention) 1849 if anthropic_tools: 1850 for tool in anthropic_tools: 1851 if "name" in tool: 1852 tool["name"] = _MCP_TOOL_PREFIX + tool["name"] 1853 1854 # 4. Prefix tool names in message history (tool_use and tool_result blocks) 1855 for msg in anthropic_messages: 1856 content = msg.get("content") 1857 if isinstance(content, list): 1858 for block in content: 1859 if isinstance(block, dict): 1860 if block.get("type") == "tool_use" and "name" in block: 1861 if not block["name"].startswith(_MCP_TOOL_PREFIX): 1862 block["name"] = _MCP_TOOL_PREFIX + block["name"] 1863 elif block.get("type") == "tool_result" and "tool_use_id" in block: 1864 pass # tool_result uses ID, not name 1865 1866 kwargs: Dict[str, Any] = { 1867 "model": model, 1868 "messages": anthropic_messages, 1869 "max_tokens": effective_max_tokens, 1870 } 1871 1872 if system: 1873 kwargs["system"] = system 1874 1875 if anthropic_tools: 1876 kwargs["tools"] = anthropic_tools 1877 # Map OpenAI tool_choice to Anthropic format 1878 if tool_choice == "auto" or tool_choice is None: 1879 kwargs["tool_choice"] = {"type": "auto"} 1880 elif tool_choice == "required": 1881 kwargs["tool_choice"] = {"type": "any"} 1882 elif tool_choice == "none": 1883 # Anthropic has no tool_choice "none" — omit tools entirely to prevent use 1884 kwargs.pop("tools", None) 1885 elif isinstance(tool_choice, str): 1886 # Specific tool name 1887 kwargs["tool_choice"] = {"type": "tool", "name": tool_choice} 1888 1889 # Map reasoning_config to Anthropic's thinking parameter. 
1890 # Claude 4.6+ models use adaptive thinking + output_config.effort. 1891 # Older models use manual thinking with budget_tokens. 1892 # MiniMax Anthropic-compat endpoints support thinking (manual mode only, 1893 # not adaptive). Haiku does NOT support extended thinking — skip entirely. 1894 # 1895 # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has 1896 # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi 1897 # validates the message history and requires every prior assistant 1898 # tool-call message to carry OpenAI-style ``reasoning_content``. The 1899 # Anthropic path never populates that field, and 1900 # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks 1901 # on third-party endpoints — so the request fails with HTTP 400 1902 # "thinking is enabled but reasoning_content is missing in assistant 1903 # tool call message at index N". Kimi's reasoning is driven server-side 1904 # on the /coding route, so skip Anthropic's thinking parameter entirely 1905 # for that host. (Kimi on chat_completions enables thinking via 1906 # extra_body in the ChatCompletionsTransport — see #13503.) 1907 # 1908 # On 4.7+ the `thinking.display` field defaults to "omitted", which 1909 # silently hides reasoning text that Hermes surfaces in its CLI. We 1910 # request "summarized" so the reasoning blocks stay populated — matching 1911 # 4.6 behavior and preserving the activity-feed UX during long tool runs. 
1912 _is_kimi_coding = _is_kimi_family_endpoint(base_url, model) 1913 if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding: 1914 if reasoning_config.get("enabled") is not False and "haiku" not in model.lower(): 1915 effort = str(reasoning_config.get("effort", "medium")).lower() 1916 budget = THINKING_BUDGET.get(effort, 8000) 1917 if _supports_adaptive_thinking(model): 1918 kwargs["thinking"] = { 1919 "type": "adaptive", 1920 "display": "summarized", 1921 } 1922 adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium") 1923 # Downgrade xhigh→max on models that don't list xhigh as a 1924 # supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh. 1925 if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model): 1926 adaptive_effort = "max" 1927 kwargs["output_config"] = { 1928 "effort": adaptive_effort, 1929 } 1930 else: 1931 kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget} 1932 # Anthropic requires temperature=1 when thinking is enabled on older models 1933 kwargs["temperature"] = 1 1934 kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096) 1935 1936 # ── Strip sampling params on 4.7+ ───────────────────────────────── 1937 # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400. 1938 # Callers (auxiliary_client, etc.) may set these for older models; 1939 # drop them here as a safety net so upstream 4.6 → 4.7 migrations 1940 # don't require coordinated edits everywhere. 1941 if _forbids_sampling_params(model): 1942 for _sampling_key in ("temperature", "top_p", "top_k"): 1943 kwargs.pop(_sampling_key, None) 1944 1945 # ── Fast mode (Opus 4.6 only) ──────────────────────────────────── 1946 # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x 1947 # output speed. Per Anthropic docs, fast mode is only supported on 1948 # Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter. 
1949 # Only for native Anthropic endpoints — third-party providers would 1950 # reject the unknown beta header and speed parameter. 1951 if ( 1952 fast_mode 1953 and not _is_third_party_anthropic_endpoint(base_url) 1954 and _supports_fast_mode(model) 1955 ): 1956 kwargs.setdefault("extra_body", {})["speed"] = "fast" 1957 # Build extra_headers with ALL applicable betas (the per-request 1958 # extra_headers override the client-level anthropic-beta header). 1959 betas = list(_common_betas_for_base_url( 1960 base_url, 1961 drop_context_1m_beta=drop_context_1m_beta, 1962 )) 1963 if is_oauth: 1964 betas.extend(_OAUTH_ONLY_BETAS) 1965 betas.append(_FAST_MODE_BETA) 1966 kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)} 1967 1968 return kwargs 1969 1970