/ agent / anthropic_adapter.py
anthropic_adapter.py
   1  """Anthropic Messages API adapter for Hermes Agent.
   2  
   3  Translates between Hermes's internal OpenAI-style message format and
   4  Anthropic's Messages API. Follows the same pattern as the codex_responses
   5  adapter — all provider-specific logic is isolated here.
   6  
   7  Auth supports:
   8    - Regular API keys (sk-ant-api*) → x-api-key header
   9    - OAuth setup-tokens (sk-ant-oat*) → Bearer auth + beta header
  10    - Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json) → Bearer auth
  11  """
  12  
  13  import copy
  14  import json
  15  import logging
  16  import os
  17  import platform
  18  import subprocess
  19  from pathlib import Path
  20  
  21  from hermes_constants import get_hermes_home
  22  from typing import Any, Dict, List, Optional, Tuple
  23  from utils import base_url_host_matches, normalize_proxy_env_vars
  24  
  25  # NOTE: `import anthropic` is deliberately NOT at module top — the SDK pulls
  26  # ~220 ms of imports (anthropic.types, anthropic.lib.tools._beta_runner, etc.)
  27  # and the 3 usage sites (build_anthropic_client, build_anthropic_bedrock_client,
  28  # read_claude_code_credentials_from_keychain) are all on cold user-triggered
  29  # paths. Access via the `_get_anthropic_sdk()` accessor below, which caches
  30  # the module after the first call and returns None on ImportError.
  31  _anthropic_sdk: Any = ...  # sentinel — None means "tried and missing"
  32  
  33  
  34  def _get_anthropic_sdk():
  35      """Return the ``anthropic`` SDK module, importing lazily. None if not installed."""
  36      global _anthropic_sdk
  37      if _anthropic_sdk is ...:
  38          try:
  39              import anthropic as _sdk
  40              _anthropic_sdk = _sdk
  41          except ImportError:
  42              _anthropic_sdk = None
  43      return _anthropic_sdk
  44  
logger = logging.getLogger(__name__)

# Hermes reasoning-effort level → thinking token budget.  NOTE(review): the
# consumers are outside this chunk — presumably applied as an explicit
# ``thinking.budget_tokens`` on pre-adaptive models; confirm at call sites.
THINKING_BUDGET: Dict[str, int] = {"xhigh": 32000, "high": 16000, "medium": 8000, "low": 4000}
# Hermes effort → Anthropic adaptive-thinking effort (output_config.effort).
# Anthropic exposes 5 levels on 4.7+: low, medium, high, xhigh, max.
# Opus/Sonnet 4.6 only expose 4 levels: low, medium, high, max — no xhigh.
# We preserve xhigh as xhigh on 4.7+ (the recommended default for coding/
# agentic work) and downgrade it to max on pre-4.7 adaptive models (which
# is the strongest level they accept).  "minimal" is a legacy alias that
# maps to low on every model.  See:
# https://platform.claude.com/docs/en/about-claude/models/migration-guide
ADAPTIVE_EFFORT_MAP: Dict[str, str] = {
    "max":     "max",
    "xhigh":   "xhigh",
    "high":    "high",
    "medium":  "medium",
    "low":     "low",
    "minimal": "low",
}

# Models that accept the "xhigh" output_config.effort level.  Opus 4.7 added
# xhigh as a distinct level between high and max; older adaptive-thinking
# models (4.6) reject it with a 400.  Keep this substring list in sync with
# the Anthropic migration guide as new model families ship.
_XHIGH_EFFORT_SUBSTRINGS: Tuple[str, ...] = ("4-7", "4.7")

# Models where extended thinking is deprecated/removed (4.6+ behavior: adaptive
# is the only supported mode; 4.7 additionally forbids manual thinking entirely
# and drops temperature/top_p/top_k).
_ADAPTIVE_THINKING_SUBSTRINGS: Tuple[str, ...] = ("4-6", "4.6", "4-7", "4.7")

# Models where temperature/top_p/top_k return 400 if set to non-default values.
# This is the Opus 4.7 contract; future 4.x+ models are expected to follow it.
_NO_SAMPLING_PARAMS_SUBSTRINGS: Tuple[str, ...] = ("4-7", "4.7")
# Models that accept ``speed: "fast"`` (Anthropic Fast Mode) — Opus 4.6 only.
_FAST_MODE_SUPPORTED_SUBSTRINGS: Tuple[str, ...] = ("opus-4-6", "opus-4.6")

# ── Max output token limits per Anthropic model ───────────────────────
# Source: Anthropic docs + Cline model catalog.  Anthropic's API requires
# max_tokens as a mandatory field.  Previously we hardcoded 16384, which
# starves thinking-enabled models (thinking tokens count toward the limit).
# Keys are matched as substrings of the (dot-normalized) model name; see
# _get_anthropic_max_output below.
_ANTHROPIC_OUTPUT_LIMITS: Dict[str, int] = {
    # Claude 4.7
    "claude-opus-4-7":   128_000,
    # Claude 4.6
    "claude-opus-4-6":   128_000,
    "claude-sonnet-4-6":  64_000,
    # Claude 4.5
    "claude-opus-4-5":    64_000,
    "claude-sonnet-4-5":  64_000,
    "claude-haiku-4-5":   64_000,
    # Claude 4
    "claude-opus-4":      32_000,
    "claude-sonnet-4":    64_000,
    # Claude 3.7
    "claude-3-7-sonnet": 128_000,
    # Claude 3.5
    "claude-3-5-sonnet":   8_192,
    "claude-3-5-haiku":    8_192,
    # Claude 3
    "claude-3-opus":       4_096,
    "claude-3-sonnet":     4_096,
    "claude-3-haiku":      4_096,
    # Third-party Anthropic-compatible providers
    "minimax":            131_072,
    # Qwen models via DashScope Anthropic-compatible endpoint
    # DashScope enforces max_tokens ∈ [1, 65536]
    "qwen3":               65_536,
}

# For any model not in the table, assume the highest current limit.
# Future Anthropic models are unlikely to have *less* output capacity.
_ANTHROPIC_DEFAULT_OUTPUT_LIMIT: int = 128_000
 117  
 118  
 119  def _get_anthropic_max_output(model: str) -> int:
 120      """Look up the max output token limit for an Anthropic model.
 121  
 122      Uses substring matching against _ANTHROPIC_OUTPUT_LIMITS so date-stamped
 123      model IDs (claude-sonnet-4-5-20250929) and variant suffixes (:1m, :fast)
 124      resolve correctly.  Longest-prefix match wins to avoid e.g. "claude-3-5"
 125      matching before "claude-3-5-sonnet".
 126  
 127      Normalizes dots to hyphens so that model names like
 128      ``anthropic/claude-opus-4.6`` match the ``claude-opus-4-6`` table key.
 129      """
 130      m = model.lower().replace(".", "-")
 131      best_key = ""
 132      best_val = _ANTHROPIC_DEFAULT_OUTPUT_LIMIT
 133      for key, val in _ANTHROPIC_OUTPUT_LIMITS.items():
 134          if key in m and len(key) > len(best_key):
 135              best_key = key
 136              best_val = val
 137      return best_val
 138  
 139  
 140  def _resolve_positive_anthropic_max_tokens(value) -> Optional[int]:
 141      """Return ``value`` floored to a positive int, or ``None`` if it is not a
 142      finite positive number. Ported from openclaw/openclaw#66664.
 143  
 144      Anthropic's Messages API rejects ``max_tokens`` values that are 0,
 145      negative, non-integer, or non-finite with HTTP 400. Python's ``or``
 146      idiom (``max_tokens or fallback``) correctly catches ``0`` but lets
 147      negative ints and fractional floats (``-1``, ``0.5``) through to the
 148      API, producing a user-visible failure instead of a local error.
 149      """
 150      # Booleans are a subclass of int — exclude explicitly so ``True`` doesn't
 151      # silently become 1 and ``False`` doesn't become 0.
 152      if isinstance(value, bool):
 153          return None
 154      if not isinstance(value, (int, float)):
 155          return None
 156      try:
 157          import math
 158          if not math.isfinite(value):
 159              return None
 160      except Exception:
 161          return None
 162      floored = int(value)  # truncates toward zero for floats
 163      return floored if floored > 0 else None
 164  
 165  
 166  def _resolve_anthropic_messages_max_tokens(
 167      requested,
 168      model: str,
 169      context_length: Optional[int] = None,
 170  ) -> int:
 171      """Resolve the ``max_tokens`` budget for an Anthropic Messages call.
 172  
 173      Prefers ``requested`` when it is a positive finite number; otherwise
 174      falls back to the model's output ceiling. Raises ``ValueError`` if no
 175      positive budget can be resolved (should not happen with current model
 176      table defaults, but guards against a future regression where
 177      ``_get_anthropic_max_output`` could return ``0``).
 178  
 179      Separately, callers apply a context-window clamp — this resolver does
 180      not, to keep the positive-value contract independent of endpoint
 181      specifics.
 182  
 183      Ported from openclaw/openclaw#66664 (resolveAnthropicMessagesMaxTokens).
 184      """
 185      resolved = _resolve_positive_anthropic_max_tokens(requested)
 186      if resolved is not None:
 187          return resolved
 188      fallback = _get_anthropic_max_output(model)
 189      if fallback > 0:
 190          return fallback
 191      raise ValueError(
 192          f"Anthropic Messages adapter requires a positive max_tokens value for "
 193          f"model {model!r}; got {requested!r} and no model default resolved."
 194      )
 195  
 196  
 197  def _supports_adaptive_thinking(model: str) -> bool:
 198      """Return True for Claude 4.6+ models that support adaptive thinking."""
 199      return any(v in model for v in _ADAPTIVE_THINKING_SUBSTRINGS)
 200  
 201  
 202  def _supports_xhigh_effort(model: str) -> bool:
 203      """Return True for models that accept the 'xhigh' adaptive effort level.
 204  
 205      Opus 4.7 introduced xhigh as a distinct level between high and max.
 206      Pre-4.7 adaptive models (Opus/Sonnet 4.6) only accept low/medium/high/max
 207      and reject xhigh with an HTTP 400. Callers should downgrade xhigh→max
 208      when this returns False.
 209      """
 210      return any(v in model for v in _XHIGH_EFFORT_SUBSTRINGS)
 211  
 212  
 213  def _forbids_sampling_params(model: str) -> bool:
 214      """Return True for models that 400 on any non-default temperature/top_p/top_k.
 215  
 216      Opus 4.7 explicitly rejects sampling parameters; later Claude releases are
 217      expected to follow suit.  Callers should omit these fields entirely rather
 218      than passing zero/default values (the API rejects anything non-null).
 219      """
 220      return any(v in model for v in _NO_SAMPLING_PARAMS_SUBSTRINGS)
 221  
 222  
 223  def _supports_fast_mode(model: str) -> bool:
 224      """Return True for models that support Anthropic Fast Mode (speed=fast).
 225  
 226      Per Anthropic docs, fast mode is currently supported on Opus 4.6 only.
 227      Sending ``speed: "fast"`` to any other Claude model (including Opus 4.7)
 228      returns HTTP 400. This guard prevents silently 400'ing when stale config
 229      or older callers leave fast mode enabled across a model upgrade.
 230      """
 231      return any(v in model for v in _FAST_MODE_SUPPORTED_SUBSTRINGS)
 232  
 233  
# Beta headers for enhanced features (sent with ALL auth types).
# As of Opus 4.7 (2026-04-16), the first two are GA on Claude 4.6+ — the
# beta headers are still accepted (harmless no-op) but not required. Kept
# here so older Claude (4.5, 4.1) + third-party Anthropic-compat endpoints
# that still gate on the headers continue to get the enhanced features.
#
# ``context-1m-2025-08-07`` unlocks the 1M context window on Claude Opus 4.6/4.7
# and Sonnet 4.6 when served via AWS Bedrock or Azure AI Foundry. 1M is GA on
# native Anthropic (api.anthropic.com) for Opus 4.6+, but Bedrock/Azure still
# gate it behind this beta header as of 2026-04 — without it Bedrock caps Opus
# at 200K even though model_metadata.py advertises 1M. The header is a harmless
# no-op on endpoints where 1M is GA.
#
# Migration guide: remove these if you no longer support ≤4.5 models or once
# Bedrock/Azure promote 1M to GA.
_COMMON_BETAS: List[str] = [
    "interleaved-thinking-2025-05-14",
    "fine-grained-tool-streaming-2025-05-14",
    "context-1m-2025-08-07",
]
# MiniMax's Anthropic-compatible endpoints fail tool-use requests when
# the fine-grained tool streaming beta is present.  Omit it so tool calls
# fall back to the provider's default response path.
# NOTE: must stay equal to the corresponding _COMMON_BETAS entry above —
# it is used to filter that list in _common_betas_for_base_url.
_TOOL_STREAMING_BETA = "fine-grained-tool-streaming-2025-05-14"
# 1M context beta — see comment on _COMMON_BETAS above. Stripped for
# Bearer-auth (MiniMax) endpoints since they host their own models and
# unknown Anthropic beta headers risk request rejection.
# NOTE: must stay equal to the corresponding _COMMON_BETAS entry above.
_CONTEXT_1M_BETA = "context-1m-2025-08-07"

# Fast mode beta — enables the ``speed: "fast"`` request parameter for
# significantly higher output token throughput on Opus 4.6 (~2.5x).
# See https://platform.claude.com/docs/en/build-with-claude/fast-mode
_FAST_MODE_BETA = "fast-mode-2026-02-01"

# Additional beta headers required for OAuth/subscription auth.
# Matches what Claude Code (and pi-ai / OpenCode) send.
_OAUTH_ONLY_BETAS: List[str] = [
    "claude-code-20250219",
    "oauth-2025-04-20",
]

# Claude Code identity — required for OAuth requests to be routed correctly.
# Without these, Anthropic's infrastructure intermittently 500s OAuth traffic.
# The version must stay reasonably current — Anthropic rejects OAuth requests
# when the spoofed user-agent version is too far behind the actual release.
_CLAUDE_CODE_VERSION_FALLBACK = "2.1.74"
# Memoized result of _detect_claude_code_version(); None until first lookup.
_claude_code_version_cache: Optional[str] = None
 281  
 282  
 283  def _detect_claude_code_version() -> str:
 284      """Detect the installed Claude Code version, fall back to a static constant.
 285  
 286      Anthropic's OAuth infrastructure validates the user-agent version and may
 287      reject requests with a version that's too old.  Detecting dynamically means
 288      users who keep Claude Code updated never hit stale-version 400s.
 289      """
 290      import subprocess as _sp
 291  
 292      for cmd in ("claude", "claude-code"):
 293          try:
 294              result = _sp.run(
 295                  [cmd, "--version"],
 296                  capture_output=True, text=True, timeout=5,
 297              )
 298              if result.returncode == 0 and result.stdout.strip():
 299                  # Output is like "2.1.74 (Claude Code)" or just "2.1.74"
 300                  version = result.stdout.strip().split()[0]
 301                  if version and version[0].isdigit():
 302                      return version
 303          except Exception:
 304              pass
 305      return _CLAUDE_CODE_VERSION_FALLBACK
 306  
 307  
# Exact system-prompt prefix Claude Code sends. NOTE(review): consumers are
# outside this chunk — presumably prepended/detected when spoofing Claude Code
# identity on OAuth requests; confirm at call sites.
_CLAUDE_CODE_SYSTEM_PREFIX = "You are Claude Code, Anthropic's official CLI for Claude."
# Name prefix marking MCP-provided tools. NOTE(review): usage is outside this
# chunk — confirm at call sites.
_MCP_TOOL_PREFIX = "mcp_"
 310  
 311  
 312  def _get_claude_code_version() -> str:
 313      """Lazily detect the installed Claude Code version when OAuth headers need it."""
 314      global _claude_code_version_cache
 315      if _claude_code_version_cache is None:
 316          _claude_code_version_cache = _detect_claude_code_version()
 317      return _claude_code_version_cache
 318  
 319  
 320  def _is_oauth_token(key: str) -> bool:
 321      """Check if the key is an Anthropic OAuth/setup token.
 322  
 323      Positively identifies Anthropic OAuth tokens by their key format:
 324      - ``sk-ant-`` prefix (but NOT ``sk-ant-api``) → setup tokens, managed keys
 325      - ``eyJ`` prefix → JWTs from the Anthropic OAuth flow
 326      - ``cc-`` prefix → Claude Code OAuth access tokens (from CLAUDE_CODE_OAUTH_TOKEN)
 327  
 328      Non-Anthropic keys (MiniMax, Alibaba, etc.) don't match any pattern
 329      and correctly return False.
 330      """
 331      if not key:
 332          return False
 333      # Regular Anthropic Console API keys — x-api-key auth, never OAuth
 334      if key.startswith("sk-ant-api"):
 335          return False
 336      # Anthropic-issued tokens (setup-tokens sk-ant-oat-*, managed keys)
 337      if key.startswith("sk-ant-"):
 338          return True
 339      # JWTs from Anthropic OAuth flow
 340      if key.startswith("eyJ"):
 341          return True
 342      # Claude Code OAuth access tokens (opaque, from CLAUDE_CODE_OAUTH_TOKEN)
 343      if key.startswith("cc-"):
 344          return True
 345      return False
 346  
 347  
 348  def _normalize_base_url_text(base_url) -> str:
 349      """Normalize SDK/base transport URL values to a plain string for inspection.
 350  
 351      Some client objects expose ``base_url`` as an ``httpx.URL`` instead of a raw
 352      string.  Provider/auth detection should accept either shape.
 353      """
 354      if not base_url:
 355          return ""
 356      return str(base_url).strip()
 357  
 358  
 359  def _is_third_party_anthropic_endpoint(base_url: str | None) -> bool:
 360      """Return True for non-Anthropic endpoints using the Anthropic Messages API.
 361  
 362      Third-party proxies (Azure AI Foundry, AWS Bedrock, self-hosted) authenticate
 363      with their own API keys via x-api-key, not Anthropic OAuth tokens. OAuth
 364      detection should be skipped for these endpoints.
 365      """
 366      normalized = _normalize_base_url_text(base_url)
 367      if not normalized:
 368          return False  # No base_url = direct Anthropic API
 369      normalized = normalized.rstrip("/").lower()
 370      if "anthropic.com" in normalized:
 371          return False  # Direct Anthropic API — OAuth applies
 372      return True  # Any other endpoint is a third-party proxy
 373  
 374  
 375  def _is_kimi_coding_endpoint(base_url: str | None) -> bool:
 376      """Return True for Kimi's /coding endpoint that requires claude-code UA."""
 377      normalized = _normalize_base_url_text(base_url)
 378      if not normalized:
 379          return False
 380      return normalized.rstrip("/").lower().startswith("https://api.kimi.com/coding")
 381  
 382  
# Model-name prefixes that identify the Kimi / Moonshot family.  Covers
# - official slugs: ``kimi-k2.5``, ``kimi_thinking``, ``moonshot-v1-8k``
# - common release lines: ``k1.5-...``, ``k2-thinking``, ``k25-...``, ``k2.5-...``
# Matched case-insensitively against the post-``normalize_model_name`` form,
# so a caller's ``provider/vendor/model`` slug is handled the same as a
# bare name.
_KIMI_FAMILY_MODEL_PREFIXES: Tuple[str, ...] = (
    "kimi-", "kimi_",
    "moonshot-", "moonshot_",
    "k1.", "k1-",
    "k2.", "k2-",
    "k25", "k2.5",
)
 396  
 397  
 398  def _model_name_is_kimi_family(model: str | None) -> bool:
 399      if not isinstance(model, str):
 400          return False
 401      m = model.strip().lower()
 402      if not m:
 403          return False
 404      # Strip vendor prefix (e.g. ``moonshotai/kimi-k2.5`` → ``kimi-k2.5``)
 405      if "/" in m:
 406          m = m.rsplit("/", 1)[-1]
 407      return m.startswith(_KIMI_FAMILY_MODEL_PREFIXES)
 408  
 409  
 410  def _is_kimi_family_endpoint(base_url: str | None, model: str | None = None) -> bool:
 411      """Return True for any Kimi / Moonshot Anthropic-Messages-speaking endpoint.
 412  
 413      Broader than ``_is_kimi_coding_endpoint`` — matches:
 414  
 415      - Kimi's official ``/coding`` URL (legacy check, preserved)
 416      - Any ``api.kimi.com`` / ``moonshot.ai`` / ``moonshot.cn`` host
 417      - Custom or proxied endpoints whose *model* name is in the Kimi / Moonshot
 418        family (``kimi-*``, ``moonshot-*``, ``k1.*``, ``k2.*``, …).  Users with
 419        ``api_mode: anthropic_messages`` on a private gateway fronting Kimi
 420        fall into this branch — the upstream still enforces Kimi's thinking
 421        semantics (reasoning_content required on every replayed tool-call
 422        message) regardless of the gateway's hostname.
 423  
 424      Used to decide whether to drop Anthropic's ``thinking`` kwarg and to
 425      preserve unsigned reasoning_content-derived thinking blocks on replay.
 426      See hermes-agent#13848, #17057.
 427      """
 428      if _is_kimi_coding_endpoint(base_url):
 429          return True
 430      for _domain in ("api.kimi.com", "moonshot.ai", "moonshot.cn"):
 431          if base_url_host_matches(base_url or "", _domain):
 432              return True
 433      if _model_name_is_kimi_family(model):
 434          return True
 435      return False
 436  
 437  
 438  def _is_deepseek_anthropic_endpoint(base_url: str | None) -> bool:
 439      """Return True for DeepSeek's Anthropic-compatible endpoint.
 440  
 441      DeepSeek's ``/anthropic`` route speaks the Anthropic Messages protocol
 442      but, when thinking mode is enabled, requires the ``thinking`` blocks
 443      from prior assistant turns to round-trip on subsequent requests — the
 444      generic third-party path strips them and triggers HTTP 400::
 445  
 446          The content[].thinking in the thinking mode must be passed back
 447          to the API.
 448  
 449      Per DeepSeek's published compatibility matrix the blocks are unsigned
 450      (no Anthropic-proprietary signature, no ``redacted_thinking`` support),
 451      so this endpoint is handled with the same strip-signed / keep-unsigned
 452      policy used for Kimi's ``/coding`` endpoint.  The match is pinned to
 453      the ``/anthropic`` path so the OpenAI-compatible ``api.deepseek.com``
 454      base URL (which never reaches this adapter) is not misclassified.
 455      See hermes-agent#16748.
 456      """
 457      if not base_url_host_matches(base_url or "", "api.deepseek.com"):
 458          return False
 459      normalized = _normalize_base_url_text(base_url)
 460      if not normalized:
 461          return False
 462      return "/anthropic" in normalized.rstrip("/").lower()
 463  
 464  
 465  def _requires_bearer_auth(base_url: str | None) -> bool:
 466      """Return True for Anthropic-compatible providers that require Bearer auth.
 467  
 468      Some third-party /anthropic endpoints implement Anthropic's Messages API but
 469      require Authorization: Bearer *** of Anthropic's native x-api-key header.
 470      MiniMax's global and China Anthropic-compatible endpoints follow this pattern.
 471      """
 472      normalized = _normalize_base_url_text(base_url)
 473      if not normalized:
 474          return False
 475      normalized = normalized.rstrip("/").lower()
 476      return normalized.startswith(("https://api.minimax.io/anthropic", "https://api.minimaxi.com/anthropic"))
 477  
 478  
 479  def _common_betas_for_base_url(
 480      base_url: str | None,
 481      *,
 482      drop_context_1m_beta: bool = False,
 483  ) -> list[str]:
 484      """Return the beta headers that are safe for the configured endpoint.
 485  
 486      MiniMax's Anthropic-compatible endpoints (Bearer-auth) reject requests
 487      that include Anthropic's ``fine-grained-tool-streaming`` beta — every
 488      tool-use message triggers a connection error.  Strip that beta for
 489      Bearer-auth endpoints while keeping all other betas intact.
 490  
 491      The ``context-1m-2025-08-07`` beta is also stripped for Bearer-auth
 492      endpoints — MiniMax hosts its own models, not Claude, so the header is
 493      irrelevant at best and risks request rejection at worst.
 494  
 495      ``drop_context_1m_beta=True`` additionally strips the 1M-context beta on
 496      otherwise-unrelated endpoints. The OAuth retry path flips this flag after
 497      a subscription rejects the beta with
 498      "The long context beta is not yet available for this subscription" so
 499      subsequent requests in the same session don't repeat the probe. See the
 500      reactive recovery loop in ``run_agent.py`` and issue-comment history on
 501      PR #17680 for the full rationale.
 502      """
 503      if _requires_bearer_auth(base_url):
 504          _stripped = {_TOOL_STREAMING_BETA, _CONTEXT_1M_BETA}
 505          return [b for b in _COMMON_BETAS if b not in _stripped]
 506      if drop_context_1m_beta:
 507          return [b for b in _COMMON_BETAS if b != _CONTEXT_1M_BETA]
 508      return _COMMON_BETAS
 509  
 510  
def build_anthropic_client(
    api_key: str,
    base_url: Optional[str] = None,
    timeout: Optional[float] = None,
    *,
    drop_context_1m_beta: bool = False,
):
    """Create an Anthropic client, auto-detecting setup-tokens vs API keys.

    If *timeout* is provided it overrides the default 900s read timeout.  The
    connect timeout stays at 10s.  Callers pass this from the per-provider /
    per-model ``request_timeout_seconds`` config so Anthropic-native and
    Anthropic-compatible providers respect the same knob as OpenAI-wire
    providers.

    ``drop_context_1m_beta=True`` strips ``context-1m-2025-08-07`` from the
    client-level ``anthropic-beta`` header. Used by the reactive OAuth retry
    path in ``run_agent.py`` when a subscription rejects the beta; leave at
    its default on fresh clients so 1M-capable subscriptions keep the
    capability.

    Returns an anthropic.Anthropic instance.

    Raises ImportError when the ``anthropic`` package is not installed.
    """
    _anthropic_sdk = _get_anthropic_sdk()
    if _anthropic_sdk is None:
        raise ImportError(
            "The 'anthropic' package is required for the Anthropic provider. "
            "Install it with: pip install 'anthropic>=0.39.0'"
        )

    normalize_proxy_env_vars()

    from httpx import Timeout

    normalized_base_url = _normalize_base_url_text(base_url)
    # Only a positive numeric timeout overrides the 900s default read timeout.
    # NOTE(review): ``True`` would pass this isinstance check (bool is an int
    # subclass) and yield a 1s read timeout — callers are expected to pass
    # real numerics; confirm if config values are ever raw booleans.
    _read_timeout = timeout if (isinstance(timeout, (int, float)) and timeout > 0) else 900.0
    kwargs: Dict[str, Any] = {
        "timeout": Timeout(timeout=float(_read_timeout), connect=10.0),
    }
    if normalized_base_url:
        # Azure Anthropic endpoints require an ``api-version`` query parameter.
        # Pass it via default_query so the SDK appends it to every request URL
        # without corrupting the base_url (appending it directly produces
        # malformed paths like /anthropic?api-version=.../v1/messages).
        _is_azure_endpoint = "azure.com" in normalized_base_url.lower()
        if _is_azure_endpoint and "api-version" not in normalized_base_url:
            kwargs["base_url"] = normalized_base_url.rstrip("/")
            kwargs["default_query"] = {"api-version": "2025-04-15"}
        else:
            kwargs["base_url"] = normalized_base_url
    common_betas = _common_betas_for_base_url(
        normalized_base_url,
        drop_context_1m_beta=drop_context_1m_beta,
    )

    # Auth-mode dispatch. Branch ORDER is load-bearing: Kimi /coding before
    # Bearer-auth, Bearer-auth before third-party, third-party before OAuth
    # token-shape detection (see per-branch comments).
    if _is_kimi_coding_endpoint(base_url):
        # Kimi's /coding endpoint requires User-Agent: claude-code/0.1.0
        # to be recognized as a valid Coding Agent. Without it, returns 403.
        # Check this BEFORE _requires_bearer_auth since both match api.kimi.com/coding.
        kwargs["api_key"] = api_key
        kwargs["default_headers"] = {
            "User-Agent": "claude-code/0.1.0",
            **( {"anthropic-beta": ",".join(common_betas)} if common_betas else {} )
        }
    elif _requires_bearer_auth(normalized_base_url):
        # Some Anthropic-compatible providers (e.g. MiniMax) expect the API key in
        # Authorization: Bearer *** for regular API keys. Route those endpoints
        # through auth_token so the SDK sends Bearer auth instead of x-api-key.
        # Check this before OAuth token shape detection because MiniMax secrets do
        # not use Anthropic's sk-ant-api prefix and would otherwise be misread as
        # Anthropic OAuth/setup tokens.
        kwargs["auth_token"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_third_party_anthropic_endpoint(base_url):
        # Third-party proxies (Azure AI Foundry, AWS Bedrock, etc.) use their
        # own API keys with x-api-key auth. Skip OAuth detection — their keys
        # don't follow Anthropic's sk-ant-* prefix convention and would be
        # misclassified as OAuth tokens.
        kwargs["api_key"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}
    elif _is_oauth_token(api_key):
        # OAuth access token / setup-token → Bearer auth + Claude Code identity.
        # Anthropic routes OAuth requests based on user-agent and headers;
        # without Claude Code's fingerprint, requests get intermittent 500s.
        all_betas = common_betas + _OAUTH_ONLY_BETAS
        kwargs["auth_token"] = api_key
        kwargs["default_headers"] = {
            "anthropic-beta": ",".join(all_betas),
            "user-agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
            "x-app": "cli",
        }
    else:
        # Regular API key → x-api-key header + common betas
        kwargs["api_key"] = api_key
        if common_betas:
            kwargs["default_headers"] = {"anthropic-beta": ",".join(common_betas)}

    return _anthropic_sdk.Anthropic(**kwargs)
 611  
 612  
 613  def build_anthropic_bedrock_client(region: str):
 614      """Create an AnthropicBedrock client for Bedrock Claude models.
 615  
 616      Uses the Anthropic SDK's native Bedrock adapter, which provides full
 617      Claude feature parity: prompt caching, thinking budgets, adaptive
 618      thinking, fast mode — features not available via the Converse API.
 619  
 620      Attaches the common Anthropic beta headers as client-level defaults so
 621      that Bedrock-hosted Claude models get the same enhanced features as
 622      native Anthropic. The ``context-1m-2025-08-07`` beta in particular
 623      unlocks the 1M context window for Opus 4.6/4.7 on Bedrock — without
 624      it, Bedrock caps these models at 200K even though the Anthropic API
 625      serves them with 1M natively.
 626  
 627      Auth uses the boto3 default credential chain (IAM roles, SSO, env vars).
 628      """
 629      _anthropic_sdk = _get_anthropic_sdk()
 630      if _anthropic_sdk is None:
 631          raise ImportError(
 632              "The 'anthropic' package is required for the Bedrock provider. "
 633              "Install it with: pip install 'anthropic>=0.39.0'"
 634          )
 635      if not hasattr(_anthropic_sdk, "AnthropicBedrock"):
 636          raise ImportError(
 637              "anthropic.AnthropicBedrock not available. "
 638              "Upgrade with: pip install 'anthropic>=0.39.0'"
 639          )
 640      from httpx import Timeout
 641  
 642      return _anthropic_sdk.AnthropicBedrock(
 643          aws_region=region,
 644          timeout=Timeout(timeout=900.0, connect=10.0),
 645          default_headers={"anthropic-beta": ",".join(_COMMON_BETAS)},
 646      )
 647  
 648  
 649  def _read_claude_code_credentials_from_keychain() -> Optional[Dict[str, Any]]:
 650      """Read Claude Code OAuth credentials from the macOS Keychain.
 651  
 652      Claude Code >=2.1.114 stores credentials in the macOS Keychain under the
 653      service name "Claude Code-credentials" rather than (or in addition to)
 654      the JSON file at ~/.claude/.credentials.json.
 655  
 656      The password field contains a JSON string with the same claudeAiOauth
 657      structure as the JSON file.
 658  
 659      Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
 660      """
 661      if platform.system() != "Darwin":
 662          return None
 663  
 664      try:
 665          # Read the "Claude Code-credentials" generic password entry
 666          result = subprocess.run(
 667              ["security", "find-generic-password",
 668               "-s", "Claude Code-credentials",
 669               "-w"],
 670              capture_output=True,
 671              text=True,
 672              timeout=5,
 673          )
 674      except (OSError, subprocess.TimeoutExpired):
 675          logger.debug("Keychain: security command not available or timed out")
 676          return None
 677  
 678      if result.returncode != 0:
 679          logger.debug("Keychain: no entry found for 'Claude Code-credentials'")
 680          return None
 681  
 682      raw = result.stdout.strip()
 683      if not raw:
 684          return None
 685  
 686      try:
 687          data = json.loads(raw)
 688      except json.JSONDecodeError:
 689          logger.debug("Keychain: credentials payload is not valid JSON")
 690          return None
 691  
 692      oauth_data = data.get("claudeAiOauth")
 693      if oauth_data and isinstance(oauth_data, dict):
 694          access_token = oauth_data.get("accessToken", "")
 695          if access_token:
 696              return {
 697                  "accessToken": access_token,
 698                  "refreshToken": oauth_data.get("refreshToken", ""),
 699                  "expiresAt": oauth_data.get("expiresAt", 0),
 700                  "source": "macos_keychain",
 701              }
 702  
 703      return None
 704  
 705  
 706  def read_claude_code_credentials() -> Optional[Dict[str, Any]]:
 707      """Read refreshable Claude Code OAuth credentials.
 708  
 709      Checks two sources in order:
 710        1. macOS Keychain (Darwin only) — "Claude Code-credentials" entry
 711        2. ~/.claude/.credentials.json file
 712  
 713      This intentionally excludes ~/.claude.json primaryApiKey. Opencode's
 714      subscription flow is OAuth/setup-token based with refreshable credentials,
 715      and native direct Anthropic provider usage should follow that path rather
 716      than auto-detecting Claude's first-party managed key.
 717  
 718      Returns dict with {accessToken, refreshToken?, expiresAt?} or None.
 719      """
 720      # Try macOS Keychain first (covers Claude Code >=2.1.114)
 721      kc_creds = _read_claude_code_credentials_from_keychain()
 722      if kc_creds:
 723          return kc_creds
 724  
 725      # Fall back to JSON file
 726      cred_path = Path.home() / ".claude" / ".credentials.json"
 727      if cred_path.exists():
 728          try:
 729              data = json.loads(cred_path.read_text(encoding="utf-8"))
 730              oauth_data = data.get("claudeAiOauth")
 731              if oauth_data and isinstance(oauth_data, dict):
 732                  access_token = oauth_data.get("accessToken", "")
 733                  if access_token:
 734                      return {
 735                          "accessToken": access_token,
 736                          "refreshToken": oauth_data.get("refreshToken", ""),
 737                          "expiresAt": oauth_data.get("expiresAt", 0),
 738                          "source": "claude_code_credentials_file",
 739                      }
 740          except (json.JSONDecodeError, OSError, IOError) as e:
 741              logger.debug("Failed to read ~/.claude/.credentials.json: %s", e)
 742  
 743      return None
 744  
 745  
 746  def read_claude_managed_key() -> Optional[str]:
 747      """Read Claude's native managed key from ~/.claude.json for diagnostics only."""
 748      claude_json = Path.home() / ".claude.json"
 749      if claude_json.exists():
 750          try:
 751              data = json.loads(claude_json.read_text(encoding="utf-8"))
 752              primary_key = data.get("primaryApiKey", "")
 753              if isinstance(primary_key, str) and primary_key.strip():
 754                  return primary_key.strip()
 755          except (json.JSONDecodeError, OSError, IOError) as e:
 756              logger.debug("Failed to read ~/.claude.json: %s", e)
 757      return None
 758  
 759  
 760  def is_claude_code_token_valid(creds: Dict[str, Any]) -> bool:
 761      """Check if Claude Code credentials have a non-expired access token."""
 762      import time
 763  
 764      expires_at = creds.get("expiresAt", 0)
 765      if not expires_at:
 766          # No expiry set (managed keys) — valid if token is present
 767          return bool(creds.get("accessToken"))
 768  
 769      # expiresAt is in milliseconds since epoch
 770      now_ms = int(time.time() * 1000)
 771      # Allow 60 seconds of buffer
 772      return now_ms < (expires_at - 60_000)
 773  
 774  
 775  def refresh_anthropic_oauth_pure(refresh_token: str, *, use_json: bool = False) -> Dict[str, Any]:
 776      """Refresh an Anthropic OAuth token without mutating local credential files."""
 777      import time
 778      import urllib.parse
 779      import urllib.request
 780  
 781      if not refresh_token:
 782          raise ValueError("refresh_token is required")
 783  
 784      client_id = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
 785      if use_json:
 786          data = json.dumps({
 787              "grant_type": "refresh_token",
 788              "refresh_token": refresh_token,
 789              "client_id": client_id,
 790          }).encode()
 791          content_type = "application/json"
 792      else:
 793          data = urllib.parse.urlencode({
 794              "grant_type": "refresh_token",
 795              "refresh_token": refresh_token,
 796              "client_id": client_id,
 797          }).encode()
 798          content_type = "application/x-www-form-urlencoded"
 799  
 800      token_endpoints = [
 801          "https://platform.claude.com/v1/oauth/token",
 802          "https://console.anthropic.com/v1/oauth/token",
 803      ]
 804      last_error = None
 805      for endpoint in token_endpoints:
 806          req = urllib.request.Request(
 807              endpoint,
 808              data=data,
 809              headers={
 810                  "Content-Type": content_type,
 811                  "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
 812              },
 813              method="POST",
 814          )
 815          try:
 816              with urllib.request.urlopen(req, timeout=10) as resp:
 817                  result = json.loads(resp.read().decode())
 818          except Exception as exc:
 819              last_error = exc
 820              logger.debug("Anthropic token refresh failed at %s: %s", endpoint, exc)
 821              continue
 822  
 823          access_token = result.get("access_token", "")
 824          if not access_token:
 825              raise ValueError("Anthropic refresh response was missing access_token")
 826          next_refresh = result.get("refresh_token", refresh_token)
 827          expires_in = result.get("expires_in", 3600)
 828          return {
 829              "access_token": access_token,
 830              "refresh_token": next_refresh,
 831              "expires_at_ms": int(time.time() * 1000) + (expires_in * 1000),
 832          }
 833  
 834      if last_error is not None:
 835          raise last_error
 836      raise ValueError("Anthropic token refresh failed")
 837  
 838  
 839  def _refresh_oauth_token(creds: Dict[str, Any]) -> Optional[str]:
 840      """Attempt to refresh an expired Claude Code OAuth token."""
 841      refresh_token = creds.get("refreshToken", "")
 842      if not refresh_token:
 843          logger.debug("No refresh token available — cannot refresh")
 844          return None
 845  
 846      try:
 847          refreshed = refresh_anthropic_oauth_pure(refresh_token, use_json=False)
 848          _write_claude_code_credentials(
 849              refreshed["access_token"],
 850              refreshed["refresh_token"],
 851              refreshed["expires_at_ms"],
 852          )
 853          logger.debug("Successfully refreshed Claude Code OAuth token")
 854          return refreshed["access_token"]
 855      except Exception as e:
 856          logger.debug("Failed to refresh Claude Code token: %s", e)
 857          return None
 858  
 859  
 860  def _write_claude_code_credentials(
 861      access_token: str,
 862      refresh_token: str,
 863      expires_at_ms: int,
 864      *,
 865      scopes: Optional[list] = None,
 866  ) -> None:
 867      """Write refreshed credentials back to ~/.claude/.credentials.json.
 868  
 869      The optional *scopes* list (e.g. ``["user:inference", "user:profile", ...]``)
 870      is persisted so that Claude Code's own auth check recognises the credential
 871      as valid.  Claude Code >=2.1.81 gates on the presence of ``"user:inference"``
 872      in the stored scopes before it will use the token.
 873      """
 874      cred_path = Path.home() / ".claude" / ".credentials.json"
 875      try:
 876          # Read existing file to preserve other fields
 877          existing = {}
 878          if cred_path.exists():
 879              existing = json.loads(cred_path.read_text(encoding="utf-8"))
 880  
 881          oauth_data: Dict[str, Any] = {
 882              "accessToken": access_token,
 883              "refreshToken": refresh_token,
 884              "expiresAt": expires_at_ms,
 885          }
 886          if scopes is not None:
 887              oauth_data["scopes"] = scopes
 888          elif "claudeAiOauth" in existing and "scopes" in existing["claudeAiOauth"]:
 889              # Preserve previously-stored scopes when the refresh response
 890              # does not include a scope field.
 891              oauth_data["scopes"] = existing["claudeAiOauth"]["scopes"]
 892  
 893          existing["claudeAiOauth"] = oauth_data
 894  
 895          cred_path.parent.mkdir(parents=True, exist_ok=True)
 896          _tmp_cred = cred_path.with_suffix(".tmp")
 897          _tmp_cred.write_text(json.dumps(existing, indent=2), encoding="utf-8")
 898          _tmp_cred.replace(cred_path)
 899          # Restrict permissions (credentials file)
 900          cred_path.chmod(0o600)
 901      except (OSError, IOError) as e:
 902          logger.debug("Failed to write refreshed credentials: %s", e)
 903  
 904  
 905  def _resolve_claude_code_token_from_credentials(creds: Optional[Dict[str, Any]] = None) -> Optional[str]:
 906      """Resolve a token from Claude Code credential files, refreshing if needed."""
 907      creds = creds or read_claude_code_credentials()
 908      if creds and is_claude_code_token_valid(creds):
 909          logger.debug("Using Claude Code credentials (auto-detected)")
 910          return creds["accessToken"]
 911      if creds:
 912          logger.debug("Claude Code credentials expired — attempting refresh")
 913          refreshed = _refresh_oauth_token(creds)
 914          if refreshed:
 915              return refreshed
 916          logger.debug("Token refresh failed — re-run 'claude setup-token' to reauthenticate")
 917      return None
 918  
 919  
 920  def _prefer_refreshable_claude_code_token(env_token: str, creds: Optional[Dict[str, Any]]) -> Optional[str]:
 921      """Prefer Claude Code creds when a persisted env OAuth token would shadow refresh.
 922  
 923      Hermes historically persisted setup tokens into ANTHROPIC_TOKEN. That makes
 924      later refresh impossible because the static env token wins before we ever
 925      inspect Claude Code's refreshable credential file. If we have a refreshable
 926      Claude Code credential record, prefer it over the static env OAuth token.
 927      """
 928      if not env_token or not _is_oauth_token(env_token) or not isinstance(creds, dict):
 929          return None
 930      if not creds.get("refreshToken"):
 931          return None
 932  
 933      resolved = _resolve_claude_code_token_from_credentials(creds)
 934      if resolved and resolved != env_token:
 935          logger.debug(
 936              "Preferring Claude Code credential file over static env OAuth token so refresh can proceed"
 937          )
 938          return resolved
 939      return None
 940  
 941  
 942  def resolve_anthropic_token() -> Optional[str]:
 943      """Resolve an Anthropic token from all available sources.
 944  
 945      Priority:
 946        1. ANTHROPIC_TOKEN env var (OAuth/setup token saved by Hermes)
 947        2. CLAUDE_CODE_OAUTH_TOKEN env var
 948        3. Claude Code credentials (~/.claude.json or ~/.claude/.credentials.json)
 949           — with automatic refresh if expired and a refresh token is available
 950        4. ANTHROPIC_API_KEY env var (regular API key, or legacy fallback)
 951  
 952      Returns the token string or None.
 953      """
 954      creds = read_claude_code_credentials()
 955  
 956      # 1. Hermes-managed OAuth/setup token env var
 957      token = os.getenv("ANTHROPIC_TOKEN", "").strip()
 958      if token:
 959          preferred = _prefer_refreshable_claude_code_token(token, creds)
 960          if preferred:
 961              return preferred
 962          return token
 963  
 964      # 2. CLAUDE_CODE_OAUTH_TOKEN (used by Claude Code for setup-tokens)
 965      cc_token = os.getenv("CLAUDE_CODE_OAUTH_TOKEN", "").strip()
 966      if cc_token:
 967          preferred = _prefer_refreshable_claude_code_token(cc_token, creds)
 968          if preferred:
 969              return preferred
 970          return cc_token
 971  
 972      # 3. Claude Code credential file
 973      resolved_claude_token = _resolve_claude_code_token_from_credentials(creds)
 974      if resolved_claude_token:
 975          return resolved_claude_token
 976  
 977      # 4. Regular API key, or a legacy OAuth token saved in ANTHROPIC_API_KEY.
 978      # This remains as a compatibility fallback for pre-migration Hermes configs.
 979      api_key = os.getenv("ANTHROPIC_API_KEY", "").strip()
 980      if api_key:
 981          return api_key
 982  
 983      return None
 984  
 985  
 986  def run_oauth_setup_token() -> Optional[str]:
 987      """Run 'claude setup-token' interactively and return the resulting token.
 988  
 989      Checks multiple sources after the subprocess completes:
 990        1. Claude Code credential files (may be written by the subprocess)
 991        2. CLAUDE_CODE_OAUTH_TOKEN / ANTHROPIC_TOKEN env vars
 992  
 993      Returns the token string, or None if no credentials were obtained.
 994      Raises FileNotFoundError if the 'claude' CLI is not installed.
 995      """
 996      import shutil
 997      import subprocess
 998  
 999      claude_path = shutil.which("claude")
1000      if not claude_path:
1001          raise FileNotFoundError(
1002              "The 'claude' CLI is not installed. "
1003              "Install it with: npm install -g @anthropic-ai/claude-code"
1004          )
1005  
1006      # Run interactively — stdin/stdout/stderr inherited so user can interact
1007      try:
1008          subprocess.run([claude_path, "setup-token"])
1009      except (KeyboardInterrupt, EOFError):
1010          return None
1011  
1012      # Check if credentials were saved to Claude Code's config files
1013      creds = read_claude_code_credentials()
1014      if creds and is_claude_code_token_valid(creds):
1015          return creds["accessToken"]
1016  
1017      # Check env vars that may have been set
1018      for env_var in ("CLAUDE_CODE_OAUTH_TOKEN", "ANTHROPIC_TOKEN"):
1019          val = os.getenv(env_var, "").strip()
1020          if val:
1021              return val
1022  
1023      return None
1024  
1025  
# ── Hermes-native PKCE OAuth flow ────────────────────────────────────────
# Mirrors the flow used by Claude Code, pi-ai, and OpenCode.
# Stores credentials in ~/.hermes/.anthropic_oauth.json (our own file).

# OAuth public client ID — same literal used by refresh_anthropic_oauth_pure.
_OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e"
# Token-exchange endpoint (authorization_code grant in run_hermes_oauth_login_pure).
_OAUTH_TOKEN_URL = "https://console.anthropic.com/v1/oauth/token"
# Callback page that displays the authorization code for the user to paste.
_OAUTH_REDIRECT_URI = "https://console.anthropic.com/oauth/code/callback"
# Space-separated scopes requested during authorization.
_OAUTH_SCOPES = "org:create_api_key user:profile user:inference"
# Hermes-managed credential store, distinct from Claude Code's own files.
_HERMES_OAUTH_FILE = get_hermes_home() / ".anthropic_oauth.json"
1035  
1036  
1037  def _generate_pkce() -> tuple:
1038      """Generate PKCE code_verifier and code_challenge (S256)."""
1039      import base64
1040      import hashlib
1041      import secrets
1042  
1043      verifier = base64.urlsafe_b64encode(secrets.token_bytes(32)).rstrip(b"=").decode()
1044      challenge = base64.urlsafe_b64encode(
1045          hashlib.sha256(verifier.encode()).digest()
1046      ).rstrip(b"=").decode()
1047      return verifier, challenge
1048  
1049  
def run_hermes_oauth_login_pure() -> Optional[Dict[str, Any]]:
    """Run Hermes-native OAuth PKCE flow and return credential state.

    Interactive: prints an authorization URL (and tries to open the default
    browser), then prompts the user to paste the code shown after
    authorizing.  Returns a dict of
    ``{access_token, refresh_token, expires_at_ms}``, or None when the user
    aborts, enters no code, or the token exchange fails.
    """
    import time
    import webbrowser

    verifier, challenge = _generate_pkce()

    # Authorization-request parameters.  The PKCE verifier doubles as the
    # OAuth "state" value; it comes back alongside the code and is re-sent
    # in the exchange request below.
    params = {
        "code": "true",
        "client_id": _OAUTH_CLIENT_ID,
        "response_type": "code",
        "redirect_uri": _OAUTH_REDIRECT_URI,
        "scope": _OAUTH_SCOPES,
        "code_challenge": challenge,
        "code_challenge_method": "S256",
        "state": verifier,
    }
    from urllib.parse import urlencode

    auth_url = f"https://claude.ai/oauth/authorize?{urlencode(params)}"

    print()
    print("Authorize Hermes with your Claude Pro/Max subscription.")
    print()
    print("╭─ Claude Pro/Max Authorization ────────────────────╮")
    print("│                                                   │")
    print("│  Open this link in your browser:                  │")
    print("╰───────────────────────────────────────────────────╯")
    print()
    print(f"  {auth_url}")
    print()

    # Opening the browser is best-effort convenience — never fatal.
    try:
        webbrowser.open(auth_url)
        print("  (Browser opened automatically)")
    except Exception:
        pass

    print()
    print("After authorizing, you'll see a code. Paste it below.")
    print()
    try:
        auth_code = input("Authorization code: ").strip()
    except (KeyboardInterrupt, EOFError):
        return None

    if not auth_code:
        print("No code entered.")
        return None

    # The pasted value may be "code#state"; split the two parts apart.
    splits = auth_code.split("#")
    code = splits[0]
    state = splits[1] if len(splits) > 1 else ""

    try:
        import urllib.request

        # Exchange the authorization code (plus PKCE verifier) for tokens.
        exchange_data = json.dumps({
            "grant_type": "authorization_code",
            "client_id": _OAUTH_CLIENT_ID,
            "code": code,
            "state": state,
            "redirect_uri": _OAUTH_REDIRECT_URI,
            "code_verifier": verifier,
        }).encode()

        req = urllib.request.Request(
            _OAUTH_TOKEN_URL,
            data=exchange_data,
            headers={
                "Content-Type": "application/json",
                "User-Agent": f"claude-cli/{_get_claude_code_version()} (external, cli)",
            },
            method="POST",
        )

        with urllib.request.urlopen(req, timeout=15) as resp:
            result = json.loads(resp.read().decode())
    except Exception as e:
        print(f"Token exchange failed: {e}")
        return None

    access_token = result.get("access_token", "")
    refresh_token = result.get("refresh_token", "")
    expires_in = result.get("expires_in", 3600)

    if not access_token:
        print("No access token in response.")
        return None

    # Expiry is tracked in epoch milliseconds, matching the credential files.
    expires_at_ms = int(time.time() * 1000) + (expires_in * 1000)
    return {
        "access_token": access_token,
        "refresh_token": refresh_token,
        "expires_at_ms": expires_at_ms,
    }
1146  
1147  
def read_hermes_oauth_credentials() -> Optional[Dict[str, Any]]:
    """Load Hermes-managed OAuth credentials from ~/.hermes/.anthropic_oauth.json."""
    if not _HERMES_OAUTH_FILE.exists():
        return None
    try:
        creds = json.loads(_HERMES_OAUTH_FILE.read_text(encoding="utf-8"))
    except (json.JSONDecodeError, OSError, IOError) as e:
        logger.debug("Failed to read Hermes OAuth credentials: %s", e)
        return None
    # Only credentials carrying an access token are useful.
    if creds.get("accessToken"):
        return creds
    return None
1158  
1159  
1160  # ---------------------------------------------------------------------------
1161  # Message / tool / response format conversion
1162  # ---------------------------------------------------------------------------
1163  
1164  
1165  def _is_bedrock_model_id(model: str) -> bool:
1166      """Detect AWS Bedrock model IDs that use dots as namespace separators.
1167  
1168      Bedrock model IDs come in two forms:
1169      - Bare:    ``anthropic.claude-opus-4-7``
1170      - Regional (inference profiles): ``us.anthropic.claude-sonnet-4-5-v1:0``
1171  
1172      In both cases the dots separate namespace components, not version
1173      numbers, and must be preserved verbatim for the Bedrock API.
1174      """
1175      lower = model.lower()
1176      # Regional inference-profile prefixes
1177      if any(lower.startswith(p) for p in ("global.", "us.", "eu.", "ap.", "jp.")):
1178          return True
1179      # Bare Bedrock model IDs: provider.model-family
1180      if lower.startswith("anthropic."):
1181          return True
1182      return False
1183  
1184  
def normalize_model_name(model: str, preserve_dots: bool = False) -> str:
    """Normalize a model name for the Anthropic API.

    - Strips the 'anthropic/' prefix (OpenRouter format, case-insensitive).
    - Converts version dots to hyphens for Claude models (OpenRouter uses
      dots, Anthropic uses hyphens: claude-opus-4.6 → claude-opus-4-6),
      unless preserve_dots is True (e.g. Alibaba/DashScope: qwen3.5-plus).
    - Leaves Bedrock model IDs (``anthropic.claude-opus-4-7``) and regional
      inference profiles (``us.anthropic.claude-*``) untouched — their dots
      are namespace separators, not version separators.
    """
    if model.lower().startswith("anthropic/"):
        model = model[len("anthropic/"):]
    if preserve_dots:
        return model

    lowered = model.lower()
    # Bedrock IDs (bare or regional inference-profile) keep their dots
    # verbatim.  See issue #12295.
    if lowered.startswith(("global.", "us.", "eu.", "ap.", "jp.", "anthropic.")):
        return model
    # Dots → hyphens only for Claude models; other vendors (gpt-5.4,
    # gemini-2.5, ...) use dots in their canonical names.  See issue #17171.
    if lowered.startswith(("claude-", "anthropic/")):
        model = model.replace(".", "-")
    return model
1212  
1213  
1214  def _sanitize_tool_id(tool_id: str) -> str:
1215      """Sanitize a tool call ID for the Anthropic API.
1216  
1217      Anthropic requires IDs matching [a-zA-Z0-9_-]. Replace invalid
1218      characters with underscores and ensure non-empty.
1219      """
1220      import re
1221      if not tool_id:
1222          return "tool_0"
1223      sanitized = re.sub(r"[^a-zA-Z0-9_-]", "_", tool_id)
1224      return sanitized or "tool_0"
1225  
1226  
1227  def _normalize_tool_input_schema(schema: Any) -> Dict[str, Any]:
1228      """Normalize tool schemas before sending them to Anthropic.
1229  
1230      Anthropic's tool schema validator rejects nullable unions such as
1231      ``anyOf: [{"type": "string"}, {"type": "null"}]`` that Pydantic/MCP
1232      commonly emits for optional fields. Tool optionality is represented by
1233      the parent ``required`` array, so we delegate to the shared
1234      ``strip_nullable_unions`` helper to collapse nullable unions to the
1235      non-null branch while preserving metadata like description/default.
1236  
1237      ``keep_nullable_hint=False`` because the Anthropic validator does not
1238      recognize the OpenAPI-style ``nullable: true`` extension and strict
1239      schema-to-grammar converters may reject unknown keywords.
1240  
1241      Top-level ``oneOf``/``allOf``/``anyOf`` are also stripped here: the
1242      Anthropic API rejects union keywords at the schema root with a generic
1243      HTTP 400. Several upstream and plugin tools ship schemas with one of
1244      these keywords at the top level (commonly for Pydantic discriminated
1245      unions). If we land here with those keywords still present after
1246      nullable-union stripping, drop them and fall back to a plain object
1247      schema so the tool still validates at the Anthropic boundary.
1248      """
1249      if not schema:
1250          return {"type": "object", "properties": {}}
1251  
1252      from tools.schema_sanitizer import strip_nullable_unions
1253  
1254      normalized = strip_nullable_unions(schema, keep_nullable_hint=False)
1255      if not isinstance(normalized, dict):
1256          return {"type": "object", "properties": {}}
1257      # Strip top-level union keywords that Anthropic's validator rejects.
1258      banned = {"oneOf", "allOf", "anyOf"}
1259      if banned & normalized.keys():
1260          normalized = {k: v for k, v in normalized.items() if k not in banned}
1261          if "type" not in normalized:
1262              normalized["type"] = "object"
1263      if normalized.get("type") == "object" and not isinstance(normalized.get("properties"), dict):
1264          normalized = {**normalized, "properties": {}}
1265      return normalized
1266  
1267  
def convert_tools_to_anthropic(tools: List[Dict]) -> List[Dict]:
    """Translate OpenAI tool definitions into Anthropic tool specs."""
    if not tools:
        return []

    converted: List[Dict] = []
    seen_names: set = set()
    for tool in tools:
        spec = tool.get("function", {})
        tool_name = spec.get("name", "")
        # Defensive dedup: Anthropic rejects requests with duplicate tool
        # names.  Upstream injection paths already dedup, but this guard
        # converts a hard API failure into a warning.  See: #18478
        if tool_name and tool_name in seen_names:
            logger.warning(
                "convert_tools_to_anthropic: duplicate tool name '%s' "
                "— dropping second occurrence",
                tool_name,
            )
            continue
        if tool_name:
            seen_names.add(tool_name)
        converted.append({
            "name": tool_name,
            "description": spec.get("description", ""),
            "input_schema": _normalize_tool_input_schema(
                spec.get("parameters", {"type": "object", "properties": {}})
            ),
        })
    return converted
1297  
1298  
1299  def _image_source_from_openai_url(url: str) -> Dict[str, str]:
1300      """Convert an OpenAI-style image URL/data URL into Anthropic image source."""
1301      url = str(url or "").strip()
1302      if not url:
1303          return {"type": "url", "url": ""}
1304  
1305      if url.startswith("data:"):
1306          header, _, data = url.partition(",")
1307          media_type = "image/jpeg"
1308          if header.startswith("data:"):
1309              mime_part = header[len("data:"):].split(";", 1)[0].strip()
1310              if mime_part.startswith("image/"):
1311                  media_type = mime_part
1312          return {
1313              "type": "base64",
1314              "media_type": media_type,
1315              "data": data,
1316          }
1317  
1318      return {"type": "url", "url": url}
1319  
1320  
1321  def _convert_content_part_to_anthropic(part: Any) -> Optional[Dict[str, Any]]:
1322      """Convert a single OpenAI-style content part to Anthropic format."""
1323      if part is None:
1324          return None
1325      if isinstance(part, str):
1326          return {"type": "text", "text": part}
1327      if not isinstance(part, dict):
1328          return {"type": "text", "text": str(part)}
1329  
1330      ptype = part.get("type")
1331  
1332      if ptype == "input_text":
1333          block: Dict[str, Any] = {"type": "text", "text": part.get("text", "")}
1334      elif ptype in {"image_url", "input_image"}:
1335          image_value = part.get("image_url", {})
1336          url = image_value.get("url", "") if isinstance(image_value, dict) else str(image_value or "")
1337          block = {"type": "image", "source": _image_source_from_openai_url(url)}
1338      else:
1339          block = dict(part)
1340  
1341      if isinstance(part.get("cache_control"), dict) and "cache_control" not in block:
1342          block["cache_control"] = dict(part["cache_control"])
1343      return block
1344  
1345  
1346  def _to_plain_data(value: Any, *, _depth: int = 0, _path: Optional[set] = None) -> Any:
1347      """Recursively convert SDK objects to plain Python data structures.
1348  
1349      Guards against circular references (``_path`` tracks ``id()`` of objects
1350      on the *current* recursion path) and runaway depth (capped at 20 levels).
1351      Uses path-based tracking so shared (but non-cyclic) objects referenced by
1352      multiple siblings are converted correctly rather than being stringified.
1353      """
1354      _MAX_DEPTH = 20
1355      if _depth > _MAX_DEPTH:
1356          return str(value)
1357  
1358      if _path is None:
1359          _path = set()
1360  
1361      obj_id = id(value)
1362      if obj_id in _path:
1363          return str(value)
1364  
1365      if hasattr(value, "model_dump"):
1366          _path.add(obj_id)
1367          result = _to_plain_data(value.model_dump(), _depth=_depth + 1, _path=_path)
1368          _path.discard(obj_id)
1369          return result
1370      if isinstance(value, dict):
1371          _path.add(obj_id)
1372          result = {k: _to_plain_data(v, _depth=_depth + 1, _path=_path) for k, v in value.items()}
1373          _path.discard(obj_id)
1374          return result
1375      if isinstance(value, (list, tuple)):
1376          _path.add(obj_id)
1377          result = [_to_plain_data(v, _depth=_depth + 1, _path=_path) for v in value]
1378          _path.discard(obj_id)
1379          return result
1380      if hasattr(value, "__dict__"):
1381          _path.add(obj_id)
1382          result = {
1383              k: _to_plain_data(v, _depth=_depth + 1, _path=_path)
1384              for k, v in vars(value).items()
1385              if not k.startswith("_")
1386          }
1387          _path.discard(obj_id)
1388          return result
1389      return value
1390  
1391  
1392  def _extract_preserved_thinking_blocks(message: Dict[str, Any]) -> List[Dict[str, Any]]:
1393      """Return Anthropic thinking blocks previously preserved on the message."""
1394      raw_details = message.get("reasoning_details")
1395      if not isinstance(raw_details, list):
1396          return []
1397  
1398      preserved: List[Dict[str, Any]] = []
1399      for detail in raw_details:
1400          if not isinstance(detail, dict):
1401              continue
1402          block_type = str(detail.get("type", "") or "").strip().lower()
1403          if block_type not in {"thinking", "redacted_thinking"}:
1404              continue
1405          preserved.append(copy.deepcopy(detail))
1406      return preserved
1407  
1408  
1409  def _convert_content_to_anthropic(content: Any) -> Any:
1410      """Convert OpenAI-style multimodal content arrays to Anthropic blocks."""
1411      if not isinstance(content, list):
1412          return content
1413  
1414      converted = []
1415      for part in content:
1416          block = _convert_content_part_to_anthropic(part)
1417          if block is not None:
1418              converted.append(block)
1419      return converted
1420  
1421  
def convert_messages_to_anthropic(
    messages: List[Dict],
    base_url: str | None = None,
    model: str | None = None,
) -> Tuple[Optional[Any], List[Dict]]:
    """Convert OpenAI-format messages to Anthropic format.

    Returns (system_prompt, anthropic_messages).
    System messages are extracted since Anthropic takes them as a separate param.
    system_prompt is a string or list of content blocks (when cache_control present).

    When *base_url* is provided and points to a third-party Anthropic-compatible
    endpoint, all thinking block signatures are stripped.  Signatures are
    Anthropic-proprietary — third-party endpoints cannot validate them and will
    reject them with HTTP 400 "Invalid signature in thinking block".

    When *model* is provided and matches the Kimi / Moonshot family (or
    *base_url* is a Kimi / Moonshot host), unsigned thinking blocks
    synthesised from ``reasoning_content`` are preserved on replayed
    assistant tool-call messages — Kimi requires the field to exist, even
    if empty.
    """
    system = None
    result = []

    for m in messages:
        role = m.get("role", "user")
        content = m.get("content", "")

        if role == "system":
            if isinstance(content, list):
                # Preserve cache_control markers on content blocks
                has_cache = any(
                    p.get("cache_control") for p in content if isinstance(p, dict)
                )
                if has_cache:
                    system = [p for p in content if isinstance(p, dict)]
                else:
                    # Guard against malformed parts: only dicts carry "text",
                    # and a text block may legitimately omit the key.
                    system = "\n".join(
                        p.get("text", "")
                        for p in content
                        if isinstance(p, dict) and p.get("type") == "text"
                    )
            else:
                system = content
            continue

        if role == "assistant":
            blocks = _extract_preserved_thinking_blocks(m)
            if content:
                if isinstance(content, list):
                    converted_content = _convert_content_to_anthropic(content)
                    if isinstance(converted_content, list):
                        blocks.extend(converted_content)
                else:
                    blocks.append({"type": "text", "text": str(content)})
            # `tool_calls` may be present-but-None in upstream histories;
            # treat that the same as absent instead of raising TypeError.
            for tc in m.get("tool_calls") or []:
                if not tc or not isinstance(tc, dict):
                    continue
                fn = tc.get("function", {})
                args = fn.get("arguments", "{}")
                try:
                    parsed_args = json.loads(args) if isinstance(args, str) else args
                except (json.JSONDecodeError, ValueError):
                    parsed_args = {}
                blocks.append({
                    "type": "tool_use",
                    "id": _sanitize_tool_id(tc.get("id", "")),
                    "name": fn.get("name", ""),
                    "input": parsed_args,
                })
            # Kimi's /coding endpoint (Anthropic protocol) requires assistant
            # tool-call messages to carry reasoning_content when thinking is
            # enabled server-side.  Preserve it as a thinking block so Kimi
            # can validate the message history.  See hermes-agent#13848.
            #
            # Accept empty string "" — _copy_reasoning_content_for_api()
            # injects "" as a tier-3 fallback for Kimi tool-call messages
            # that had no reasoning.  Kimi requires the field to exist, even
            # if empty.
            #
            # Prepend (not append): Anthropic protocol requires thinking
            # blocks before text and tool_use blocks.
            #
            # Guard: only add when reasoning_details didn't already contribute
            # thinking blocks.  On native Anthropic, reasoning_details produces
            # signed thinking blocks — adding another unsigned one from
            # reasoning_content would create a duplicate (same text) that gets
            # downgraded to a spurious text block on the last assistant message.
            reasoning_content = m.get("reasoning_content")
            _already_has_thinking = any(
                isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking")
                for b in blocks
            )
            if isinstance(reasoning_content, str) and not _already_has_thinking:
                blocks.insert(0, {"type": "thinking", "thinking": reasoning_content})
            # Anthropic rejects empty assistant content
            effective = blocks or content
            if not effective or effective == "":
                effective = [{"type": "text", "text": "(empty)"}]
            result.append({"role": "assistant", "content": effective})
            continue

        if role == "tool":
            # Sanitize tool_use_id and ensure non-empty content
            result_content = content if isinstance(content, str) else json.dumps(content)
            if not result_content:
                result_content = "(no output)"
            tool_result = {
                "type": "tool_result",
                "tool_use_id": _sanitize_tool_id(m.get("tool_call_id", "")),
                "content": result_content,
            }
            if isinstance(m.get("cache_control"), dict):
                tool_result["cache_control"] = dict(m["cache_control"])
            # Merge consecutive tool results into one user message
            if (
                result
                and result[-1]["role"] == "user"
                and isinstance(result[-1]["content"], list)
                and result[-1]["content"]
                and result[-1]["content"][0].get("type") == "tool_result"
            ):
                result[-1]["content"].append(tool_result)
            else:
                result.append({"role": "user", "content": [tool_result]})
            continue

        # Regular user message — validate non-empty content (Anthropic rejects empty)
        if isinstance(content, list):
            converted_blocks = _convert_content_to_anthropic(content)
            # Substitute the placeholder only when the message carries no real
            # payload: no blocks at all, or nothing but blank text blocks.
            # A message containing non-text blocks (e.g. a lone image) must be
            # kept — `all()` over an empty generator is True, so without the
            # has_non_text guard an image-only message would be silently
            # replaced by the placeholder and the image dropped.
            _has_non_text = any(
                isinstance(b, dict) and b.get("type") != "text"
                for b in converted_blocks
            )
            _all_text_empty = all(
                b.get("text", "").strip() == ""
                for b in converted_blocks
                if isinstance(b, dict) and b.get("type") == "text"
            )
            if not converted_blocks or (not _has_non_text and _all_text_empty):
                converted_blocks = [{"type": "text", "text": "(empty message)"}]
            result.append({"role": "user", "content": converted_blocks})
        else:
            # Validate string content is non-empty
            if not content or (isinstance(content, str) and not content.strip()):
                content = "(empty message)"
            result.append({"role": "user", "content": content})

    # Strip orphaned tool_use blocks (no matching tool_result follows)
    tool_result_ids = set()
    for m in result:
        if m["role"] == "user" and isinstance(m["content"], list):
            for block in m["content"]:
                if block.get("type") == "tool_result":
                    tool_result_ids.add(block.get("tool_use_id"))
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
            m["content"] = [
                b
                for b in m["content"]
                if b.get("type") != "tool_use" or b.get("id") in tool_result_ids
            ]
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool call removed)"}]

    # Strip orphaned tool_result blocks (no matching tool_use precedes them).
    # This is the mirror of the above: context compression or session truncation
    # can remove an assistant message containing a tool_use while leaving the
    # subsequent tool_result intact.  Anthropic rejects these with a 400.
    tool_use_ids = set()
    for m in result:
        if m["role"] == "assistant" and isinstance(m["content"], list):
            for block in m["content"]:
                if block.get("type") == "tool_use":
                    tool_use_ids.add(block.get("id"))
    for m in result:
        if m["role"] == "user" and isinstance(m["content"], list):
            m["content"] = [
                b
                for b in m["content"]
                if b.get("type") != "tool_result" or b.get("tool_use_id") in tool_use_ids
            ]
            if not m["content"]:
                m["content"] = [{"type": "text", "text": "(tool result removed)"}]

    # Enforce strict role alternation (Anthropic rejects consecutive same-role messages)
    fixed = []
    for m in result:
        if fixed and fixed[-1]["role"] == m["role"]:
            if m["role"] == "user":
                # Merge consecutive user messages
                prev_content = fixed[-1]["content"]
                curr_content = m["content"]
                if isinstance(prev_content, str) and isinstance(curr_content, str):
                    fixed[-1]["content"] = prev_content + "\n" + curr_content
                elif isinstance(prev_content, list) and isinstance(curr_content, list):
                    fixed[-1]["content"] = prev_content + curr_content
                else:
                    # Mixed types — wrap string in list
                    if isinstance(prev_content, str):
                        prev_content = [{"type": "text", "text": prev_content}]
                    if isinstance(curr_content, str):
                        curr_content = [{"type": "text", "text": curr_content}]
                    fixed[-1]["content"] = prev_content + curr_content
            else:
                # Consecutive assistant messages — merge text content.
                # Drop thinking blocks from the *second* message: their
                # signature was computed against a different turn boundary
                # and becomes invalid once merged.
                if isinstance(m["content"], list):
                    m["content"] = [
                        b for b in m["content"]
                        if not (isinstance(b, dict) and b.get("type") in ("thinking", "redacted_thinking"))
                    ]
                prev_blocks = fixed[-1]["content"]
                curr_blocks = m["content"]
                if isinstance(prev_blocks, list) and isinstance(curr_blocks, list):
                    fixed[-1]["content"] = prev_blocks + curr_blocks
                elif isinstance(prev_blocks, str) and isinstance(curr_blocks, str):
                    fixed[-1]["content"] = prev_blocks + "\n" + curr_blocks
                else:
                    # Mixed types — normalize both to list and merge
                    if isinstance(prev_blocks, str):
                        prev_blocks = [{"type": "text", "text": prev_blocks}]
                    if isinstance(curr_blocks, str):
                        curr_blocks = [{"type": "text", "text": curr_blocks}]
                    fixed[-1]["content"] = prev_blocks + curr_blocks
        else:
            fixed.append(m)
    result = fixed

    # ── Thinking block signature management ──────────────────────────
    # Anthropic signs thinking blocks against the full turn content.
    # Any upstream mutation (context compression, session truncation,
    # orphan stripping, message merging) invalidates the signature,
    # causing HTTP 400 "Invalid signature in thinking block".
    #
    # Signatures are Anthropic-proprietary.  Third-party endpoints
    # (MiniMax, Azure AI Foundry, self-hosted proxies) cannot validate
    # them and will reject them outright.  When targeting a third-party
    # endpoint, strip ALL thinking/redacted_thinking blocks from every
    # assistant message — the third-party will generate its own
    # thinking blocks if it supports extended thinking.
    #
    # For direct Anthropic (strategy following clawdbot/OpenClaw):
    # 1. Strip thinking/redacted_thinking from all assistant messages
    #    EXCEPT the last one — preserves reasoning continuity on the
    #    current tool-use chain while avoiding stale signature errors.
    # 2. Downgrade unsigned thinking blocks (no signature) to text —
    #    Anthropic can't validate them and will reject them.
    # 3. Strip cache_control from thinking/redacted_thinking blocks —
    #    cache markers can interfere with signature validation.
    _THINKING_TYPES = frozenset(("thinking", "redacted_thinking"))
    _is_third_party = _is_third_party_anthropic_endpoint(base_url)
    # Kimi /coding and DeepSeek /anthropic share a contract: both speak the
    # Anthropic Messages protocol upstream but require that thinking blocks
    # synthesised from reasoning_content round-trip on subsequent turns when
    # thinking is enabled.  Signed Anthropic blocks still have to be stripped
    # (neither endpoint can validate Anthropic's signatures); unsigned blocks
    # are preserved.  See hermes-agent#13848 (Kimi) and #16748 (DeepSeek).
    _preserve_unsigned_thinking = (
        _is_kimi_family_endpoint(base_url, model)
        or _is_deepseek_anthropic_endpoint(base_url)
    )

    last_assistant_idx = None
    for i in range(len(result) - 1, -1, -1):
        if result[i].get("role") == "assistant":
            last_assistant_idx = i
            break

    for idx, m in enumerate(result):
        if m.get("role") != "assistant" or not isinstance(m.get("content"), list):
            continue

        if _preserve_unsigned_thinking:
            # Kimi's /coding and DeepSeek's /anthropic endpoints both enable
            # thinking server-side and require unsigned thinking blocks on
            # replayed assistant tool-call messages.  Strip signed Anthropic
            # blocks (neither upstream can validate Anthropic signatures) but
            # preserve the unsigned ones we synthesised from reasoning_content.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("signature") or b.get("data"):
                    # Anthropic-signed block — upstream can't validate, strip
                    continue
                # Unsigned thinking (synthesised from reasoning_content) —
                # keep it: the upstream needs it for message-history validation.
                new_content.append(b)
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]
        elif _is_third_party or idx != last_assistant_idx:
            # Third-party endpoint: strip ALL thinking blocks from every
            # assistant message — signatures are Anthropic-proprietary.
            # Direct Anthropic: strip from non-latest assistant messages only.
            stripped = [
                b for b in m["content"]
                if not (isinstance(b, dict) and b.get("type") in _THINKING_TYPES)
            ]
            m["content"] = stripped or [{"type": "text", "text": "(thinking elided)"}]
        else:
            # Latest assistant on direct Anthropic: keep signed thinking
            # blocks for reasoning continuity; downgrade unsigned ones to
            # plain text.
            new_content = []
            for b in m["content"]:
                if not isinstance(b, dict) or b.get("type") not in _THINKING_TYPES:
                    new_content.append(b)
                    continue
                if b.get("type") == "redacted_thinking":
                    # Redacted blocks use 'data' for the signature payload
                    if b.get("data"):
                        new_content.append(b)
                    # else: drop — no data means it can't be validated
                elif b.get("signature"):
                    # Signed thinking block — keep it
                    new_content.append(b)
                else:
                    # Unsigned thinking — downgrade to text so it's not lost
                    thinking_text = b.get("thinking", "")
                    if thinking_text:
                        new_content.append({"type": "text", "text": thinking_text})
            m["content"] = new_content or [{"type": "text", "text": "(empty)"}]

        # Strip cache_control from any remaining thinking/redacted_thinking
        # blocks — cache markers interfere with signature validation.
        for b in m["content"]:
            if isinstance(b, dict) and b.get("type") in _THINKING_TYPES:
                b.pop("cache_control", None)

    return system, result
1750  
1751  
def build_anthropic_kwargs(
    model: str,
    messages: List[Dict],
    tools: Optional[List[Dict]],
    max_tokens: Optional[int],
    reasoning_config: Optional[Dict[str, Any]],
    tool_choice: Optional[str] = None,
    is_oauth: bool = False,
    preserve_dots: bool = False,
    context_length: Optional[int] = None,
    base_url: str | None = None,
    fast_mode: bool = False,
    drop_context_1m_beta: bool = False,
) -> Dict[str, Any]:
    """Build kwargs for anthropic.messages.create().

    Naming note — two distinct concepts, easily confused:
      max_tokens     = OUTPUT token cap for a single response.
                       Anthropic's API calls this "max_tokens" but it only
                       limits the *output*.  Anthropic's own native SDK
                       renamed it "max_output_tokens" for clarity.
      context_length = TOTAL context window (input tokens + output tokens).
                       The API enforces: input_tokens + max_tokens ≤ context_length.
                       Stored on the ContextCompressor; reduced on overflow errors.

    When *max_tokens* is None the model's native output ceiling is used
    (e.g. 128K for Opus 4.6, 64K for Sonnet 4.6).

    When *context_length* is provided and the model's native output ceiling
    exceeds it (e.g. a local endpoint with an 8K window), the output cap is
    clamped to context_length − 1.  This only kicks in for unusually small
    context windows; for full-size models the native output cap is always
    smaller than the context window so no clamping happens.
    NOTE: this clamping does not account for prompt size — if the prompt is
    large, Anthropic may still reject the request.  The caller must detect
    "max_tokens too large given prompt" errors and retry with a smaller cap
    (see parse_available_output_tokens_from_error + _ephemeral_max_output_tokens).

    When *is_oauth* is True, applies Claude Code compatibility transforms:
    system prompt prefix, tool name prefixing, and prompt sanitization.

    When *preserve_dots* is True, model name dots are not converted to hyphens
    (for Alibaba/DashScope anthropic-compatible endpoints: qwen3.5-plus).

    When *base_url* points to a third-party Anthropic-compatible endpoint,
    thinking block signatures are stripped (they are Anthropic-proprietary).

    When *fast_mode* is True, adds ``extra_body["speed"] = "fast"`` and the
    fast-mode beta header for ~2.5x faster output throughput on Opus 4.6.
    Currently only supported on native Anthropic endpoints (not third-party
    compatible ones).
    """
    # system is either a plain string or a list of content blocks (the
    # latter when cache_control markers were preserved).
    system, anthropic_messages = convert_messages_to_anthropic(
        messages, base_url=base_url, model=model
    )
    anthropic_tools = convert_tools_to_anthropic(tools) if tools else []

    model = normalize_model_name(model, preserve_dots=preserve_dots)
    # effective_max_tokens = output cap for this call (≠ total context window)
    # Use the resolver helper so non-positive values (negative ints,
    # fractional floats, NaN, non-numeric) fail locally with a clear error
    # rather than 400-ing at the Anthropic API. See openclaw/openclaw#66664.
    effective_max_tokens = _resolve_anthropic_messages_max_tokens(
        max_tokens, model, context_length=context_length
    )

    # Clamp output cap to fit inside the total context window.
    # Only matters for small custom endpoints where context_length < native
    # output ceiling.  For standard Anthropic models context_length (e.g.
    # 200K) is always larger than the output ceiling (e.g. 128K), so this
    # branch is not taken.
    if context_length and effective_max_tokens > context_length:
        effective_max_tokens = max(context_length - 1, 1)

    # ── OAuth: Claude Code identity ──────────────────────────────────
    if is_oauth:
        # 1. Prepend Claude Code system prompt identity
        cc_block = {"type": "text", "text": _CLAUDE_CODE_SYSTEM_PREFIX}
        if isinstance(system, list):
            system = [cc_block] + system
        elif isinstance(system, str) and system:
            system = [cc_block, {"type": "text", "text": system}]
        else:
            # No system prompt (None or empty string) — identity block only.
            system = [cc_block]

        # 2. Sanitize system prompt — replace product name references
        #    to avoid Anthropic's server-side content filters.
        for block in system:
            if isinstance(block, dict) and block.get("type") == "text":
                text = block.get("text", "")
                text = text.replace("Hermes Agent", "Claude Code")
                text = text.replace("Hermes agent", "Claude Code")
                text = text.replace("hermes-agent", "claude-code")
                text = text.replace("Nous Research", "Anthropic")
                block["text"] = text

        # 3. Prefix tool names with mcp_ (Claude Code convention)
        if anthropic_tools:
            for tool in anthropic_tools:
                if "name" in tool:
                    tool["name"] = _MCP_TOOL_PREFIX + tool["name"]

        # 4. Prefix tool names in message history (tool_use and tool_result blocks)
        #    The startswith guard keeps this idempotent if a name was already
        #    prefixed on a previous pass.
        for msg in anthropic_messages:
            content = msg.get("content")
            if isinstance(content, list):
                for block in content:
                    if isinstance(block, dict):
                        if block.get("type") == "tool_use" and "name" in block:
                            if not block["name"].startswith(_MCP_TOOL_PREFIX):
                                block["name"] = _MCP_TOOL_PREFIX + block["name"]
                        elif block.get("type") == "tool_result" and "tool_use_id" in block:
                            pass  # tool_result uses ID, not name

    # ── Assemble base request payload ────────────────────────────────
    kwargs: Dict[str, Any] = {
        "model": model,
        "messages": anthropic_messages,
        "max_tokens": effective_max_tokens,
    }

    # Only send `system` when non-empty (falsy → omit the param entirely).
    if system:
        kwargs["system"] = system

    if anthropic_tools:
        kwargs["tools"] = anthropic_tools
        # Map OpenAI tool_choice to Anthropic format
        if tool_choice == "auto" or tool_choice is None:
            kwargs["tool_choice"] = {"type": "auto"}
        elif tool_choice == "required":
            kwargs["tool_choice"] = {"type": "any"}
        elif tool_choice == "none":
            # Anthropic has no tool_choice "none" — omit tools entirely to prevent use
            kwargs.pop("tools", None)
        elif isinstance(tool_choice, str):
            # Specific tool name
            kwargs["tool_choice"] = {"type": "tool", "name": tool_choice}

    # Map reasoning_config to Anthropic's thinking parameter.
    # Claude 4.6+ models use adaptive thinking + output_config.effort.
    # Older models use manual thinking with budget_tokens.
    # MiniMax Anthropic-compat endpoints support thinking (manual mode only,
    # not adaptive).  Haiku does NOT support extended thinking — skip entirely.
    #
    # Kimi's /coding endpoint speaks the Anthropic Messages protocol but has
    # its own thinking semantics: when ``thinking.enabled`` is sent, Kimi
    # validates the message history and requires every prior assistant
    # tool-call message to carry OpenAI-style ``reasoning_content``.  The
    # Anthropic path never populates that field, and
    # ``convert_messages_to_anthropic`` strips all Anthropic thinking blocks
    # on third-party endpoints — so the request fails with HTTP 400
    # "thinking is enabled but reasoning_content is missing in assistant
    # tool call message at index N".  Kimi's reasoning is driven server-side
    # on the /coding route, so skip Anthropic's thinking parameter entirely
    # for that host.  (Kimi on chat_completions enables thinking via
    # extra_body in the ChatCompletionsTransport — see #13503.)
    #
    # On 4.7+ the `thinking.display` field defaults to "omitted", which
    # silently hides reasoning text that Hermes surfaces in its CLI. We
    # request "summarized" so the reasoning blocks stay populated — matching
    # 4.6 behavior and preserving the activity-feed UX during long tool runs.
    _is_kimi_coding = _is_kimi_family_endpoint(base_url, model)
    if reasoning_config and isinstance(reasoning_config, dict) and not _is_kimi_coding:
        if reasoning_config.get("enabled") is not False and "haiku" not in model.lower():
            effort = str(reasoning_config.get("effort", "medium")).lower()
            budget = THINKING_BUDGET.get(effort, 8000)
            if _supports_adaptive_thinking(model):
                kwargs["thinking"] = {
                    "type": "adaptive",
                    "display": "summarized",
                }
                adaptive_effort = ADAPTIVE_EFFORT_MAP.get(effort, "medium")
                # Downgrade xhigh→max on models that don't list xhigh as a
                # supported level (Opus/Sonnet 4.6). Opus 4.7+ keeps xhigh.
                if adaptive_effort == "xhigh" and not _supports_xhigh_effort(model):
                    adaptive_effort = "max"
                kwargs["output_config"] = {
                    "effort": adaptive_effort,
                }
            else:
                kwargs["thinking"] = {"type": "enabled", "budget_tokens": budget}
                # Anthropic requires temperature=1 when thinking is enabled on older models
                kwargs["temperature"] = 1
                # Ensure the output cap leaves headroom above the thinking budget.
                kwargs["max_tokens"] = max(effective_max_tokens, budget + 4096)

    # ── Strip sampling params on 4.7+ ─────────────────────────────────
    # Opus 4.7 rejects any non-default temperature/top_p/top_k with a 400.
    # Callers (auxiliary_client, etc.) may set these for older models;
    # drop them here as a safety net so upstream 4.6 → 4.7 migrations
    # don't require coordinated edits everywhere.
    if _forbids_sampling_params(model):
        for _sampling_key in ("temperature", "top_p", "top_k"):
            kwargs.pop(_sampling_key, None)

    # ── Fast mode (Opus 4.6 only) ────────────────────────────────────
    # Adds extra_body.speed="fast" + the fast-mode beta header for ~2.5x
    # output speed. Per Anthropic docs, fast mode is only supported on
    # Opus 4.6 — Opus 4.7 and other models 400 on the speed parameter.
    # Only for native Anthropic endpoints — third-party providers would
    # reject the unknown beta header and speed parameter.
    if (
        fast_mode
        and not _is_third_party_anthropic_endpoint(base_url)
        and _supports_fast_mode(model)
    ):
        kwargs.setdefault("extra_body", {})["speed"] = "fast"
        # Build extra_headers with ALL applicable betas (the per-request
        # extra_headers override the client-level anthropic-beta header).
        betas = list(_common_betas_for_base_url(
            base_url,
            drop_context_1m_beta=drop_context_1m_beta,
        ))
        if is_oauth:
            betas.extend(_OAUTH_ONLY_BETAS)
        betas.append(_FAST_MODE_BETA)
        kwargs["extra_headers"] = {"anthropic-beta": ",".join(betas)}

    return kwargs
1969  
1970