1 """Shared runtime provider resolution for CLI, gateway, cron, and helpers.""" 2 3 from __future__ import annotations 4 5 import logging 6 import os 7 import re 8 from typing import Any, Dict, Optional 9 10 logger = logging.getLogger(__name__) 11 12 from hermes_cli import auth as auth_mod 13 from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool 14 from hermes_cli.auth import ( 15 AuthError, 16 DEFAULT_CODEX_BASE_URL, 17 DEFAULT_QWEN_BASE_URL, 18 PROVIDER_REGISTRY, 19 _agent_key_is_usable, 20 format_auth_error, 21 resolve_provider, 22 resolve_nous_runtime_credentials, 23 resolve_codex_runtime_credentials, 24 resolve_qwen_runtime_credentials, 25 resolve_gemini_oauth_runtime_credentials, 26 resolve_api_key_provider_credentials, 27 resolve_external_process_provider_credentials, 28 has_usable_secret, 29 ) 30 from hermes_cli.config import get_compatible_custom_providers, load_config 31 from hermes_constants import OPENROUTER_BASE_URL 32 from utils import base_url_host_matches, base_url_hostname 33 34 35 def _normalize_custom_provider_name(value: str) -> str: 36 return value.strip().lower().replace(" ", "-") 37 38 39 def _loopback_hostname(host: str) -> bool: 40 h = (host or "").lower().rstrip(".") 41 return h in {"localhost", "127.0.0.1", "::1", "0.0.0.0"} 42 43 44 def _config_base_url_trustworthy_for_bare_custom(cfg_base_url: str, cfg_provider: str) -> bool: 45 """Decide whether ``model.base_url`` may back bare ``custom`` runtime resolution. 46 47 GitHub #14676: the model picker can select Custom while ``model.provider`` still reflects a 48 previous provider. Reject non-loopback URLs unless the YAML provider is already ``custom``, 49 so a stale OpenRouter/Z.ai base_url cannot hijack local ``custom`` sessions. 50 """ 51 cfg_provider_norm = (cfg_provider or "").strip().lower() 52 bu = (cfg_base_url or "").strip() 53 if not bu: 54 return False 55 if cfg_provider_norm == "custom": 56 return True 57 if base_url_host_matches(bu, "openrouter.ai"): 58 return False 59 return _loopback_hostname(base_url_hostname(bu)) 60 61 62 def _detect_api_mode_for_url(base_url: str) -> Optional[str]: 63 """Auto-detect api_mode from the resolved base URL. 64 65 - Direct api.openai.com endpoints need the Responses API for GPT-5.x 66 tool calls with reasoning (chat/completions returns 400). 67 - Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM, 68 LiteLLM proxies, etc.) conventionally expose the native Anthropic 69 protocol under a ``/anthropic`` suffix — treat those as 70 ``anthropic_messages`` transport instead of the default 71 ``chat_completions``. 72 - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the 73 Anthropic Messages protocol (the /coding route accepts Claude 74 Code's native request shape). 
75 """ 76 normalized = (base_url or "").strip().lower().rstrip("/") 77 hostname = base_url_hostname(base_url) 78 if hostname == "api.x.ai": 79 return "codex_responses" 80 if hostname == "api.openai.com": 81 return "codex_responses" 82 if normalized.endswith("/anthropic"): 83 return "anthropic_messages" 84 if hostname == "api.kimi.com" and "/coding" in normalized: 85 return "anthropic_messages" 86 return None 87 88 89 def _auto_detect_local_model(base_url: str) -> str: 90 """Query a local server for its model name when only one model is loaded.""" 91 if not base_url: 92 return "" 93 try: 94 import requests 95 url = base_url.rstrip("/") 96 if not url.endswith("/v1"): 97 url += "/v1" 98 resp = requests.get(url + "/models", timeout=5) 99 if resp.ok: 100 models = resp.json().get("data", []) 101 if len(models) == 1: 102 model_id = models[0].get("id", "") 103 if model_id: 104 return model_id 105 except Exception: 106 pass 107 return "" 108 109 110 def _get_model_config() -> Dict[str, Any]: 111 config = load_config() 112 model_cfg = config.get("model") 113 if isinstance(model_cfg, dict): 114 cfg = dict(model_cfg) 115 # Accept "model" as alias for "default" (users intuitively write model.model) 116 if not cfg.get("default") and cfg.get("model"): 117 cfg["default"] = cfg["model"] 118 default = (cfg.get("default") or "").strip() 119 base_url = (cfg.get("base_url") or "").strip() 120 is_local = "localhost" in base_url or "127.0.0.1" in base_url 121 is_fallback = not default 122 if is_local and is_fallback and base_url: 123 detected = _auto_detect_local_model(base_url) 124 if detected: 125 cfg["default"] = detected 126 return cfg 127 if isinstance(model_cfg, str) and model_cfg.strip(): 128 return {"default": model_cfg.strip()} 129 return {} 130 131 132 def _provider_supports_explicit_api_mode(provider: Optional[str], configured_provider: Optional[str] = None) -> bool: 133 """Check whether a persisted api_mode should be honored for a given provider. 134 135 Prevents stale api_mode from a previous provider leaking into a 136 different one after a model/provider switch. Only applies the 137 persisted mode when the config's provider matches the runtime 138 provider (or when no configured provider is recorded). 139 """ 140 normalized_provider = (provider or "").strip().lower() 141 normalized_configured = (configured_provider or "").strip().lower() 142 if not normalized_configured: 143 return True 144 if normalized_provider == "custom": 145 return normalized_configured == "custom" or normalized_configured.startswith("custom:") 146 return normalized_configured == normalized_provider 147 148 149 def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str: 150 configured_provider = str(model_cfg.get("provider") or "").strip().lower() 151 configured_mode = _parse_api_mode(model_cfg.get("api_mode")) 152 if configured_mode and _provider_supports_explicit_api_mode("copilot", configured_provider): 153 return configured_mode 154 155 model_name = str(model_cfg.get("default") or "").strip() 156 if not model_name: 157 return "chat_completions" 158 159 try: 160 from hermes_cli.models import copilot_model_api_mode 161 162 return copilot_model_api_mode(model_name, api_key=api_key) 163 except Exception: 164 return "chat_completions" 165 166 167 _VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"} 168 169 170 def _parse_api_mode(raw: Any) -> Optional[str]: 171 """Validate an api_mode value from config. 
    Returns None if invalid.
    """
    if isinstance(raw, str):
        normalized = raw.strip().lower()
        if normalized in _VALID_API_MODES:
            return normalized
    return None


def _resolve_runtime_from_pool_entry(
    *,
    provider: str,
    entry: PooledCredential,
    requested_provider: str,
    model_cfg: Optional[Dict[str, Any]] = None,
    pool: Optional[CredentialPool] = None,
    target_model: Optional[str] = None,
) -> Dict[str, Any]:
    model_cfg = model_cfg or _get_model_config()
    # When the caller is resolving for a specific target model (e.g. a /model
    # mid-session switch), prefer that over the persisted model.default. This
    # prevents api_mode being computed from a stale config default that no
    # longer matches the model actually being used — the bug that caused
    # opencode-zen /v1 to be stripped for chat_completions requests when
    # config.default was still a Claude model.
    effective_model = (target_model or model_cfg.get("default") or "")
    base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/")
    api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
    api_mode = "chat_completions"
    if provider == "openai-codex":
        api_mode = "codex_responses"
        base_url = base_url or DEFAULT_CODEX_BASE_URL
    elif provider == "qwen-oauth":
        api_mode = "chat_completions"
        base_url = base_url or DEFAULT_QWEN_BASE_URL
    elif provider == "google-gemini-cli":
        api_mode = "chat_completions"
        base_url = base_url or "cloudcode-pa://google"
    elif provider == "anthropic":
        api_mode = "anthropic_messages"
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == "anthropic":
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        base_url = cfg_base_url or base_url or "https://api.anthropic.com"
    elif provider == "openrouter":
        base_url = base_url or OPENROUTER_BASE_URL
    elif provider == "xai":
        api_mode = "codex_responses"
    elif provider == "nous":
        api_mode = "chat_completions"
    elif provider == "copilot":
        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
        base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
    elif provider == "azure-foundry":
        # Azure Foundry: read api_mode and base_url from config
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        if cfg_provider == "azure-foundry":
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
            if cfg_base_url:
                base_url = cfg_base_url
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
            if configured_mode:
                api_mode = configured_mode
        # Model-family inference for GPT-5.x / codex / o1-o4: Azure rejects
        # /chat/completions on these with 400 "operation unsupported" — see
        # azure_foundry_model_api_mode() for rationale. Skip when the user
        # explicitly picked anthropic_messages (Anthropic-style endpoint).
        if effective_model and api_mode != "anthropic_messages":
            try:
                from hermes_cli.models import azure_foundry_model_api_mode

                inferred = azure_foundry_model_api_mode(effective_model)
            except Exception:
                inferred = None
            if inferred:
                api_mode = inferred
        # For Anthropic-style endpoints, strip /v1 suffix
        if api_mode == "anthropic_messages":
            base_url = re.sub(r"/v1/?$", "", base_url)
    else:
        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
        # Honour model.base_url from config.yaml when the configured provider
        # matches this provider — same pattern as the Anthropic branch above.
        # Only override when the pool entry has no explicit base_url (i.e. it
        # fell back to the hardcoded default). Env var overrides win (#6039).
        pconfig = PROVIDER_REGISTRY.get(provider)
        pool_url_is_default = pconfig and base_url.rstrip("/") == pconfig.inference_base_url.rstrip("/")
        if configured_provider == provider and pool_url_is_default:
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
            if cfg_base_url:
                base_url = cfg_base_url
        configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
        if provider in ("opencode-zen", "opencode-go"):
            # Re-derive api_mode from the effective model rather than the
            # persisted api_mode: the opencode providers serve both
            # anthropic_messages and chat_completions models, so the previous
            # session's mode must not leak across /model switches.
            # Refs #16878.
            from hermes_cli.models import opencode_model_api_mode
            api_mode = opencode_model_api_mode(provider, effective_model)
        elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
            api_mode = configured_mode
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
            # codex_responses).
            detected = _detect_api_mode_for_url(base_url)
            if detected:
                api_mode = detected

    # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
    # Anthropic SDK prepends its own /v1/messages to the base_url. Strip the
    # trailing /v1 so the SDK constructs the correct path (e.g.
    # https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
    if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
        base_url = re.sub(r"/v1/?$", "", base_url)

    return {
        "provider": provider,
        "api_mode": api_mode,
        "base_url": base_url,
        "api_key": api_key,
        "source": getattr(entry, "source", "pool"),
        "credential_pool": pool,
        "requested_provider": requested_provider,
    }


def resolve_requested_provider(requested: Optional[str] = None) -> str:
    """Resolve provider request from explicit arg, config, then env."""
    if requested and requested.strip():
        return requested.strip().lower()

    model_cfg = _get_model_config()
    cfg_provider = model_cfg.get("provider")
    if isinstance(cfg_provider, str) and cfg_provider.strip():
        return cfg_provider.strip().lower()

    # Prefer the persisted config selection over any stale shell/.env
    # provider override so chat uses the endpoint the user last saved.
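    # Example (values illustrative): with `model.provider: nous` saved in
    # config.yaml, a leftover HERMES_INFERENCE_PROVIDER=openrouter in the shell
    # is never consulted, because the config branch above returns first.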
    env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
    if env_provider:
        return env_provider

    return "auto"


def _try_resolve_from_custom_pool(
    base_url: str,
    provider_label: str,
    api_mode_override: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Check if a credential pool exists for a custom endpoint and return a runtime dict if so."""
    pool_key = get_custom_provider_pool_key(base_url)
    if not pool_key:
        return None
    try:
        pool = load_pool(pool_key)
        if not pool.has_credentials():
            return None
        entry = pool.select()
        if entry is None:
            return None
        pool_api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
        if not pool_api_key:
            return None
        return {
            "provider": provider_label,
            "api_mode": api_mode_override or _detect_api_mode_for_url(base_url) or "chat_completions",
            "base_url": base_url,
            "api_key": pool_api_key,
            "source": f"pool:{pool_key}",
            "credential_pool": pool,
        }
    except Exception:
        return None


def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]:
    requested_norm = _normalize_custom_provider_name(requested_provider or "")
    if not requested_norm or requested_norm == "custom":
        return None

    # Raw names should only map to custom providers when they are not already
    # valid built-in providers or aliases. Explicit menu keys like
    # ``custom:local`` always target the saved custom provider.
    if requested_norm == "auto":
        return None
    if not requested_norm.startswith("custom:"):
        try:
            canonical = auth_mod.resolve_provider(requested_norm)
        except AuthError:
            pass
        else:
            # A user-declared ``custom_providers`` entry whose name matches
            # only an *alias* (``kimi`` → built-in ``kimi-coding``) is the
            # user's intended target — alias rewriting would otherwise hijack
            # the request. We only defer to the built-in when the raw name is
            # the canonical provider itself (``nous``, ``openrouter``, …) so
            # accidentally shadowing a canonical provider still resolves to
            # the built-in. See tests/hermes_cli/test_runtime_provider_resolution.py
            # ``test_named_custom_provider_does_not_shadow_builtin_provider``.
            if (canonical or "").strip().lower() == requested_norm:
                return None

    config = load_config()

    # First check providers: dict (new-style user-defined providers)
    providers = config.get("providers")
    if isinstance(providers, dict):
        for ep_name, entry in providers.items():
            if not isinstance(entry, dict):
                continue
            # Match exact name or normalized name
            name_norm = _normalize_custom_provider_name(ep_name)
            # Resolve the API key from the env var name stored in key_env
            key_env = str(entry.get("key_env", "") or "").strip()
            resolved_api_key = os.getenv(key_env, "").strip() if key_env else ""
            # Fall back to inline api_key when key_env is absent or unresolvable
            if not resolved_api_key:
                resolved_api_key = str(entry.get("api_key", "") or "").strip()

            if requested_norm in {ep_name, name_norm, f"custom:{name_norm}"}:
                # Found match by provider key
                base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
                if base_url:
                    result = {
                        "name": entry.get("name", ep_name),
                        "base_url": base_url.strip(),
                        "api_key": resolved_api_key,
                        "model": entry.get("default_model", ""),
                    }
                    # The v11→v12 migration writes the API mode under the new
                    # ``transport`` field, but hand-edited configs may still
                    # use the legacy ``api_mode`` spelling. Accept both —
                    # the runtime normaliser ``_normalize_custom_provider_entry``
                    # already does, so without this lift every migrated config
                    # silently downgrades codex_responses / anthropic_messages
                    # providers to chat_completions in the resolved runtime.
                    api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
                    if api_mode:
                        result["api_mode"] = api_mode
                    return result
            # Also check the 'name' field if present
            display_name = entry.get("name", "")
            if display_name:
                display_norm = _normalize_custom_provider_name(display_name)
                if requested_norm in {display_name, display_norm, f"custom:{display_norm}"}:
                    # Found match by display name
                    base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
                    if base_url:
                        result = {
                            "name": display_name,
                            "base_url": base_url.strip(),
                            "api_key": resolved_api_key,
                            "model": entry.get("default_model", ""),
                        }
                        api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
                        if api_mode:
                            result["api_mode"] = api_mode
                        return result

    # Fall back to custom_providers: list (legacy format)
    custom_providers = config.get("custom_providers")
    if isinstance(custom_providers, dict):
        logger.warning(
            "custom_providers in config.yaml is a dict, not a list. "
            "Each entry must be prefixed with '-' in YAML. "
            "Run 'hermes doctor' for details."
        )
        return None

    custom_providers = get_compatible_custom_providers(config)
    if not custom_providers:
        return None

    for entry in custom_providers:
        if not isinstance(entry, dict):
            continue
        name = entry.get("name")
        base_url = entry.get("base_url")
        if not isinstance(name, str) or not isinstance(base_url, str):
            continue
        name_norm = _normalize_custom_provider_name(name)
        menu_key = f"custom:{name_norm}"
        provider_key = str(entry.get("provider_key", "") or "").strip()
        provider_key_norm = _normalize_custom_provider_name(provider_key) if provider_key else ""
        provider_menu_key = f"custom:{provider_key_norm}" if provider_key_norm else ""
        if requested_norm not in {name_norm, menu_key, provider_key_norm, provider_menu_key}:
            continue
        result = {
            "name": name.strip(),
            "base_url": base_url.strip(),
            "api_key": str(entry.get("api_key", "") or "").strip(),
        }
        key_env = str(entry.get("key_env", "") or "").strip()
        if key_env:
            result["key_env"] = key_env
        if provider_key:
            result["provider_key"] = provider_key
        api_mode = _parse_api_mode(entry.get("api_mode"))
        if api_mode:
            result["api_mode"] = api_mode
        model_name = str(entry.get("model", "") or "").strip()
        if model_name:
            result["model"] = model_name
        return result

    return None


def _resolve_named_custom_runtime(
    *,
    requested_provider: str,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    # Bare `provider="custom"` with an explicit base_url (e.g. propagated
    # from a `model_aliases:` direct-alias resolution) — build a runtime
    # directly so the alias's base_url actually takes effect.
    requested_norm = (requested_provider or "").strip().lower()
    if requested_norm == "custom" and explicit_base_url:
        base_url = explicit_base_url.strip().rstrip("/")
        api_key_candidates = [
            (explicit_api_key or "").strip(),
            os.getenv("OPENAI_API_KEY", "").strip(),
            os.getenv("OPENROUTER_API_KEY", "").strip(),
        ]
        api_key = next(
            (c for c in api_key_candidates if has_usable_secret(c)),
            "",
        ) or "no-key-required"
        return {
            "provider": "custom",
            "api_mode": _detect_api_mode_for_url(base_url) or "chat_completions",
            "base_url": base_url,
            "api_key": api_key,
            "source": "direct-alias",
            "requested_provider": requested_provider,
        }

    custom_provider = _get_named_custom_provider(requested_provider)
    if not custom_provider:
        return None

    base_url = (
        (explicit_base_url or "").strip()
        or custom_provider.get("base_url", "")
    ).rstrip("/")
    if not base_url:
        return None

    # Check if a credential pool exists for this custom endpoint
    pool_result = _try_resolve_from_custom_pool(base_url, "custom", custom_provider.get("api_mode"))
    if pool_result:
        # Propagate the model name even when using pooled credentials —
        # the pool doesn't know about the custom_providers model field.
        model_name = custom_provider.get("model")
        if model_name:
            pool_result["model"] = model_name
        return pool_result

    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
        os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
        os.getenv("OPENAI_API_KEY", "").strip(),
        os.getenv("OPENROUTER_API_KEY", "").strip(),
    ]
    api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")

    result = {
        "provider": "custom",
        "api_mode": custom_provider.get("api_mode")
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
        "base_url": base_url,
        "api_key": api_key or "no-key-required",
        "source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
    }
    # Propagate the model name so callers can override self.model when the
    # provider name differs from the actual model string the API expects.
    if custom_provider.get("model"):
        result["model"] = custom_provider["model"]
    return result


def _resolve_openrouter_runtime(
    *,
    requested_provider: str,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
    model_cfg = _get_model_config()
    cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else ""
    cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else ""
    cfg_api_key = ""
    for k in ("api_key", "api"):
        v = model_cfg.get(k)
        if isinstance(v, str) and v.strip():
            cfg_api_key = v.strip()
            break
    requested_norm = (requested_provider or "").strip().lower()
    cfg_provider = cfg_provider.strip().lower()

    env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
    env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip()

    # Use config base_url when available and the provider context matches.
    # OPENAI_BASE_URL env var is no longer consulted — config.yaml is
    # the single source of truth for endpoint URLs.
    use_config_base_url = False
    if cfg_base_url.strip() and not explicit_base_url:
        if requested_norm == "auto":
            if not cfg_provider or cfg_provider == "auto":
                use_config_base_url = True
        elif requested_norm == "custom" and _config_base_url_trustworthy_for_bare_custom(
            cfg_base_url, cfg_provider
        ):
            use_config_base_url = True

    base_url = (
        (explicit_base_url or "").strip()
        or env_custom_base_url
        or (cfg_base_url.strip() if use_config_base_url else "")
        or env_openrouter_base_url
        or OPENROUTER_BASE_URL
    ).rstrip("/")

    # Choose API key based on whether the resolved base_url targets OpenRouter.
    # When hitting OpenRouter, prefer OPENROUTER_API_KEY (issue #289).
    # When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer
    # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
    # provider (issues #420, #560).
    _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
    if _is_openrouter_url:
        api_key_candidates = [
            explicit_api_key,
            os.getenv("OPENROUTER_API_KEY"),
            os.getenv("OPENAI_API_KEY"),
        ]
    else:
        # Custom endpoint: use api_key from config when using config base_url (#1760).
        # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
        # the canonical env var for ollama.com authentication. Match on
        # HOST, not substring — a custom base_url whose path contains
        # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
        # hostname is a look-alike (ollama.com.attacker.test) must not
        # receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
        _is_ollama_url = base_url_host_matches(base_url, "ollama.com")
        api_key_candidates = [
            explicit_api_key,
            (cfg_api_key if use_config_base_url else ""),
            (os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
            os.getenv("OPENAI_API_KEY"),
            os.getenv("OPENROUTER_API_KEY"),
        ]
    api_key = next(
        (str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),
        "",
    )

    source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"

    # When "custom" was explicitly requested, preserve that as the provider
    # name instead of silently relabeling to "openrouter" (#2562).
    # Also provide a placeholder API key for local servers that don't require
    # authentication — the OpenAI SDK requires a non-empty api_key string.
    effective_provider = "custom" if requested_norm == "custom" else "openrouter"

    # For custom endpoints, check if a credential pool exists
    if effective_provider == "custom" and base_url:
        pool_result = _try_resolve_from_custom_pool(
            base_url, effective_provider, _parse_api_mode(model_cfg.get("api_mode")),
        )
        if pool_result:
            return pool_result

    if effective_provider == "custom" and not api_key and not _is_openrouter_url:
        api_key = "no-key-required"

    return {
        "provider": effective_provider,
        "api_mode": _parse_api_mode(model_cfg.get("api_mode"))
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
        "base_url": base_url,
        "api_key": api_key,
        "source": source,
    }


def _resolve_azure_foundry_runtime(
    *,
    requested_provider: str,
    model_cfg: Dict[str, Any],
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
    target_model: Optional[str] = None,
) -> Dict[str, Any]:
    """Resolve an Azure Foundry runtime entry.

    Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
    explicit overrides), pulls the API key from ``.env`` / env var, and
    strips a trailing ``/v1`` for Anthropic-style endpoints because the
    Anthropic SDK appends ``/v1/messages`` internally.

    Raises :class:`AuthError` when required values are missing.
    """
    explicit_api_key = str(explicit_api_key or "").strip()
    explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")

    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
    cfg_base_url = ""
    cfg_api_mode = "chat_completions"
    if cfg_provider == "azure-foundry":
        cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"

    # Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
    # reasoning models as Responses-API-only. Calling /chat/completions
    # against them returns 400 "The requested operation is unsupported."
    # Upgrade api_mode when the model name matches, unless the user has
    # explicitly chosen anthropic_messages (Anthropic-style endpoint).
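    # Illustrative (model names hypothetical): a deployment named
    # "gpt-5.1-codex" or "o3-mini" would be upgraded to codex_responses by the
    # inference below, while a name outside those families leaves cfg_api_mode
    # untouched.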
    effective_model = str(target_model or model_cfg.get("default") or "").strip()
    if effective_model and cfg_api_mode != "anthropic_messages":
        try:
            from hermes_cli.models import azure_foundry_model_api_mode

            inferred = azure_foundry_model_api_mode(effective_model)
        except Exception:
            inferred = None
        if inferred:
            cfg_api_mode = inferred

    env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
    base_url = explicit_base_url_clean or cfg_base_url or env_base_url
    if not base_url:
        raise AuthError(
            "Azure Foundry requires a base URL. Set it via 'hermes model' or "
            "the AZURE_FOUNDRY_BASE_URL environment variable."
        )

    api_key = explicit_api_key
    if not api_key:
        try:
            from hermes_cli.config import get_env_value
            api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
        except Exception:
            api_key = ""
    if not api_key:
        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
    if not api_key:
        raise AuthError(
            "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
            "~/.hermes/.env or run 'hermes model' to configure."
        )

    # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
    # we inherited from the configured base_url to avoid double-/v1 paths.
    if cfg_api_mode == "anthropic_messages":
        base_url = re.sub(r"/v1/?$", "", base_url)

    source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
    return {
        "provider": "azure-foundry",
        "api_mode": cfg_api_mode,
        "base_url": base_url,
        "api_key": api_key,
        "source": source,
        "requested_provider": requested_provider,
    }


def _resolve_explicit_runtime(
    *,
    provider: str,
    requested_provider: str,
    model_cfg: Dict[str, Any],
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    explicit_api_key = str(explicit_api_key or "").strip()
    explicit_base_url = str(explicit_base_url or "").strip().rstrip("/")
    if not explicit_api_key and not explicit_base_url:
        return None

    if provider == "anthropic":
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == "anthropic":
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        base_url = explicit_base_url or cfg_base_url or "https://api.anthropic.com"
        api_key = explicit_api_key
        if not api_key:
            from agent.anthropic_adapter import resolve_anthropic_token

            api_key = resolve_anthropic_token()
        if not api_key:
            raise AuthError(
                "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
                "run 'claude setup-token', or authenticate with 'claude /login'."
            )
        return {
            "provider": "anthropic",
            "api_mode": "anthropic_messages",
            "base_url": base_url,
            "api_key": api_key,
            "source": "explicit",
            "requested_provider": requested_provider,
        }

    if provider == "openai-codex":
        base_url = explicit_base_url or DEFAULT_CODEX_BASE_URL
        api_key = explicit_api_key
        last_refresh = None
        if not api_key:
            creds = resolve_codex_runtime_credentials()
            api_key = creds.get("api_key", "")
            last_refresh = creds.get("last_refresh")
            if not explicit_base_url:
                base_url = creds.get("base_url", "").rstrip("/") or base_url
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": base_url,
            "api_key": api_key,
            "source": "explicit",
            "last_refresh": last_refresh,
            "requested_provider": requested_provider,
        }

    if provider == "nous":
        state = auth_mod.get_provider_auth_state("nous") or {}
        base_url = (
            explicit_base_url
            or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
        )
        # Only use agent_key for inference — access_token is an OAuth token for the
        # portal API (minting keys, refreshing tokens), not for the inference API.
        # Falling back to access_token sends an OAuth bearer token to the inference
        # endpoint, which returns 404 because it is not a valid inference credential.
        api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
        expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
        if not api_key:
            creds = resolve_nous_runtime_credentials(
                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
            )
            api_key = creds.get("api_key", "")
            expires_at = creds.get("expires_at")
            if not explicit_base_url:
                base_url = creds.get("base_url", "").rstrip("/") or base_url
        return {
            "provider": "nous",
            "api_mode": "chat_completions",
            "base_url": base_url,
            "api_key": api_key,
            "source": "explicit",
            "expires_at": expires_at,
            "requested_provider": requested_provider,
        }

    # Azure Foundry: user-configured endpoint with selectable API mode
    if provider == "azure-foundry":
        return _resolve_azure_foundry_runtime(
            requested_provider=requested_provider,
            model_cfg=model_cfg,
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
        )

    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        env_url = ""
        if pconfig.base_url_env_var:
            env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")

        base_url = explicit_base_url
        if not base_url:
            if provider in ("kimi-coding", "kimi-coding-cn"):
                creds = resolve_api_key_provider_credentials(provider)
                base_url = creds.get("base_url", "").rstrip("/")
            else:
                base_url = env_url or pconfig.inference_base_url

        api_key = explicit_api_key
        if not api_key:
            creds = resolve_api_key_provider_credentials(provider)
            api_key = creds.get("api_key", "")
            if not base_url:
                base_url = creds.get("base_url", "").rstrip("/")

        api_mode = "chat_completions"
        if provider == "copilot":
            api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
        elif provider == "xai":
            api_mode = "codex_responses"
        else:
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
            if configured_mode:
                api_mode = configured_mode
            else:
                # Auto-detect from URL (Anthropic /anthropic suffix,
                # api.openai.com → Responses, Kimi /coding, etc.).
                detected = _detect_api_mode_for_url(base_url)
                if detected:
                    api_mode = detected

        return {
            "provider": provider,
            "api_mode": api_mode,
            "base_url": base_url.rstrip("/"),
            "api_key": api_key,
            "source": "explicit",
            "requested_provider": requested_provider,
        }

    return None


def resolve_runtime_provider(
    *,
    requested: Optional[str] = None,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
    target_model: Optional[str] = None,
) -> Dict[str, Any]:
    """Resolve runtime provider credentials for agent execution.

    target_model: Optional override for model_cfg.get("default") when
    computing provider-specific api_mode (e.g. OpenCode Zen/Go where different
    models route through different API surfaces). Callers performing an
    explicit mid-session model switch should pass the new model here so
    api_mode is derived from the model they are switching TO, not the stale
    persisted default. Other callers can leave it None to preserve existing
    behavior (api_mode derived from config).
    """
    requested_provider = resolve_requested_provider(requested)

    # Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
    # with provider="anthropic", bypass _resolve_named_custom_runtime (which would
    # return provider="custom" with chat_completions api_mode and no valid key).
    # Instead, use the Azure key directly with anthropic_messages api_mode.
    _eff_base = (explicit_base_url or "").strip()
    if requested_provider == "anthropic" and "azure.com" in _eff_base:
        _azure_key = (
            (explicit_api_key or "").strip()
            or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
            or os.getenv("ANTHROPIC_API_KEY", "").strip()
        )
        return {
            "provider": "anthropic",
            "api_mode": "anthropic_messages",
            "base_url": _eff_base.rstrip("/"),
            "api_key": _azure_key,
            "source": "azure-explicit",
            "requested_provider": requested_provider,
        }

    # Azure Foundry: user-configured endpoint with selectable API mode
    # (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
    # Resolve before the custom-runtime / pool / generic paths so Azure
    # config is always picked up from model.base_url + model.api_mode,
    # regardless of whether the caller passed explicit_* args.
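    # Sketch of the config.yaml shape this branch consumes (values are
    # hypothetical placeholders):
    #
    #   model:
    #     provider: azure-foundry
    #     default: my-deployment-name
    #     base_url: https://example-resource.invalid/anthropic
    #     api_mode: anthropic_messages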
    if requested_provider == "azure-foundry":
        azure_runtime = _resolve_azure_foundry_runtime(
            requested_provider=requested_provider,
            model_cfg=_get_model_config(),
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
            target_model=target_model,
        )
        return azure_runtime

    custom_runtime = _resolve_named_custom_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    if custom_runtime:
        custom_runtime["requested_provider"] = requested_provider
        return custom_runtime

    provider = resolve_provider(
        requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    model_cfg = _get_model_config()
    explicit_runtime = _resolve_explicit_runtime(
        provider=provider,
        requested_provider=requested_provider,
        model_cfg=model_cfg,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    if explicit_runtime:
        return explicit_runtime

    should_use_pool = provider != "openrouter"
    if provider == "openrouter":
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = str(model_cfg.get("base_url") or "").strip()
        env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
        env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
        has_custom_endpoint = bool(
            explicit_base_url
            or env_openai_base_url
            or env_openrouter_base_url
        )
        if cfg_base_url and cfg_provider in {"auto", "custom"}:
            has_custom_endpoint = True
        has_runtime_override = bool(explicit_api_key or explicit_base_url)
        should_use_pool = (
            requested_provider in {"openrouter", "auto"}
            and not has_custom_endpoint
            and not has_runtime_override
        )

    try:
        pool = load_pool(provider) if should_use_pool else None
    except Exception:
        pool = None
    if pool and pool.has_credentials():
        entry = pool.select()
        pool_api_key = ""
        if entry is not None:
            pool_api_key = (
                getattr(entry, "runtime_api_key", None)
                or getattr(entry, "access_token", "")
            )
        # For Nous, the pool entry's runtime_api_key is the agent_key — a
        # short-lived inference credential (~30 min TTL). The pool doesn't
        # refresh it during selection (that would trigger network calls in
        # non-runtime contexts like `hermes auth list`). If the key is
        # expired, clear pool_api_key so we fall through to
        # resolve_nous_runtime_credentials() which handles refresh + mint.
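        # (The usability check below passes a minimum TTL of
        # HERMES_NOUS_MIN_KEY_TTL_SECONDS, default 1800 seconds with a floor of
        # 60, so keys close to expiry are re-resolved rather than used as-is.)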
1009 if provider == "nous" and entry is not None and pool_api_key: 1010 min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))) 1011 nous_state = { 1012 "agent_key": getattr(entry, "agent_key", None), 1013 "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None), 1014 } 1015 if not _agent_key_is_usable(nous_state, min_ttl): 1016 logger.debug("Nous pool entry agent_key expired/missing, falling through to runtime resolution") 1017 pool_api_key = "" 1018 if entry is not None and pool_api_key: 1019 return _resolve_runtime_from_pool_entry( 1020 provider=provider, 1021 entry=entry, 1022 requested_provider=requested_provider, 1023 model_cfg=model_cfg, 1024 pool=pool, 1025 target_model=target_model, 1026 ) 1027 1028 if provider == "nous": 1029 try: 1030 creds = resolve_nous_runtime_credentials( 1031 min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), 1032 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), 1033 ) 1034 return { 1035 "provider": "nous", 1036 "api_mode": "chat_completions", 1037 "base_url": creds.get("base_url", "").rstrip("/"), 1038 "api_key": creds.get("api_key", ""), 1039 "source": creds.get("source", "portal"), 1040 "expires_at": creds.get("expires_at"), 1041 "requested_provider": requested_provider, 1042 } 1043 except AuthError: 1044 if requested_provider != "auto": 1045 raise 1046 # Auto-detected Nous but credentials are stale/revoked — 1047 # fall through to env-var providers (e.g. OpenRouter). 1048 logger.info("Auto-detected Nous provider but credentials failed; " 1049 "falling through to next provider.") 1050 1051 if provider == "openai-codex": 1052 try: 1053 creds = resolve_codex_runtime_credentials() 1054 return { 1055 "provider": "openai-codex", 1056 "api_mode": "codex_responses", 1057 "base_url": creds.get("base_url", "").rstrip("/"), 1058 "api_key": creds.get("api_key", ""), 1059 "source": creds.get("source", "hermes-auth-store"), 1060 "last_refresh": creds.get("last_refresh"), 1061 "requested_provider": requested_provider, 1062 } 1063 except AuthError: 1064 if requested_provider != "auto": 1065 raise 1066 # Auto-detected Codex but credentials are stale/revoked — 1067 # fall through to env-var providers (e.g. OpenRouter). 
1068 logger.info("Auto-detected Codex provider but credentials failed; " 1069 "falling through to next provider.") 1070 1071 if provider == "qwen-oauth": 1072 try: 1073 creds = resolve_qwen_runtime_credentials() 1074 return { 1075 "provider": "qwen-oauth", 1076 "api_mode": "chat_completions", 1077 "base_url": creds.get("base_url", "").rstrip("/"), 1078 "api_key": creds.get("api_key", ""), 1079 "source": creds.get("source", "qwen-cli"), 1080 "expires_at_ms": creds.get("expires_at_ms"), 1081 "requested_provider": requested_provider, 1082 } 1083 except AuthError: 1084 if requested_provider != "auto": 1085 raise 1086 logger.info("Qwen OAuth credentials failed; " 1087 "falling through to next provider.") 1088 1089 if provider == "minimax-oauth": 1090 pconfig = PROVIDER_REGISTRY.get(provider) 1091 if pconfig and pconfig.auth_type == "oauth_minimax": 1092 from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials 1093 creds = resolve_minimax_oauth_runtime_credentials() 1094 return { 1095 "provider": provider, 1096 "api_mode": "anthropic_messages", 1097 "base_url": creds["base_url"], 1098 "api_key": creds["api_key"], 1099 "source": creds.get("source", "oauth"), 1100 "requested_provider": requested_provider, 1101 } 1102 1103 if provider == "google-gemini-cli": 1104 try: 1105 creds = resolve_gemini_oauth_runtime_credentials() 1106 return { 1107 "provider": "google-gemini-cli", 1108 "api_mode": "chat_completions", 1109 "base_url": creds.get("base_url", ""), 1110 "api_key": creds.get("api_key", ""), 1111 "source": creds.get("source", "google-oauth"), 1112 "expires_at_ms": creds.get("expires_at_ms"), 1113 "email": creds.get("email", ""), 1114 "project_id": creds.get("project_id", ""), 1115 "requested_provider": requested_provider, 1116 } 1117 except AuthError: 1118 if requested_provider != "auto": 1119 raise 1120 logger.info("Google Gemini OAuth credentials failed; " 1121 "falling through to next provider.") 1122 1123 pconfig = PROVIDER_REGISTRY.get(provider) 1124 if pconfig and pconfig.auth_type == "external_process": 1125 creds = resolve_external_process_provider_credentials(provider) 1126 return { 1127 "provider": provider, 1128 "api_mode": "chat_completions", 1129 "base_url": creds.get("base_url", "").rstrip("/"), 1130 "api_key": creds.get("api_key", ""), 1131 "command": creds.get("command", ""), 1132 "args": list(creds.get("args") or []), 1133 "source": creds.get("source", "process"), 1134 "requested_provider": requested_provider, 1135 } 1136 1137 # Anthropic (native Messages API) 1138 if provider == "anthropic": 1139 # Allow base URL override from config.yaml model.base_url, but only 1140 # when the configured provider is anthropic — otherwise a non-Anthropic 1141 # base_url (e.g. Codex endpoint) would leak into Anthropic requests. 1142 cfg_provider = str(model_cfg.get("provider") or "").strip().lower() 1143 cfg_base_url = "" 1144 if cfg_provider == "anthropic": 1145 cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/") 1146 base_url = cfg_base_url or "https://api.anthropic.com" 1147 1148 # For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly — 1149 # Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure. 1150 # Azure keys don't start with "sk-ant-" so resolve_anthropic_token() 1151 # would find the Claude Code OAuth token first (priority 3) and return 1152 # that instead, causing 401s. Detect Azure endpoints and use the env 1153 # key directly to bypass the OAuth priority chain. 
        _is_azure_endpoint = "azure.com" in base_url.lower() or (
            cfg_base_url and "azure.com" in cfg_base_url.lower()
        )
        if _is_azure_endpoint:
            # Honor user-specified env var hints on the model config before
            # falling back to the built-in AZURE_ANTHROPIC_KEY / ANTHROPIC_API_KEY
            # chain. Accept both `key_env` (Hermes canonical — matches the
            # custom_providers field name) and `api_key_env` (documented in the
            # Azure Foundry guide and read by most Hermes-compatible importers).
            # Matches the config.yaml examples in website/docs/guides/azure-foundry.md.
            token = ""
            for hint_key in ("key_env", "api_key_env"):
                env_var = str(model_cfg.get(hint_key) or "").strip()
                if env_var:
                    token = os.getenv(env_var, "").strip()
                    if token:
                        break
            # Next: an inline api_key on the model config (useful in multi-profile
            # setups that want to avoid env-var juggling).
            if not token:
                token = str(model_cfg.get("api_key") or "").strip()
            # Finally fall back to the historical fixed names.
            if not token:
                token = (
                    os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
                    or os.getenv("ANTHROPIC_API_KEY", "").strip()
                )
            if not token:
                raise AuthError(
                    "No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or "
                    "ANTHROPIC_API_KEY, or point key_env/api_key_env in your "
                    "config.yaml model section at a custom env var."
                )
        else:
            from agent.anthropic_adapter import resolve_anthropic_token
            token = resolve_anthropic_token()
            if not token:
                raise AuthError(
                    "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
                    "run 'claude setup-token', or authenticate with 'claude /login'."
                )
        return {
            "provider": "anthropic",
            "api_mode": "anthropic_messages",
            "base_url": base_url,
            "api_key": token,
            "source": "env",
            "requested_provider": requested_provider,
        }

    # AWS Bedrock (native Converse API via boto3)
    if provider == "bedrock":
        from agent.bedrock_adapter import (
            has_aws_credentials,
            resolve_aws_auth_env_var,
            resolve_bedrock_region,
            is_anthropic_bedrock_model,
        )
        # When the user explicitly selected bedrock (not auto-detected),
        # trust boto3's credential chain — it handles IMDS, ECS task roles,
        # Lambda execution roles, SSO, and other implicit sources that our
        # env-var check can't detect.
        is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
        if not is_explicit and not has_aws_credentials():
            raise AuthError(
                "No AWS credentials found for Bedrock. Configure one of:\n"
                " - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n"
                " - AWS_PROFILE (for SSO / named profiles)\n"
                " - IAM instance role (EC2, ECS, Lambda)\n"
                "Or run 'aws configure' to set up credentials.",
                code="no_aws_credentials",
            )
        # Read bedrock-specific config from config.yaml
        _bedrock_cfg = load_config().get("bedrock", {})
        # Region priority: config.yaml bedrock.region → env var → us-east-1
        region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
        auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
        # Build guardrail config if configured
        _gr = _bedrock_cfg.get("guardrail", {})
        guardrail_config = None
        if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
            guardrail_config = {
                "guardrailIdentifier": _gr["guardrail_identifier"],
                "guardrailVersion": _gr["guardrail_version"],
            }
            if _gr.get("stream_processing_mode"):
                guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
            if _gr.get("trace"):
                guardrail_config["trace"] = _gr["trace"]
        # Dual-path routing: Claude models use AnthropicBedrock SDK for full
        # feature parity (prompt caching, thinking budgets, adaptive thinking).
        # Non-Claude models use the Converse API for multi-model support.
        _current_model = str(model_cfg.get("default") or "").strip()
        if is_anthropic_bedrock_model(_current_model):
            # Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path
            runtime = {
                "provider": "bedrock",
                "api_mode": "anthropic_messages",
                "base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
                "api_key": "aws-sdk",
                "source": auth_source,
                "region": region,
                "bedrock_anthropic": True,  # Signal to use AnthropicBedrock client
                "requested_provider": requested_provider,
            }
        else:
            # Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API
            runtime = {
                "provider": "bedrock",
                "api_mode": "bedrock_converse",
                "base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
                "api_key": "aws-sdk",
                "source": auth_source,
                "region": region,
                "requested_provider": requested_provider,
            }
        if guardrail_config:
            runtime["guardrail_config"] = guardrail_config
        return runtime

    # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        creds = resolve_api_key_provider_credentials(provider)
        # Honour model.base_url from config.yaml when the configured provider
        # matches this provider — mirrors the Anthropic path above. Without
        # this, users who set model.base_url to e.g. api.minimaxi.com/anthropic
        # (China endpoint) still get the hardcoded api.minimax.io default (#6039).
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == provider:
            cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
        base_url = cfg_base_url or creds.get("base_url", "").rstrip("/")
        api_mode = "chat_completions"
        if provider == "copilot":
            api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
        elif provider == "xai":
            api_mode = "codex_responses"
        else:
            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
            # Only honor persisted api_mode when it belongs to the same provider family.
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
            if provider in ("opencode-zen", "opencode-go"):
                # opencode-zen/go must always re-derive api_mode from the
                # target model (not the stale persisted api_mode), because
                # the same provider serves both anthropic_messages
                # (e.g. minimax-m2.7) and chat_completions (e.g.
                # deepseek-v4-flash) and switching models via /model would
                # otherwise carry the previous mode forward, stripping /v1
                # from base_url for chat_completions models and 404'ing.
                # Refs #16878.
                from hermes_cli.models import opencode_model_api_mode
                _effective = target_model or model_cfg.get("default", "")
                api_mode = opencode_model_api_mode(provider, _effective)
            elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
                api_mode = configured_mode
            else:
                # Auto-detect Anthropic-compatible endpoints by URL convention
                # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
                # plus api.openai.com → codex_responses and api.x.ai → codex_responses.
                detected = _detect_api_mode_for_url(base_url)
                if detected:
                    api_mode = detected
        # Strip trailing /v1 for OpenCode Anthropic models (see comment above).
        if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
            base_url = re.sub(r"/v1/?$", "", base_url)
        return {
            "provider": provider,
            "api_mode": api_mode,
            "base_url": base_url,
            "api_key": creds.get("api_key", ""),
            "source": creds.get("source", "env"),
            "requested_provider": requested_provider,
        }

    runtime = _resolve_openrouter_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    runtime["requested_provider"] = requested_provider
    return runtime


def format_runtime_provider_error(error: Exception) -> str:
    if isinstance(error, AuthError):
        return format_auth_error(error)
    return str(error)
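

# Usage sketch (illustrative; not executed by this module). A caller such as
# the CLI or gateway typically resolves a runtime once and reads the returned
# keys; the provider and model values below are hypothetical:
#
#     runtime = resolve_runtime_provider(
#         requested="openrouter",
#         target_model="some-org/some-model",
#     )
#     base_url = runtime["base_url"]
#     api_key = runtime["api_key"]
#     api_mode = runtime["api_mode"]  # chat_completions / codex_responses /
#                                     # anthropic_messages / bedrock_converse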