1 """Shared runtime provider resolution for CLI, gateway, cron, and helpers.""" 2 3 from __future__ import annotations 4 5 import logging 6 import os 7 import re 8 from typing import Any, Dict, Optional 9 10 logger = logging.getLogger(__name__) 11 12 from hermes_cli import auth as auth_mod 13 from agent.credential_pool import CredentialPool, PooledCredential, get_custom_provider_pool_key, load_pool 14 from hermes_cli.auth import ( 15 AuthError, 16 DEFAULT_CODEX_BASE_URL, 17 DEFAULT_QWEN_BASE_URL, 18 PROVIDER_REGISTRY, 19 _agent_key_is_usable, 20 format_auth_error, 21 resolve_provider, 22 resolve_nous_runtime_credentials, 23 resolve_codex_runtime_credentials, 24 resolve_qwen_runtime_credentials, 25 resolve_gemini_oauth_runtime_credentials, 26 resolve_api_key_provider_credentials, 27 resolve_external_process_provider_credentials, 28 has_usable_secret, 29 ) 30 from hermes_cli.config import get_compatible_custom_providers, load_config 31 from hermes_constants import OPENROUTER_BASE_URL 32 from utils import base_url_host_matches, base_url_hostname 33 34 35 def _normalize_custom_provider_name(value: str) -> str: 36 return value.strip().lower().replace(" ", "-") 37 38 39 def _loopback_hostname(host: str) -> bool: 40 h = (host or "").lower().rstrip(".") 41 return h in {"localhost", "127.0.0.1", "::1", "0.0.0.0"} 42 43 44 def _config_base_url_trustworthy_for_bare_custom(cfg_base_url: str, cfg_provider: str) -> bool: 45 """Decide whether ``model.base_url`` may back bare ``custom`` runtime resolution. 46 47 GitHub #14676: the model picker can select Custom while ``model.provider`` still reflects a 48 previous provider. Reject non-loopback URLs unless the YAML provider is already ``custom``, 49 so a stale OpenRouter/Z.ai base_url cannot hijack local ``custom`` sessions. 50 """ 51 cfg_provider_norm = (cfg_provider or "").strip().lower() 52 bu = (cfg_base_url or "").strip() 53 if not bu: 54 return False 55 if cfg_provider_norm == "custom": 56 return True 57 if base_url_host_matches(bu, "openrouter.ai"): 58 return False 59 return _loopback_hostname(base_url_hostname(bu)) 60 61 62 def _detect_api_mode_for_url(base_url: str) -> Optional[str]: 63 """Auto-detect api_mode from the resolved base URL. 64 65 - Direct api.openai.com endpoints need the Responses API for GPT-5.x 66 tool calls with reasoning (chat/completions returns 400). 67 - Third-party Anthropic-compatible gateways (MiniMax, Zhipu GLM, 68 LiteLLM proxies, etc.) conventionally expose the native Anthropic 69 protocol under a ``/anthropic`` suffix — treat those as 70 ``anthropic_messages`` transport instead of the default 71 ``chat_completions``. 72 - Kimi Code's ``api.kimi.com/coding`` endpoint also speaks the 73 Anthropic Messages protocol (the /coding route accepts Claude 74 Code's native request shape). 
75 """ 76 normalized = (base_url or "").strip().lower().rstrip("/") 77 hostname = base_url_hostname(base_url) 78 if hostname == "api.x.ai": 79 return "codex_responses" 80 if hostname == "api.openai.com": 81 return "codex_responses" 82 if normalized.endswith("/anthropic"): 83 return "anthropic_messages" 84 if hostname == "api.kimi.com" and "/coding" in normalized: 85 return "anthropic_messages" 86 return None 87 88 89 def _auto_detect_local_model(base_url: str) -> str: 90 """Query a local server for its model name when only one model is loaded.""" 91 if not base_url: 92 return "" 93 try: 94 import requests 95 url = base_url.rstrip("/") 96 if not url.endswith("/v1"): 97 url += "/v1" 98 resp = requests.get(url + "/models", timeout=5) 99 if resp.ok: 100 models = resp.json().get("data", []) 101 if len(models) == 1: 102 model_id = models[0].get("id", "") 103 if model_id: 104 return model_id 105 except Exception: 106 pass 107 return "" 108 109 110 def _get_model_config() -> Dict[str, Any]: 111 config = load_config() 112 model_cfg = config.get("model") 113 if isinstance(model_cfg, dict): 114 cfg = dict(model_cfg) 115 # Accept "model" as alias for "default" (users intuitively write model.model) 116 if not cfg.get("default") and cfg.get("model"): 117 cfg["default"] = cfg["model"] 118 default = (cfg.get("default") or "").strip() 119 base_url = (cfg.get("base_url") or "").strip() 120 is_local = "localhost" in base_url or "127.0.0.1" in base_url 121 is_fallback = not default 122 if is_local and is_fallback and base_url: 123 detected = _auto_detect_local_model(base_url) 124 if detected: 125 cfg["default"] = detected 126 return cfg 127 if isinstance(model_cfg, str) and model_cfg.strip(): 128 return {"default": model_cfg.strip()} 129 return {} 130 131 132 def _provider_supports_explicit_api_mode(provider: Optional[str], configured_provider: Optional[str] = None) -> bool: 133 """Check whether a persisted api_mode should be honored for a given provider. 134 135 Prevents stale api_mode from a previous provider leaking into a 136 different one after a model/provider switch. Only applies the 137 persisted mode when the config's provider matches the runtime 138 provider (or when no configured provider is recorded). 139 """ 140 normalized_provider = (provider or "").strip().lower() 141 normalized_configured = (configured_provider or "").strip().lower() 142 if not normalized_configured: 143 return True 144 if normalized_provider == "custom": 145 return normalized_configured == "custom" or normalized_configured.startswith("custom:") 146 return normalized_configured == normalized_provider 147 148 149 def _copilot_runtime_api_mode(model_cfg: Dict[str, Any], api_key: str) -> str: 150 configured_provider = str(model_cfg.get("provider") or "").strip().lower() 151 configured_mode = _parse_api_mode(model_cfg.get("api_mode")) 152 if configured_mode and _provider_supports_explicit_api_mode("copilot", configured_provider): 153 return configured_mode 154 155 model_name = str(model_cfg.get("default") or "").strip() 156 if not model_name: 157 return "chat_completions" 158 159 try: 160 from hermes_cli.models import copilot_model_api_mode 161 162 return copilot_model_api_mode(model_name, api_key=api_key) 163 except Exception: 164 return "chat_completions" 165 166 167 _VALID_API_MODES = {"chat_completions", "codex_responses", "anthropic_messages", "bedrock_converse"} 168 169 170 def _parse_api_mode(raw: Any) -> Optional[str]: 171 """Validate an api_mode value from config. 
    Returns None if invalid.
    """
    if isinstance(raw, str):
        normalized = raw.strip().lower()
        if normalized in _VALID_API_MODES:
            return normalized
    return None


def _resolve_runtime_from_pool_entry(
    *,
    provider: str,
    entry: PooledCredential,
    requested_provider: str,
    model_cfg: Optional[Dict[str, Any]] = None,
    pool: Optional[CredentialPool] = None,
    target_model: Optional[str] = None,
) -> Dict[str, Any]:
    model_cfg = model_cfg or _get_model_config()
    # When the caller is resolving for a specific target model (e.g. a /model
    # mid-session switch), prefer that over the persisted model.default. This
    # prevents api_mode being computed from a stale config default that no
    # longer matches the model actually being used — the bug that caused
    # opencode-zen /v1 to be stripped for chat_completions requests when
    # config.default was still a Claude model.
    effective_model = (target_model or model_cfg.get("default") or "")
    base_url = (getattr(entry, "runtime_base_url", None) or getattr(entry, "base_url", None) or "").rstrip("/")
    api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
    api_mode = "chat_completions"
    if provider == "openai-codex":
        api_mode = "codex_responses"
        base_url = base_url or DEFAULT_CODEX_BASE_URL
    elif provider == "qwen-oauth":
        api_mode = "chat_completions"
        base_url = base_url or DEFAULT_QWEN_BASE_URL
    elif provider == "google-gemini-cli":
        api_mode = "chat_completions"
        base_url = base_url or "cloudcode-pa://google"
    elif provider == "anthropic":
        api_mode = "anthropic_messages"
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == "anthropic":
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        base_url = cfg_base_url or base_url or "https://api.anthropic.com"
    elif provider == "openrouter":
        base_url = base_url or OPENROUTER_BASE_URL
    elif provider == "xai":
        api_mode = "codex_responses"
    elif provider == "nous":
        api_mode = "chat_completions"
    elif provider == "copilot":
        api_mode = _copilot_runtime_api_mode(model_cfg, getattr(entry, "runtime_api_key", ""))
        base_url = base_url or PROVIDER_REGISTRY["copilot"].inference_base_url
    elif provider == "azure-foundry":
        # Azure Foundry: read api_mode and base_url from config
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        if cfg_provider == "azure-foundry":
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
            if cfg_base_url:
                base_url = cfg_base_url
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
            if configured_mode:
                api_mode = configured_mode
        # Model-family inference for GPT-5.x / codex / o1-o4: Azure rejects
        # /chat/completions on these with 400 "operation unsupported" — see
        # azure_foundry_model_api_mode() for rationale. Skip when the user
        # explicitly picked anthropic_messages (Anthropic-style endpoint).
        if effective_model and api_mode != "anthropic_messages":
            try:
                from hermes_cli.models import azure_foundry_model_api_mode

                inferred = azure_foundry_model_api_mode(effective_model)
            except Exception:
                inferred = None
            if inferred:
                api_mode = inferred
        # For Anthropic-style endpoints, strip /v1 suffix
        if api_mode == "anthropic_messages":
            base_url = re.sub(r"/v1/?$", "", base_url)
    else:
        configured_provider = str(model_cfg.get("provider") or "").strip().lower()
        # Honour model.base_url from config.yaml when the configured provider
        # matches this provider — same pattern as the Anthropic branch above.
        # Only override when the pool entry has no explicit base_url (i.e. it
        # fell back to the hardcoded default). Env var overrides win (#6039).
        pconfig = PROVIDER_REGISTRY.get(provider)
        pool_url_is_default = pconfig and base_url.rstrip("/") == pconfig.inference_base_url.rstrip("/")
        if configured_provider == provider and pool_url_is_default:
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
            if cfg_base_url:
                base_url = cfg_base_url
        configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
        if provider in ("opencode-zen", "opencode-go"):
            # Re-derive api_mode from the effective model rather than the
            # persisted api_mode: the opencode providers serve both
            # anthropic_messages and chat_completions models, so the previous
            # session's mode must not leak across /model switches.
            # Refs #16878.
            from hermes_cli.models import opencode_model_api_mode
            api_mode = opencode_model_api_mode(provider, effective_model)
        elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
            api_mode = configured_mode
        else:
            # Auto-detect Anthropic-compatible endpoints (/anthropic suffix,
            # Kimi /coding, api.openai.com → codex_responses, api.x.ai →
            # codex_responses).
            detected = _detect_api_mode_for_url(base_url)
            if detected:
                api_mode = detected

    # OpenCode base URLs end with /v1 for OpenAI-compatible models, but the
    # Anthropic SDK prepends its own /v1/messages to the base_url. Strip the
    # trailing /v1 so the SDK constructs the correct path (e.g.
    # https://opencode.ai/zen/go/v1/messages instead of .../v1/v1/messages).
    if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
        base_url = re.sub(r"/v1/?$", "", base_url)

    return {
        "provider": provider,
        "api_mode": api_mode,
        "base_url": base_url,
        "api_key": api_key,
        "source": getattr(entry, "source", "pool"),
        "credential_pool": pool,
        "requested_provider": requested_provider,
    }


def resolve_requested_provider(requested: Optional[str] = None) -> str:
    """Resolve provider request from explicit arg, config, then env."""
    if requested and requested.strip():
        return requested.strip().lower()

    model_cfg = _get_model_config()
    cfg_provider = model_cfg.get("provider")
    if isinstance(cfg_provider, str) and cfg_provider.strip():
        return cfg_provider.strip().lower()

    # Prefer the persisted config selection over any stale shell/.env
    # provider override so chat uses the endpoint the user last saved.
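    # Example (values illustrative): with `model.provider: nous` saved in
    # config.yaml, a leftover HERMES_INFERENCE_PROVIDER=openrouter in the shell
    # is never consulted, because the config branch above returns first.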
    env_provider = os.getenv("HERMES_INFERENCE_PROVIDER", "").strip().lower()
    if env_provider:
        return env_provider

    return "auto"


def _try_resolve_from_custom_pool(
    base_url: str,
    provider_label: str,
    api_mode_override: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Check if a credential pool exists for a custom endpoint and return a runtime dict if so."""
    pool_key = get_custom_provider_pool_key(base_url)
    if not pool_key:
        return None
    try:
        pool = load_pool(pool_key)
        if not pool.has_credentials():
            return None
        entry = pool.select()
        if entry is None:
            return None
        pool_api_key = getattr(entry, "runtime_api_key", None) or getattr(entry, "access_token", "")
        if not pool_api_key:
            return None
        return {
            "provider": provider_label,
            "api_mode": api_mode_override or _detect_api_mode_for_url(base_url) or "chat_completions",
            "base_url": base_url,
            "api_key": pool_api_key,
            "source": f"pool:{pool_key}",
            "credential_pool": pool,
        }
    except Exception:
        return None


def _get_named_custom_provider(requested_provider: str) -> Optional[Dict[str, Any]]:
    requested_norm = _normalize_custom_provider_name(requested_provider or "")
    if not requested_norm or requested_norm == "custom":
        return None

    # Raw names should only map to custom providers when they are not already
    # valid built-in providers or aliases. Explicit menu keys like
    # ``custom:local`` always target the saved custom provider.
    if requested_norm == "auto":
        return None
    if not requested_norm.startswith("custom:"):
        try:
            canonical = auth_mod.resolve_provider(requested_norm)
        except AuthError:
            pass
        else:
            # A user-declared ``custom_providers`` entry whose name matches
            # only an *alias* (``kimi`` → built-in ``kimi-coding``) is the
            # user's intended target — alias rewriting would otherwise hijack
            # the request. We only defer to the built-in when the raw name is
            # the canonical provider itself (``nous``, ``openrouter``, …) so
            # accidentally shadowing a canonical provider still resolves to
            # the built-in. See tests/hermes_cli/test_runtime_provider_resolution.py
            # ``test_named_custom_provider_does_not_shadow_builtin_provider``.
            if (canonical or "").strip().lower() == requested_norm:
                return None

    config = load_config()

    # First check providers: dict (new-style user-defined providers)
    providers = config.get("providers")
    if isinstance(providers, dict):
        for ep_name, entry in providers.items():
            if not isinstance(entry, dict):
                continue
            # Match exact name or normalized name
            name_norm = _normalize_custom_provider_name(ep_name)
            # Resolve the API key from the env var name stored in key_env
            key_env = str(entry.get("key_env", "") or "").strip()
            resolved_api_key = os.getenv(key_env, "").strip() if key_env else ""
            # Fall back to inline api_key when key_env is absent or unresolvable
            if not resolved_api_key:
                resolved_api_key = str(entry.get("api_key", "") or "").strip()

            if requested_norm in {ep_name, name_norm, f"custom:{name_norm}"}:
                # Found match by provider key
                base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
                if base_url:
                    result = {
                        "name": entry.get("name", ep_name),
                        "base_url": base_url.strip(),
                        "api_key": resolved_api_key,
                        "model": entry.get("default_model", ""),
                    }
                    # The v11→v12 migration writes the API mode under the new
                    # ``transport`` field, but hand-edited configs may still
                    # use the legacy ``api_mode`` spelling. Accept both —
                    # the runtime normaliser ``_normalize_custom_provider_entry``
                    # already does, so without this lift every migrated config
                    # silently downgrades codex_responses / anthropic_messages
                    # providers to chat_completions in the resolved runtime.
                    api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
                    if api_mode:
                        result["api_mode"] = api_mode
                    return result
            # Also check the 'name' field if present
            display_name = entry.get("name", "")
            if display_name:
                display_norm = _normalize_custom_provider_name(display_name)
                if requested_norm in {display_name, display_norm, f"custom:{display_norm}"}:
                    # Found match by display name
                    base_url = entry.get("api") or entry.get("url") or entry.get("base_url") or ""
                    if base_url:
                        result = {
                            "name": display_name,
                            "base_url": base_url.strip(),
                            "api_key": resolved_api_key,
                            "model": entry.get("default_model", ""),
                        }
                        api_mode = _parse_api_mode(entry.get("api_mode") or entry.get("transport"))
                        if api_mode:
                            result["api_mode"] = api_mode
                        return result

    # Fall back to custom_providers: list (legacy format)
    custom_providers = config.get("custom_providers")
    if isinstance(custom_providers, dict):
        logger.warning(
            "custom_providers in config.yaml is a dict, not a list. "
            "Each entry must be prefixed with '-' in YAML. "
            "Run 'hermes doctor' for details."
        )
        return None

    custom_providers = get_compatible_custom_providers(config)
    if not custom_providers:
        return None

    for entry in custom_providers:
        if not isinstance(entry, dict):
            continue
        name = entry.get("name")
        base_url = entry.get("base_url")
        if not isinstance(name, str) or not isinstance(base_url, str):
            continue
        name_norm = _normalize_custom_provider_name(name)
        menu_key = f"custom:{name_norm}"
        provider_key = str(entry.get("provider_key", "") or "").strip()
        provider_key_norm = _normalize_custom_provider_name(provider_key) if provider_key else ""
        provider_menu_key = f"custom:{provider_key_norm}" if provider_key_norm else ""
        if requested_norm not in {name_norm, menu_key, provider_key_norm, provider_menu_key}:
            continue
        result = {
            "name": name.strip(),
            "base_url": base_url.strip(),
            "api_key": str(entry.get("api_key", "") or "").strip(),
        }
        key_env = str(entry.get("key_env", "") or "").strip()
        if key_env:
            result["key_env"] = key_env
        if provider_key:
            result["provider_key"] = provider_key
        api_mode = _parse_api_mode(entry.get("api_mode"))
        if api_mode:
            result["api_mode"] = api_mode
        model_name = str(entry.get("model", "") or "").strip()
        if model_name:
            result["model"] = model_name
        return result

    return None


def _resolve_named_custom_runtime(
    *,
    requested_provider: str,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    # Bare `provider="custom"` with an explicit base_url (e.g. propagated
    # from a `model_aliases:` direct-alias resolution) — build a runtime
    # directly so the alias's base_url actually takes effect.
    requested_norm = (requested_provider or "").strip().lower()
    if requested_norm == "custom" and explicit_base_url:
        base_url = explicit_base_url.strip().rstrip("/")
        api_key_candidates = [
            (explicit_api_key or "").strip(),
            os.getenv("OPENAI_API_KEY", "").strip(),
            os.getenv("OPENROUTER_API_KEY", "").strip(),
        ]
        api_key = next(
            (c for c in api_key_candidates if has_usable_secret(c)),
            "",
        ) or "no-key-required"
        return {
            "provider": "custom",
            "api_mode": _detect_api_mode_for_url(base_url) or "chat_completions",
            "base_url": base_url,
            "api_key": api_key,
            "source": "direct-alias",
            "requested_provider": requested_provider,
        }

    custom_provider = _get_named_custom_provider(requested_provider)
    if not custom_provider:
        return None

    base_url = (
        (explicit_base_url or "").strip()
        or custom_provider.get("base_url", "")
    ).rstrip("/")
    if not base_url:
        return None

    # Check if a credential pool exists for this custom endpoint
    pool_result = _try_resolve_from_custom_pool(base_url, "custom", custom_provider.get("api_mode"))
    if pool_result:
        # Propagate the model name even when using pooled credentials —
        # the pool doesn't know about the custom_providers model field.
        model_name = custom_provider.get("model")
        if model_name:
            pool_result["model"] = model_name
        return pool_result

    api_key_candidates = [
        (explicit_api_key or "").strip(),
        str(custom_provider.get("api_key", "") or "").strip(),
        os.getenv(str(custom_provider.get("key_env", "") or "").strip(), "").strip(),
        os.getenv("OPENAI_API_KEY", "").strip(),
        os.getenv("OPENROUTER_API_KEY", "").strip(),
    ]
    api_key = next((candidate for candidate in api_key_candidates if has_usable_secret(candidate)), "")

    result = {
        "provider": "custom",
        "api_mode": custom_provider.get("api_mode")
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
        "base_url": base_url,
        "api_key": api_key or "no-key-required",
        "source": f"custom_provider:{custom_provider.get('name', requested_provider)}",
    }
    # Propagate the model name so callers can override self.model when the
    # provider name differs from the actual model string the API expects.
    if custom_provider.get("model"):
        result["model"] = custom_provider["model"]
    return result


def _resolve_openrouter_runtime(
    *,
    requested_provider: str,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Dict[str, Any]:
    model_cfg = _get_model_config()
    cfg_base_url = model_cfg.get("base_url") if isinstance(model_cfg.get("base_url"), str) else ""
    cfg_provider = model_cfg.get("provider") if isinstance(model_cfg.get("provider"), str) else ""
    cfg_api_key = ""
    for k in ("api_key", "api"):
        v = model_cfg.get(k)
        if isinstance(v, str) and v.strip():
            cfg_api_key = v.strip()
            break
    requested_norm = (requested_provider or "").strip().lower()
    cfg_provider = cfg_provider.strip().lower()

    env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
    env_custom_base_url = os.getenv("CUSTOM_BASE_URL", "").strip()

    # Use config base_url when available and the provider context matches.
    # OPENAI_BASE_URL env var is no longer consulted — config.yaml is
    # the single source of truth for endpoint URLs.
    use_config_base_url = False
    if cfg_base_url.strip() and not explicit_base_url:
        if requested_norm == "auto":
            if not cfg_provider or cfg_provider == "auto":
                use_config_base_url = True
        elif requested_norm == "custom" and _config_base_url_trustworthy_for_bare_custom(
            cfg_base_url, cfg_provider
        ):
            use_config_base_url = True

    base_url = (
        (explicit_base_url or "").strip()
        or env_custom_base_url
        or (cfg_base_url.strip() if use_config_base_url else "")
        or env_openrouter_base_url
        or OPENROUTER_BASE_URL
    ).rstrip("/")

    # Choose API key based on whether the resolved base_url targets OpenRouter.
    # When hitting OpenRouter, prefer OPENROUTER_API_KEY (issue #289).
    # When hitting a custom endpoint (e.g. Z.ai, local LLM), prefer
    # OPENAI_API_KEY so the OpenRouter key doesn't leak to an unrelated
    # provider (issues #420, #560).
    _is_openrouter_url = base_url_host_matches(base_url, "openrouter.ai")
    if _is_openrouter_url:
        api_key_candidates = [
            explicit_api_key,
            os.getenv("OPENROUTER_API_KEY"),
            os.getenv("OPENAI_API_KEY"),
        ]
    else:
        # Custom endpoint: use api_key from config when using config base_url (#1760).
        # When the endpoint is Ollama Cloud, check OLLAMA_API_KEY — it's
        # the canonical env var for ollama.com authentication. Match on
        # HOST, not substring — a custom base_url whose path contains
        # "ollama.com" (e.g. http://127.0.0.1/ollama.com/v1) or whose
        # hostname is a look-alike (ollama.com.attacker.test) must not
        # receive the Ollama credential. See GHSA-76xc-57q6-vm5m.
        _is_ollama_url = base_url_host_matches(base_url, "ollama.com")
        api_key_candidates = [
            explicit_api_key,
            (cfg_api_key if use_config_base_url else ""),
            (os.getenv("OLLAMA_API_KEY") if _is_ollama_url else ""),
            os.getenv("OPENAI_API_KEY"),
            os.getenv("OPENROUTER_API_KEY"),
        ]
    api_key = next(
        (str(candidate or "").strip() for candidate in api_key_candidates if has_usable_secret(candidate)),
        "",
    )

    source = "explicit" if (explicit_api_key or explicit_base_url) else "env/config"

    # When "custom" was explicitly requested, preserve that as the provider
    # name instead of silently relabeling to "openrouter" (#2562).
    # Also provide a placeholder API key for local servers that don't require
    # authentication — the OpenAI SDK requires a non-empty api_key string.
    effective_provider = "custom" if requested_norm == "custom" else "openrouter"

    # For custom endpoints, check if a credential pool exists
    if effective_provider == "custom" and base_url:
        pool_result = _try_resolve_from_custom_pool(
            base_url, effective_provider, _parse_api_mode(model_cfg.get("api_mode")),
        )
        if pool_result:
            return pool_result

    if effective_provider == "custom" and not api_key and not _is_openrouter_url:
        api_key = "no-key-required"

    return {
        "provider": effective_provider,
        "api_mode": _parse_api_mode(model_cfg.get("api_mode"))
        or _detect_api_mode_for_url(base_url)
        or "chat_completions",
        "base_url": base_url,
        "api_key": api_key,
        "source": source,
    }


def _resolve_azure_foundry_runtime(
    *,
    requested_provider: str,
    model_cfg: Dict[str, Any],
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
    target_model: Optional[str] = None,
) -> Dict[str, Any]:
    """Resolve an Azure Foundry runtime entry.

    Reads ``model.base_url`` + ``model.api_mode`` from config.yaml (or
    explicit overrides), pulls the API key from ``.env`` / env var, and
    strips a trailing ``/v1`` for Anthropic-style endpoints because the
    Anthropic SDK appends ``/v1/messages`` internally.

    Raises :class:`AuthError` when required values are missing.
    """
    explicit_api_key = str(explicit_api_key or "").strip()
    explicit_base_url_clean = str(explicit_base_url or "").strip().rstrip("/")

    cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
    cfg_base_url = ""
    cfg_api_mode = "chat_completions"
    if cfg_provider == "azure-foundry":
        cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        cfg_api_mode = _parse_api_mode(model_cfg.get("api_mode")) or "chat_completions"

    # Model-family inference: Azure Foundry deploys GPT-5.x / codex / o1-o4
    # reasoning models as Responses-API-only. Calling /chat/completions
    # against them returns 400 "The requested operation is unsupported."
    # Upgrade api_mode when the model name matches, unless the user has
    # explicitly chosen anthropic_messages (Anthropic-style endpoint).
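    # Illustrative (model names hypothetical): a deployment named
    # "gpt-5.1-codex" or "o3-mini" would be upgraded to codex_responses by the
    # inference below, while a name outside those families leaves cfg_api_mode
    # untouched.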
    effective_model = str(target_model or model_cfg.get("default") or "").strip()
    if effective_model and cfg_api_mode != "anthropic_messages":
        try:
            from hermes_cli.models import azure_foundry_model_api_mode

            inferred = azure_foundry_model_api_mode(effective_model)
        except Exception:
            inferred = None
        if inferred:
            cfg_api_mode = inferred

    env_base_url = os.getenv("AZURE_FOUNDRY_BASE_URL", "").strip().rstrip("/")
    base_url = explicit_base_url_clean or cfg_base_url or env_base_url
    if not base_url:
        raise AuthError(
            "Azure Foundry requires a base URL. Set it via 'hermes model' or "
            "the AZURE_FOUNDRY_BASE_URL environment variable."
        )

    api_key = explicit_api_key
    if not api_key:
        try:
            from hermes_cli.config import get_env_value
            api_key = get_env_value("AZURE_FOUNDRY_API_KEY") or ""
        except Exception:
            api_key = ""
    if not api_key:
        api_key = os.getenv("AZURE_FOUNDRY_API_KEY", "").strip()
    if not api_key:
        raise AuthError(
            "Azure Foundry requires an API key. Set AZURE_FOUNDRY_API_KEY in "
            "~/.hermes/.env or run 'hermes model' to configure."
        )

    # Anthropic SDK appends /v1/messages itself, so strip any trailing /v1
    # we inherited from the configured base_url to avoid double-/v1 paths.
    if cfg_api_mode == "anthropic_messages":
        base_url = re.sub(r"/v1/?$", "", base_url)

    source = "explicit" if (explicit_api_key or explicit_base_url) else "config"
    return {
        "provider": "azure-foundry",
        "api_mode": cfg_api_mode,
        "base_url": base_url,
        "api_key": api_key,
        "source": source,
        "requested_provider": requested_provider,
    }


def _resolve_explicit_runtime(
    *,
    provider: str,
    requested_provider: str,
    model_cfg: Dict[str, Any],
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    explicit_api_key = str(explicit_api_key or "").strip()
    explicit_base_url = str(explicit_base_url or "").strip().rstrip("/")
    if not explicit_api_key and not explicit_base_url:
        return None

    if provider == "anthropic":
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == "anthropic":
            cfg_base_url = str(model_cfg.get("base_url") or "").strip().rstrip("/")
        base_url = explicit_base_url or cfg_base_url or "https://api.anthropic.com"
        api_key = explicit_api_key
        if not api_key:
            from agent.anthropic_adapter import resolve_anthropic_token

            api_key = resolve_anthropic_token()
        if not api_key:
            raise AuthError(
                "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
                "run 'claude setup-token', or authenticate with 'claude /login'."
            )
        return {
            "provider": "anthropic",
            "api_mode": "anthropic_messages",
            "base_url": base_url,
            "api_key": api_key,
            "source": "explicit",
            "requested_provider": requested_provider,
        }

    if provider == "openai-codex":
        base_url = explicit_base_url or DEFAULT_CODEX_BASE_URL
        api_key = explicit_api_key
        last_refresh = None
        if not api_key:
            creds = resolve_codex_runtime_credentials()
            api_key = creds.get("api_key", "")
            last_refresh = creds.get("last_refresh")
            if not explicit_base_url:
                base_url = creds.get("base_url", "").rstrip("/") or base_url
        return {
            "provider": "openai-codex",
            "api_mode": "codex_responses",
            "base_url": base_url,
            "api_key": api_key,
            "source": "explicit",
            "last_refresh": last_refresh,
            "requested_provider": requested_provider,
        }

    if provider == "nous":
        state = auth_mod.get_provider_auth_state("nous") or {}
        base_url = (
            explicit_base_url
            or str(state.get("inference_base_url") or auth_mod.DEFAULT_NOUS_INFERENCE_URL).strip().rstrip("/")
        )
        # Only use agent_key for inference — access_token is an OAuth token for the
        # portal API (minting keys, refreshing tokens), not for the inference API.
        # Falling back to access_token sends an OAuth bearer token to the inference
        # endpoint, which returns 404 because it is not a valid inference credential.
        api_key = explicit_api_key or str(state.get("agent_key") or "").strip()
        expires_at = state.get("agent_key_expires_at") or state.get("expires_at")
        if not api_key:
            creds = resolve_nous_runtime_credentials(
                min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))),
                timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")),
            )
            api_key = creds.get("api_key", "")
            expires_at = creds.get("expires_at")
            if not explicit_base_url:
                base_url = creds.get("base_url", "").rstrip("/") or base_url
        return {
            "provider": "nous",
            "api_mode": "chat_completions",
            "base_url": base_url,
            "api_key": api_key,
            "source": "explicit",
            "expires_at": expires_at,
            "requested_provider": requested_provider,
        }

    # Azure Foundry: user-configured endpoint with selectable API mode
    if provider == "azure-foundry":
        return _resolve_azure_foundry_runtime(
            requested_provider=requested_provider,
            model_cfg=model_cfg,
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
        )

    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        env_url = ""
        if pconfig.base_url_env_var:
            env_url = os.getenv(pconfig.base_url_env_var, "").strip().rstrip("/")

        base_url = explicit_base_url
        if not base_url:
            if provider in ("kimi-coding", "kimi-coding-cn"):
                creds = resolve_api_key_provider_credentials(provider)
                base_url = creds.get("base_url", "").rstrip("/")
            else:
                base_url = env_url or pconfig.inference_base_url

        api_key = explicit_api_key
        if not api_key:
            creds = resolve_api_key_provider_credentials(provider)
            api_key = creds.get("api_key", "")
            if not base_url:
                base_url = creds.get("base_url", "").rstrip("/")

        api_mode = "chat_completions"
        if provider == "copilot":
            api_mode = _copilot_runtime_api_mode(model_cfg, api_key)
        elif provider == "xai":
            api_mode = "codex_responses"
        else:
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
            if configured_mode:
                api_mode = configured_mode
            else:
                # Auto-detect from URL (Anthropic /anthropic suffix,
                # api.openai.com → Responses, Kimi /coding, etc.).
                detected = _detect_api_mode_for_url(base_url)
                if detected:
                    api_mode = detected

        return {
            "provider": provider,
            "api_mode": api_mode,
            "base_url": base_url.rstrip("/"),
            "api_key": api_key,
            "source": "explicit",
            "requested_provider": requested_provider,
        }

    return None


def resolve_runtime_provider(
    *,
    requested: Optional[str] = None,
    explicit_api_key: Optional[str] = None,
    explicit_base_url: Optional[str] = None,
    target_model: Optional[str] = None,
) -> Dict[str, Any]:
    """Resolve runtime provider credentials for agent execution.

    target_model: Optional override for model_cfg.get("default") when
    computing provider-specific api_mode (e.g. OpenCode Zen/Go where different
    models route through different API surfaces). Callers performing an
    explicit mid-session model switch should pass the new model here so
    api_mode is derived from the model they are switching TO, not the stale
    persisted default. Other callers can leave it None to preserve existing
    behavior (api_mode derived from config).
    """
    requested_provider = resolve_requested_provider(requested)

    # Azure Anthropic short-circuit: when explicitly targeting an Azure endpoint
    # with provider="anthropic", bypass _resolve_named_custom_runtime (which would
    # return provider="custom" with chat_completions api_mode and no valid key).
    # Instead, use the Azure key directly with anthropic_messages api_mode.
    _eff_base = (explicit_base_url or "").strip()
    if requested_provider == "anthropic" and "azure.com" in _eff_base:
        _azure_key = (
            (explicit_api_key or "").strip()
            or os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
            or os.getenv("ANTHROPIC_API_KEY", "").strip()
        )
        return {
            "provider": "anthropic",
            "api_mode": "anthropic_messages",
            "base_url": _eff_base.rstrip("/"),
            "api_key": _azure_key,
            "source": "azure-explicit",
            "requested_provider": requested_provider,
        }

    # Azure Foundry: user-configured endpoint with selectable API mode
    # (OpenAI-style chat_completions or Anthropic-style anthropic_messages).
    # Resolve before the custom-runtime / pool / generic paths so Azure
    # config is always picked up from model.base_url + model.api_mode,
    # regardless of whether the caller passed explicit_* args.
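    # Sketch of the config.yaml shape this branch consumes (values are
    # hypothetical placeholders):
    #
    #   model:
    #     provider: azure-foundry
    #     default: my-deployment-name
    #     base_url: https://example-resource.invalid/anthropic
    #     api_mode: anthropic_messages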
    if requested_provider == "azure-foundry":
        azure_runtime = _resolve_azure_foundry_runtime(
            requested_provider=requested_provider,
            model_cfg=_get_model_config(),
            explicit_api_key=explicit_api_key,
            explicit_base_url=explicit_base_url,
            target_model=target_model,
        )
        return azure_runtime

    custom_runtime = _resolve_named_custom_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    if custom_runtime:
        custom_runtime["requested_provider"] = requested_provider
        return custom_runtime

    provider = resolve_provider(
        requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    model_cfg = _get_model_config()
    explicit_runtime = _resolve_explicit_runtime(
        provider=provider,
        requested_provider=requested_provider,
        model_cfg=model_cfg,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    if explicit_runtime:
        return explicit_runtime

    should_use_pool = provider != "openrouter"
    if provider == "openrouter":
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = str(model_cfg.get("base_url") or "").strip()
        env_openai_base_url = os.getenv("OPENAI_BASE_URL", "").strip()
        env_openrouter_base_url = os.getenv("OPENROUTER_BASE_URL", "").strip()
        has_custom_endpoint = bool(
            explicit_base_url
            or env_openai_base_url
            or env_openrouter_base_url
        )
        if cfg_base_url and cfg_provider in {"auto", "custom"}:
            has_custom_endpoint = True
        has_runtime_override = bool(explicit_api_key or explicit_base_url)
        should_use_pool = (
            requested_provider in {"openrouter", "auto"}
            and not has_custom_endpoint
            and not has_runtime_override
        )

    try:
        pool = load_pool(provider) if should_use_pool else None
    except Exception:
        pool = None
    if pool and pool.has_credentials():
        entry = pool.select()
        pool_api_key = ""
        if entry is not None:
            pool_api_key = (
                getattr(entry, "runtime_api_key", None)
                or getattr(entry, "access_token", "")
            )
        # For Nous, the pool entry's runtime_api_key is the agent_key — a
        # short-lived inference credential (~30 min TTL). The pool doesn't
        # refresh it during selection (that would trigger network calls in
        # non-runtime contexts like `hermes auth list`). If the key is
        # expired, clear pool_api_key so we fall through to
        # resolve_nous_runtime_credentials() which handles refresh + mint.
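        # (The usability check below passes a minimum TTL of
        # HERMES_NOUS_MIN_KEY_TTL_SECONDS, default 1800 seconds with a floor of
        # 60, so keys close to expiry are re-resolved rather than used as-is.)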
1009 if provider == "nous" and entry is not None and pool_api_key: 1010 min_ttl = max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))) 1011 nous_state = { 1012 "agent_key": getattr(entry, "agent_key", None), 1013 "agent_key_expires_at": getattr(entry, "agent_key_expires_at", None), 1014 } 1015 if not _agent_key_is_usable(nous_state, min_ttl): 1016 logger.debug("Nous pool entry agent_key expired/missing, falling through to runtime resolution") 1017 pool_api_key = "" 1018 if entry is not None and pool_api_key: 1019 return _resolve_runtime_from_pool_entry( 1020 provider=provider, 1021 entry=entry, 1022 requested_provider=requested_provider, 1023 model_cfg=model_cfg, 1024 pool=pool, 1025 target_model=target_model, 1026 ) 1027 1028 if provider == "nous": 1029 try: 1030 creds = resolve_nous_runtime_credentials( 1031 min_key_ttl_seconds=max(60, int(os.getenv("HERMES_NOUS_MIN_KEY_TTL_SECONDS", "1800"))), 1032 timeout_seconds=float(os.getenv("HERMES_NOUS_TIMEOUT_SECONDS", "15")), 1033 ) 1034 return { 1035 "provider": "nous", 1036 "api_mode": "chat_completions", 1037 "base_url": creds.get("base_url", "").rstrip("/"), 1038 "api_key": creds.get("api_key", ""), 1039 "source": creds.get("source", "portal"), 1040 "expires_at": creds.get("expires_at"), 1041 "requested_provider": requested_provider, 1042 } 1043 except AuthError: 1044 if requested_provider != "auto": 1045 raise 1046 # Auto-detected Nous but credentials are stale/revoked — 1047 # fall through to env-var providers (e.g. OpenRouter). 1048 logger.info("Auto-detected Nous provider but credentials failed; " 1049 "falling through to next provider.") 1050 1051 if provider == "openai-codex": 1052 try: 1053 creds = resolve_codex_runtime_credentials() 1054 return { 1055 "provider": "openai-codex", 1056 "api_mode": "codex_responses", 1057 "base_url": creds.get("base_url", "").rstrip("/"), 1058 "api_key": creds.get("api_key", ""), 1059 "source": creds.get("source", "hermes-auth-store"), 1060 "last_refresh": creds.get("last_refresh"), 1061 "requested_provider": requested_provider, 1062 } 1063 except AuthError: 1064 if requested_provider != "auto": 1065 raise 1066 # Auto-detected Codex but credentials are stale/revoked — 1067 # fall through to env-var providers (e.g. OpenRouter). 
1068 logger.info("Auto-detected Codex provider but credentials failed; " 1069 "falling through to next provider.") 1070 1071 if provider == "qwen-oauth": 1072 try: 1073 creds = resolve_qwen_runtime_credentials() 1074 return { 1075 "provider": "qwen-oauth", 1076 "api_mode": "chat_completions", 1077 "base_url": creds.get("base_url", "").rstrip("/"), 1078 "api_key": creds.get("api_key", ""), 1079 "source": creds.get("source", "qwen-cli"), 1080 "expires_at_ms": creds.get("expires_at_ms"), 1081 "requested_provider": requested_provider, 1082 } 1083 except AuthError: 1084 if requested_provider != "auto": 1085 raise 1086 logger.info("Qwen OAuth credentials failed; " 1087 "falling through to next provider.") 1088 1089 if provider == "minimax-oauth": 1090 pconfig = PROVIDER_REGISTRY.get(provider) 1091 if pconfig and pconfig.auth_type == "oauth_minimax": 1092 from hermes_cli.auth import resolve_minimax_oauth_runtime_credentials 1093 creds = resolve_minimax_oauth_runtime_credentials() 1094 return { 1095 "provider": provider, 1096 "api_mode": "anthropic_messages", 1097 "base_url": creds["base_url"], 1098 "api_key": creds["api_key"], 1099 "source": creds.get("source", "oauth"), 1100 "requested_provider": requested_provider, 1101 } 1102 1103 if provider == "google-gemini-cli": 1104 try: 1105 creds = resolve_gemini_oauth_runtime_credentials() 1106 return { 1107 "provider": "google-gemini-cli", 1108 "api_mode": "chat_completions", 1109 "base_url": creds.get("base_url", ""), 1110 "api_key": creds.get("api_key", ""), 1111 "source": creds.get("source", "google-oauth"), 1112 "expires_at_ms": creds.get("expires_at_ms"), 1113 "email": creds.get("email", ""), 1114 "project_id": creds.get("project_id", ""), 1115 "requested_provider": requested_provider, 1116 } 1117 except AuthError: 1118 if requested_provider != "auto": 1119 raise 1120 logger.info("Google Gemini OAuth credentials failed; " 1121 "falling through to next provider.") 1122 1123 pconfig = PROVIDER_REGISTRY.get(provider) 1124 if pconfig and pconfig.auth_type == "external_process": 1125 creds = resolve_external_process_provider_credentials(provider) 1126 return { 1127 "provider": provider, 1128 "api_mode": "chat_completions", 1129 "base_url": creds.get("base_url", "").rstrip("/"), 1130 "api_key": creds.get("api_key", ""), 1131 "command": creds.get("command", ""), 1132 "args": list(creds.get("args") or []), 1133 "source": creds.get("source", "process"), 1134 "requested_provider": requested_provider, 1135 } 1136 1137 # Anthropic (native Messages API) 1138 if provider == "anthropic": 1139 # Allow base URL override from config.yaml model.base_url, but only 1140 # when the configured provider is anthropic — otherwise a non-Anthropic 1141 # base_url (e.g. Codex endpoint) would leak into Anthropic requests. 1142 cfg_provider = str(model_cfg.get("provider") or "").strip().lower() 1143 cfg_base_url = "" 1144 if cfg_provider == "anthropic": 1145 cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/") 1146 base_url = cfg_base_url or "https://api.anthropic.com" 1147 1148 # For Azure AI Foundry endpoints, use ANTHROPIC_API_KEY directly — 1149 # Claude Code OAuth tokens (sk-ant-oat01) are not accepted by Azure. 1150 # Azure keys don't start with "sk-ant-" so resolve_anthropic_token() 1151 # would find the Claude Code OAuth token first (priority 3) and return 1152 # that instead, causing 401s. Detect Azure endpoints and use the env 1153 # key directly to bypass the OAuth priority chain. 
        _is_azure_endpoint = "azure.com" in base_url.lower() or (
            cfg_base_url and "azure.com" in cfg_base_url.lower()
        )
        if _is_azure_endpoint:
            # Honor user-specified env var hints on the model config before
            # falling back to the built-in AZURE_ANTHROPIC_KEY / ANTHROPIC_API_KEY
            # chain. Accept both `key_env` (Hermes canonical — matches the
            # custom_providers field name) and `api_key_env` (documented in the
            # Azure Foundry guide and read by most Hermes-compatible importers).
            # Matches the config.yaml examples in website/docs/guides/azure-foundry.md.
            token = ""
            for hint_key in ("key_env", "api_key_env"):
                env_var = str(model_cfg.get(hint_key) or "").strip()
                if env_var:
                    token = os.getenv(env_var, "").strip()
                    if token:
                        break
            # Next: an inline api_key on the model config (useful in multi-profile
            # setups that want to avoid env-var juggling).
            if not token:
                token = str(model_cfg.get("api_key") or "").strip()
            # Finally fall back to the historical fixed names.
            if not token:
                token = (
                    os.getenv("AZURE_ANTHROPIC_KEY", "").strip()
                    or os.getenv("ANTHROPIC_API_KEY", "").strip()
                )
            if not token:
                raise AuthError(
                    "No Azure Anthropic API key found. Set AZURE_ANTHROPIC_KEY or "
                    "ANTHROPIC_API_KEY, or point key_env/api_key_env in your "
                    "config.yaml model section at a custom env var."
                )
        else:
            from agent.anthropic_adapter import resolve_anthropic_token
            token = resolve_anthropic_token()
            if not token:
                raise AuthError(
                    "No Anthropic credentials found. Set ANTHROPIC_TOKEN or ANTHROPIC_API_KEY, "
                    "run 'claude setup-token', or authenticate with 'claude /login'."
                )
        return {
            "provider": "anthropic",
            "api_mode": "anthropic_messages",
            "base_url": base_url,
            "api_key": token,
            "source": "env",
            "requested_provider": requested_provider,
        }

    # AWS Bedrock (native Converse API via boto3)
    if provider == "bedrock":
        from agent.bedrock_adapter import (
            has_aws_credentials,
            resolve_aws_auth_env_var,
            resolve_bedrock_region,
            is_anthropic_bedrock_model,
        )
        # When the user explicitly selected bedrock (not auto-detected),
        # trust boto3's credential chain — it handles IMDS, ECS task roles,
        # Lambda execution roles, SSO, and other implicit sources that our
        # env-var check can't detect.
        is_explicit = requested_provider in ("bedrock", "aws", "aws-bedrock", "amazon-bedrock", "amazon")
        if not is_explicit and not has_aws_credentials():
            raise AuthError(
                "No AWS credentials found for Bedrock. Configure one of:\n"
                " - AWS_ACCESS_KEY_ID + AWS_SECRET_ACCESS_KEY\n"
                " - AWS_PROFILE (for SSO / named profiles)\n"
                " - IAM instance role (EC2, ECS, Lambda)\n"
                "Or run 'aws configure' to set up credentials.",
                code="no_aws_credentials",
            )
        # Read bedrock-specific config from config.yaml
        _bedrock_cfg = load_config().get("bedrock", {})
        # Region priority: config.yaml bedrock.region → env var → us-east-1
        region = (_bedrock_cfg.get("region") or "").strip() or resolve_bedrock_region()
        auth_source = resolve_aws_auth_env_var() or "aws-sdk-default-chain"
        # Build guardrail config if configured
        _gr = _bedrock_cfg.get("guardrail", {})
        guardrail_config = None
        if _gr.get("guardrail_identifier") and _gr.get("guardrail_version"):
            guardrail_config = {
                "guardrailIdentifier": _gr["guardrail_identifier"],
                "guardrailVersion": _gr["guardrail_version"],
            }
            if _gr.get("stream_processing_mode"):
                guardrail_config["streamProcessingMode"] = _gr["stream_processing_mode"]
            if _gr.get("trace"):
                guardrail_config["trace"] = _gr["trace"]
        # Dual-path routing: Claude models use AnthropicBedrock SDK for full
        # feature parity (prompt caching, thinking budgets, adaptive thinking).
        # Non-Claude models use the Converse API for multi-model support.
        _current_model = str(model_cfg.get("default") or "").strip()
        if is_anthropic_bedrock_model(_current_model):
            # Claude on Bedrock → AnthropicBedrock SDK → anthropic_messages path
            runtime = {
                "provider": "bedrock",
                "api_mode": "anthropic_messages",
                "base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
                "api_key": "aws-sdk",
                "source": auth_source,
                "region": region,
                "bedrock_anthropic": True,  # Signal to use AnthropicBedrock client
                "requested_provider": requested_provider,
            }
        else:
            # Non-Claude (Nova, DeepSeek, Llama, etc.) → Converse API
            runtime = {
                "provider": "bedrock",
                "api_mode": "bedrock_converse",
                "base_url": f"https://bedrock-runtime.{region}.amazonaws.com",
                "api_key": "aws-sdk",
                "source": auth_source,
                "region": region,
                "requested_provider": requested_provider,
            }
        if guardrail_config:
            runtime["guardrail_config"] = guardrail_config
        return runtime

    # API-key providers (z.ai/GLM, Kimi, MiniMax, MiniMax-CN)
    pconfig = PROVIDER_REGISTRY.get(provider)
    if pconfig and pconfig.auth_type == "api_key":
        creds = resolve_api_key_provider_credentials(provider)
        # Honour model.base_url from config.yaml when the configured provider
        # matches this provider — mirrors the Anthropic path above. Without
        # this, users who set model.base_url to e.g. api.minimaxi.com/anthropic
        # (China endpoint) still get the hardcoded api.minimax.io default (#6039).
        cfg_provider = str(model_cfg.get("provider") or "").strip().lower()
        cfg_base_url = ""
        if cfg_provider == provider:
            cfg_base_url = (model_cfg.get("base_url") or "").strip().rstrip("/")
        base_url = cfg_base_url or creds.get("base_url", "").rstrip("/")
        api_mode = "chat_completions"
        if provider == "copilot":
            api_mode = _copilot_runtime_api_mode(model_cfg, creds.get("api_key", ""))
        elif provider == "xai":
            api_mode = "codex_responses"
        else:
            configured_provider = str(model_cfg.get("provider") or "").strip().lower()
            # Only honor persisted api_mode when it belongs to the same provider family.
            configured_mode = _parse_api_mode(model_cfg.get("api_mode"))
            if provider in ("opencode-zen", "opencode-go"):
                # opencode-zen/go must always re-derive api_mode from the
                # target model (not the stale persisted api_mode), because
                # the same provider serves both anthropic_messages
                # (e.g. minimax-m2.7) and chat_completions (e.g.
                # deepseek-v4-flash) and switching models via /model would
                # otherwise carry the previous mode forward, stripping /v1
                # from base_url for chat_completions models and 404'ing.
                # Refs #16878.
                from hermes_cli.models import opencode_model_api_mode
                _effective = target_model or model_cfg.get("default", "")
                api_mode = opencode_model_api_mode(provider, _effective)
            elif configured_mode and _provider_supports_explicit_api_mode(provider, configured_provider):
                api_mode = configured_mode
            else:
                # Auto-detect Anthropic-compatible endpoints by URL convention
                # (e.g. https://api.minimax.io/anthropic, https://dashscope.../anthropic)
                # plus api.openai.com → codex_responses and api.x.ai → codex_responses.
                detected = _detect_api_mode_for_url(base_url)
                if detected:
                    api_mode = detected
        # Strip trailing /v1 for OpenCode Anthropic models (see comment above).
        if api_mode == "anthropic_messages" and provider in ("opencode-zen", "opencode-go"):
            base_url = re.sub(r"/v1/?$", "", base_url)
        return {
            "provider": provider,
            "api_mode": api_mode,
            "base_url": base_url,
            "api_key": creds.get("api_key", ""),
            "source": creds.get("source", "env"),
            "requested_provider": requested_provider,
        }

    runtime = _resolve_openrouter_runtime(
        requested_provider=requested_provider,
        explicit_api_key=explicit_api_key,
        explicit_base_url=explicit_base_url,
    )
    runtime["requested_provider"] = requested_provider
    return runtime


def format_runtime_provider_error(error: Exception) -> str:
    if isinstance(error, AuthError):
        return format_auth_error(error)
    return str(error)
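

# Usage sketch (illustrative; not executed by this module). A caller such as
# the CLI or gateway typically resolves a runtime once and reads the returned
# keys; the provider and model values below are hypothetical:
#
#     runtime = resolve_runtime_provider(
#         requested="openrouter",
#         target_model="some-org/some-model",
#     )
#     base_url = runtime["base_url"]
#     api_key = runtime["api_key"]
#     api_mode = runtime["api_mode"]  # chat_completions / codex_responses /
#                                     # anthropic_messages / bedrock_converse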