# hermes_cli/azure_detect.py
"""Azure Foundry endpoint auto-detection.

Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:

- API transport (OpenAI-style ``chat_completions`` vs
  Anthropic-style ``anthropic_messages``)
- Available models (best effort — Azure does not expose a deployment
  listing via the inference API key, but Azure OpenAI v1 endpoints
  return the resource's model catalog via ``GET /models``)
- Context length for each discovered/entered model, via the existing
  :func:`agent.model_metadata.get_model_context_length` resolver.

Rationale:

Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
deployment enumeration requires ARM management-plane auth. Azure
OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
a ``/models`` list, but it reflects the resource's *available* models
rather than the user's *deployed* deployment names. In practice it is
still a useful hint — the user picks a familiar model name and we look
up its context length from the catalog.

The detector never crashes on errors (every HTTP call is wrapped in a
broad try/except). Callers get a :class:`DetectionResult` with whatever
information could be gathered, and fall back to manual entry for the
rest.
"""

from __future__ import annotations

import json
import logging
import re
from dataclasses import dataclass, field
from typing import Optional
from urllib import request as urllib_request
from urllib.error import HTTPError, URLError
from urllib.parse import urlparse

logger = logging.getLogger(__name__)


# Default Azure OpenAI ``api-version`` values to probe with. The v1 GA
# endpoint accepts requests without ``api-version`` entirely, so these
# are only used as a fallback for pre-v1 resources that still require it.
_AZURE_OPENAI_PROBE_API_VERSIONS = (
    "2025-04-01-preview",
    "2024-10-21",  # oldest GA that supports /models
)

# Default Azure Anthropic ``api-version``. Matches the value used by
# ``agent/anthropic_adapter.py`` when building the Anthropic client.
_AZURE_ANTHROPIC_API_VERSION = "2025-04-15"


@dataclass
class DetectionResult:
    """Everything auto-detection could gather from a base URL + API key."""

    #: Detected API transport: ``"chat_completions"``,
    #: ``"anthropic_messages"``, or ``None`` when detection failed.
    api_mode: Optional[str] = None

    #: Deployment / model IDs returned by ``/models`` (best effort).
    #: Empty when the endpoint doesn't expose the list with an API key.
    models: list[str] = field(default_factory=list)

    #: Lowercased host from the base URL (used for display messages).
    hostname: str = ""

    #: Human-readable reason the detector chose ``api_mode``. Useful
    #: for explaining auto-detection to the user in the wizard.
    reason: str = ""

    #: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
    models_probe_ok: bool = False

    #: ``True`` when the URL was determined to be an Anthropic-style
    #: endpoint (from path suffix or live probe).
    is_anthropic: bool = False


def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
    """GET a URL with ``api-key`` + ``Authorization`` headers. Return
    ``(status_code, parsed_json_or_None)``. Never raises; network-level
    failures are reported as status ``0``."""
    req = urllib_request.Request(url, method="GET")
    # Azure OpenAI uses ``api-key``. Some Azure deployments (and
    # Anthropic-style routes) use ``Authorization: Bearer``. Send both
    # so we probe once per URL rather than twice.
    req.add_header("api-key", api_key)
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("User-Agent", "hermes-agent/azure-detect")
    try:
        with urllib_request.urlopen(req, timeout=timeout) as resp:
            body = resp.read()
            try:
                return resp.status, json.loads(body.decode("utf-8", errors="replace"))
            except ValueError:
                # json.JSONDecodeError subclasses ValueError; a 200 with
                # a non-JSON body is "reached, but not parseable".
                return resp.status, None
    except HTTPError as exc:
        return exc.code, None
    except (URLError, TimeoutError, OSError) as exc:
        logger.debug("azure_detect: GET %s failed: %s", url, exc)
        return 0, None
    except Exception as exc:  # pragma: no cover — defensive
        logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
        return 0, None


def _strip_trailing_v1(url: str) -> str:
    """Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
    return re.sub(r"/v1/?$", "", url.rstrip("/"))


def _looks_like_anthropic_path(url: str) -> bool:
    """Return True when the URL's path ends in ``/anthropic`` or
    contains a ``/anthropic/`` segment. Used by Azure Foundry
    resources that route Claude traffic through a dedicated path."""
    try:
        parsed = urlparse(url)
        path = (parsed.path or "").lower().rstrip("/")
        # The appended "/" lets one membership test also match a
        # trailing "/anthropic/vN" segment.
        return path.endswith("/anthropic") or "/anthropic/" in (path + "/")
    except Exception:
        return False


def _extract_model_ids(payload: dict) -> list[str]:
    """Extract a list of model IDs from an OpenAI-shaped ``/models``
    response. Returns ``[]`` on any shape mismatch."""
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, list):
        return []
    ids: list[str] = []
    for item in data:
        if not isinstance(item, dict):
            continue
        # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...};
        # fall back to "model"/"name" for non-standard payloads.
        mid = item.get("id") or item.get("model") or item.get("name")
        if isinstance(mid, str) and mid:
            ids.append(mid)
    return ids


def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
    """Probe ``<base>/models`` for an OpenAI-shaped response.

    Returns ``(ok, models)``. ``ok`` is True iff the endpoint accepted
    us as an OpenAI-style caller (200 OK + OpenAI-shaped JSON body).
    """
    base_url = base_url.rstrip("/")

    # Azure OpenAI v1: {resource}.openai.azure.com/openai/v1 — no
    # api-version required for GA paths, so probe without first.
    candidates = [f"{base_url}/models"]
    # Fallback: explicit api-version for pre-v1 resources
    for v in _AZURE_OPENAI_PROBE_API_VERSIONS:
        candidates.append(f"{base_url}/models?api-version={v}")

    for url in candidates:
        status, body = _http_get_json(url, api_key)
        if status == 200 and body is not None:
            ids = _extract_model_ids(body)
            if ids:
                logger.info(
                    "azure_detect: /models probe OK at %s (%d models)",
                    url, len(ids),
                )
                return True, ids
            # 200 + empty list still counts as "OpenAI shape, no models
            # listed" — let the user proceed with manual entry.
            if isinstance(body, dict) and "data" in body:
                return True, []
    return False, []


def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
    """Send a zero-token request to ``<base>/v1/messages`` and check
    whether the endpoint at least *recognises* the Anthropic Messages
    shape (any 4xx that mentions ``messages`` or ``model``, or a 400
    ``invalid_request`` with an Anthropic error shape). Never completes
    a real chat.
    """
    base = _strip_trailing_v1(base_url)
    url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
    payload = json.dumps({
        "model": "probe",
        "max_tokens": 1,
        "messages": [{"role": "user", "content": "ping"}],
    }).encode("utf-8")
    req = urllib_request.Request(url, method="POST", data=payload)
    req.add_header("api-key", api_key)
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("anthropic-version", "2023-06-01")
    req.add_header("content-type", "application/json")
    req.add_header("User-Agent", "hermes-agent/azure-detect")
    try:
        with urllib_request.urlopen(req, timeout=6.0) as resp:
            # Should never 200 — "probe" isn't a real deployment. But
            # if it does, the endpoint definitely speaks Anthropic.
            return resp.status < 500
    except HTTPError as exc:
        # 4xx with an Anthropic-shaped error body = Anthropic endpoint.
        try:
            body = exc.read().decode("utf-8", errors="replace")
            lowered = body.lower()
            # Parenthesized: Anthropic error bodies look like
            # {"type": "error", "error": {...}} — both keys must appear.
            if "anthropic" in lowered or ('"type"' in lowered and '"error"' in lowered):
                return True
            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
            # Anthropic-style calls on non-Anthropic deployments. A
            # 400 "model not found" IS Anthropic though.
            if exc.code == 400 and ("messages" in lowered or "model" in lowered):
                return True
            return False
        except Exception:
            return False
    except (URLError, TimeoutError, OSError):
        return False
    except Exception:  # pragma: no cover
        return False


def detect(base_url: str, api_key: str) -> DetectionResult:
    """Inspect an Azure endpoint and describe its transport + models.

    Call this from the wizard before asking the user to pick an API
    mode manually. The caller should treat the returned
    :class:`DetectionResult` as *advisory* — if ``api_mode`` is None,
    fall back to asking the user.
    """
    result = DetectionResult()

    try:
        parsed = urlparse(base_url)
        result.hostname = (parsed.hostname or "").lower()
    except Exception:
        result.hostname = ""

    # 1. Path sniff. Azure Foundry exposes Anthropic-style deployments
    #    under a dedicated ``/anthropic`` path.
    if _looks_like_anthropic_path(base_url):
        result.is_anthropic = True
        result.api_mode = "anthropic_messages"
        result.reason = "URL path ends in /anthropic → Anthropic Messages API"
        return result

    # 2. Try the OpenAI-style /models probe. If this works, the
    #    endpoint definitely speaks OpenAI wire.
    ok, models = _probe_openai_models(base_url, api_key)
    if ok:
        result.models_probe_ok = True
        result.models = models
        result.api_mode = "chat_completions"
        result.reason = (
            f"GET /models returned {len(models)} model(s) — OpenAI-style endpoint"
            if models
            else "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
        )
        return result

    # 3. Fallback: probe the Anthropic Messages shape. Slower and more
    #    intrusive than /models, so only run it when the OpenAI probe
    #    failed.
    if _probe_anthropic_messages(base_url, api_key):
        result.is_anthropic = True
        result.api_mode = "anthropic_messages"
        result.reason = "Endpoint accepts Anthropic Messages shape"
        return result

    # Nothing matched. Caller falls back to manual selection.
    result.reason = (
        "Could not probe endpoint (private network, missing model list, or "
        "non-standard path) — falling back to manual API-mode selection"
    )
    return result


def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
    """Thin wrapper around :func:`agent.model_metadata.get_model_context_length`
    that returns ``None`` when only the fallback default (128k) would
    fire, so the wizard can distinguish "we actually know this" from
    "we guessed"."""
    try:
        from agent.model_metadata import (
            DEFAULT_FALLBACK_CONTEXT,
            get_model_context_length,
        )
    except Exception:
        # Best-effort: the resolver module may be absent in stripped
        # installs — treat that the same as "unknown".
        return None

    try:
        n = get_model_context_length(model, base_url=base_url, api_key=api_key)
    except Exception as exc:
        logger.debug("azure_detect: context length lookup failed: %s", exc)
        return None

    if isinstance(n, int) and n > 0 and n != DEFAULT_FALLBACK_CONTEXT:
        return n
    return None


__all__ = ["DetectionResult", "detect", "lookup_context_length"]