# hermes_cli/azure_detect.py
  1  """Azure Foundry endpoint auto-detection.
  2  
  3  Inspect an Azure AI Foundry / Azure OpenAI endpoint to determine:
  4    - API transport (OpenAI-style ``chat_completions`` vs
  5      Anthropic-style ``anthropic_messages``)
  6    - Available models (best effort — Azure does not expose a deployment
  7      listing via the inference API key, but Azure OpenAI v1 endpoints
  8      return the resource's model catalog via ``GET /models``)
  9    - Context length for each discovered/entered model, via the existing
 10      :func:`agent.model_metadata.get_model_context_length` resolver.
 11  
 12  Rationale:
 13  
 14  Azure has no pure-API-key deployment-listing endpoint — per Microsoft,
 15  deployment enumeration requires ARM management-plane auth.  Azure
 16  OpenAI v1 endpoints ``{resource}.openai.azure.com/openai/v1`` do return
 17  a ``/models`` list, but it reflects the resource's *available* models
 18  rather than the user's *deployed* deployment names.  In practice it is
 19  still a useful hint — the user picks a familiar model name and we look
 20  up its context length from the catalog.
 21  
 22  The detector never crashes on errors (every HTTP call is wrapped in a
 23  broad try/except).  Callers get a :class:`DetectionResult` with whatever
 24  information could be gathered, and fall back to manual entry for the
 25  rest.
 26  """
 27  
 28  from __future__ import annotations
 29  
 30  import json
 31  import logging
 32  import re
 33  from dataclasses import dataclass, field
 34  from typing import Optional
 35  from urllib import request as urllib_request
 36  from urllib.error import HTTPError, URLError
 37  from urllib.parse import urlparse
 38  
 39  logger = logging.getLogger(__name__)
 40  
 41  
# Azure OpenAI ``api-version`` values to probe with, newest first.  The
# v1 GA endpoint accepts requests without ``api-version`` entirely, so
# these are only used as a fallback for pre-v1 resources that still
# require the query parameter.
_AZURE_OPENAI_PROBE_API_VERSIONS: tuple[str, ...] = (
    "2025-04-01-preview",
    "2024-10-21",  # oldest GA that supports /models
)

# Default Azure Anthropic ``api-version`` appended to the /v1/messages
# probe URL.  Matches the value used by ``agent/anthropic_adapter.py``
# when building the Anthropic client.
_AZURE_ANTHROPIC_API_VERSION: str = "2025-04-15"
 53  
 54  
 55  @dataclass
 56  class DetectionResult:
 57      """Everything auto-detection could gather from a base URL + API key."""
 58  
 59      #: Detected API transport: ``"chat_completions"``,
 60      #: ``"anthropic_messages"``, or ``None`` when detection failed.
 61      api_mode: Optional[str] = None
 62  
 63      #: Deployment / model IDs returned by ``/models`` (best effort).
 64      #: Empty when the endpoint doesn't expose the list with an API key.
 65      models: list[str] = field(default_factory=list)
 66  
 67      #: Lowercased host from the base URL (used for display messages).
 68      hostname: str = ""
 69  
 70      #: Human-readable reason the detector chose ``api_mode``.  Useful
 71      #: for explaining auto-detection to the user in the wizard.
 72      reason: str = ""
 73  
 74      #: ``True`` when ``/models`` returned a valid OpenAI-shaped payload.
 75      models_probe_ok: bool = False
 76  
 77      #: ``True`` when the URL was determined to be an Anthropic-style
 78      #: endpoint (from path suffix or live probe).
 79      is_anthropic: bool = False
 80  
 81  
 82  def _http_get_json(url: str, api_key: str, timeout: float = 6.0) -> tuple[int, Optional[dict]]:
 83      """GET a URL with ``api-key`` + ``Authorization`` headers.  Return
 84      ``(status_code, parsed_json_or_None)``.  Never raises."""
 85      req = urllib_request.Request(url, method="GET")
 86      # Azure OpenAI uses ``api-key``.  Some Azure deployments (and
 87      # Anthropic-style routes) use ``Authorization: Bearer``.  Send both
 88      # so we probe once per URL rather than twice.
 89      req.add_header("api-key", api_key)
 90      req.add_header("Authorization", f"Bearer {api_key}")
 91      req.add_header("User-Agent", "hermes-agent/azure-detect")
 92      try:
 93          with urllib_request.urlopen(req, timeout=timeout) as resp:
 94              body = resp.read()
 95              try:
 96                  return resp.status, json.loads(body.decode("utf-8", errors="replace"))
 97              except Exception:
 98                  return resp.status, None
 99      except HTTPError as exc:
100          return exc.code, None
101      except (URLError, TimeoutError, OSError) as exc:
102          logger.debug("azure_detect: GET %s failed: %s", url, exc)
103          return 0, None
104      except Exception as exc:  # pragma: no cover — defensive
105          logger.debug("azure_detect: GET %s unexpected error: %s", url, exc)
106          return 0, None
107  
108  
109  def _strip_trailing_v1(url: str) -> str:
110      """Strip trailing ``/v1`` or ``/v1/`` so we can construct sub-paths."""
111      return re.sub(r"/v1/?$", "", url.rstrip("/"))
112  
113  
114  def _looks_like_anthropic_path(url: str) -> bool:
115      """Return True when the URL's path ends in ``/anthropic`` or
116      contains a ``/anthropic/`` segment.  Used by Azure Foundry
117      resources that route Claude traffic through a dedicated path."""
118      try:
119          parsed = urlparse(url)
120          path = (parsed.path or "").lower().rstrip("/")
121          return path.endswith("/anthropic") or "/anthropic/" in path + "/"
122      except Exception:
123          return False
124  
125  
126  def _extract_model_ids(payload: dict) -> list[str]:
127      """Extract a list of model IDs from an OpenAI-shaped ``/models``
128      response.  Returns ``[]`` on any shape mismatch."""
129      data = payload.get("data") if isinstance(payload, dict) else None
130      if not isinstance(data, list):
131          return []
132      ids: list[str] = []
133      for item in data:
134          if not isinstance(item, dict):
135              continue
136          # OpenAI shape: {"id": "gpt-5.4", "object": "model", ...}
137          mid = item.get("id") or item.get("model") or item.get("name")
138          if isinstance(mid, str) and mid:
139              ids.append(mid)
140      return ids
141  
142  
def _probe_openai_models(base_url: str, api_key: str) -> tuple[bool, list[str]]:
    """Check whether ``<base>/models`` answers as an OpenAI endpoint.

    Probes without ``api-version`` first (Azure OpenAI v1 GA paths accept
    that), then retries with each fallback version for pre-v1 resources.
    Returns ``(ok, models)``: ``ok`` is True iff some candidate answered
    200 with an OpenAI-shaped JSON body; ``models`` may still be empty.
    """
    root = base_url.rstrip("/")
    candidates = [f"{root}/models"] + [
        f"{root}/models?api-version={version}"
        for version in _AZURE_OPENAI_PROBE_API_VERSIONS
    ]

    for probe_url in candidates:
        status, payload = _http_get_json(probe_url, api_key)
        if status != 200 or payload is None:
            continue
        found = _extract_model_ids(payload)
        if found:
            logger.info(
                "azure_detect: /models probe OK at %s (%d models)",
                probe_url, len(found),
            )
            return True, found
        # A 200 whose body is OpenAI-shaped but lists nothing still
        # proves the wire format — the user can type a name manually.
        if isinstance(payload, dict) and "data" in payload:
            return True, []
    return False, []
173  
174  
def _probe_anthropic_messages(base_url: str, api_key: str) -> bool:
    """Return True when the endpoint recognises the Anthropic Messages shape.

    Sends a minimal POST to ``<base>/v1/messages`` with a deliberately
    bogus model name (``"probe"``) so no real completion is produced.
    Classification of the reply:

    - Any status below 500 on a direct success (should never happen with
      a bogus model, but would prove Anthropic support) → True.
    - A 4xx whose body mentions ``anthropic``, or carries the Anthropic
      error envelope (both ``"type"`` and ``"error"`` keys), or is a 400
      referencing ``messages``/``model`` → True.
    - Anything else (plain 404, transport failure, timeout) → False.

    Never raises.
    """
    base = _strip_trailing_v1(base_url)
    url = f"{base}/v1/messages?api-version={_AZURE_ANTHROPIC_API_VERSION}"
    payload = json.dumps({
        "model": "probe",
        "max_tokens": 1,
        "messages": [{"role": "user", "content": "ping"}],
    }).encode("utf-8")
    req = urllib_request.Request(url, method="POST", data=payload)
    req.add_header("api-key", api_key)
    req.add_header("Authorization", f"Bearer {api_key}")
    req.add_header("anthropic-version", "2023-06-01")
    req.add_header("content-type", "application/json")
    req.add_header("User-Agent", "hermes-agent/azure-detect")
    try:
        with urllib_request.urlopen(req, timeout=6.0) as resp:
            # Should never 200 — "probe" isn't a real deployment.  But
            # if it does, the endpoint definitely speaks Anthropic.
            return resp.status < 500
    except HTTPError as exc:
        # 4xx with an Anthropic-shaped error body = Anthropic endpoint.
        try:
            body = exc.read().decode("utf-8", errors="replace")
            lowered = body.lower()
            # Fix: parenthesised explicitly — the previous version relied
            # on ``and`` binding tighter than ``or``.  The Anthropic error
            # envelope is {"type": "error", "error": {...}}, so the
            # intended grouping is "mentions anthropic, OR has both keys".
            if "anthropic" in lowered or ('"type"' in lowered and '"error"' in lowered):
                return True
            # Pre-Azure-v1 Azure Foundry returns a plain 404 for
            # Anthropic-style calls on non-Anthropic deployments.  A
            # 400 "model not found" IS Anthropic though.
            if exc.code == 400 and ("messages" in lowered or "model" in lowered):
                return True
            return False
        except Exception:
            return False
    except (URLError, TimeoutError, OSError):
        return False
    except Exception:  # pragma: no cover
        return False
219  
220  
def detect(base_url: str, api_key: str) -> DetectionResult:
    """Probe an Azure endpoint and report its transport plus any models.

    Detection runs cheapest-first:

    1. Path sniff — an ``/anthropic`` path segment marks an Azure
       Foundry Anthropic route with no network traffic at all.
    2. ``GET /models`` — success proves an OpenAI-style endpoint.
    3. A live Anthropic Messages probe, only when step 2 failed (it
       POSTs a real request, so it runs last).

    The result is advisory: when ``api_mode`` is ``None`` the caller
    should fall back to asking the user.  Never raises.
    """
    outcome = DetectionResult()

    try:
        outcome.hostname = (urlparse(base_url).hostname or "").lower()
    except Exception:
        outcome.hostname = ""

    # Step 1: offline path sniff.
    if _looks_like_anthropic_path(base_url):
        outcome.is_anthropic = True
        outcome.api_mode = "anthropic_messages"
        outcome.reason = "URL path ends in /anthropic → Anthropic Messages API"
        return outcome

    # Step 2: OpenAI-style /models probe.
    probe_ok, model_ids = _probe_openai_models(base_url, api_key)
    if probe_ok:
        outcome.models_probe_ok = True
        outcome.models = model_ids
        outcome.api_mode = "chat_completions"
        if model_ids:
            outcome.reason = (
                f"GET /models returned {len(model_ids)} model(s) — OpenAI-style endpoint"
            )
        else:
            outcome.reason = (
                "GET /models returned an OpenAI-shaped empty list — OpenAI-style endpoint"
            )
        return outcome

    # Step 3: Anthropic Messages probe (more intrusive, hence last).
    if _probe_anthropic_messages(base_url, api_key):
        outcome.is_anthropic = True
        outcome.api_mode = "anthropic_messages"
        outcome.reason = "Endpoint accepts Anthropic Messages shape"
        return outcome

    # Nothing matched.  Caller falls back to manual selection.
    outcome.reason = (
        "Could not probe endpoint (private network, missing model list, or "
        "non-standard path) — falling back to manual API-mode selection"
    )
    return outcome
274  
275  
def lookup_context_length(model: str, base_url: str, api_key: str) -> Optional[int]:
    """Resolve *model*'s context window, or ``None`` when it is unknown.

    Wraps :func:`agent.model_metadata.get_model_context_length`, but
    treats the resolver's generic fallback default (``DEFAULT_FALLBACK_CONTEXT``)
    as "unknown" so the wizard can tell a real catalog hit from a guess.
    Never raises.
    """
    try:
        from agent.model_metadata import (
            DEFAULT_FALLBACK_CONTEXT,
            get_model_context_length,
        )
    except Exception:
        # Metadata module unavailable — nothing to resolve with.
        return None

    try:
        length = get_model_context_length(model, base_url=base_url, api_key=api_key)
    except Exception as exc:
        logger.debug("azure_detect: context length lookup failed: %s", exc)
        return None

    if not isinstance(length, int):
        return None
    if length <= 0 or length == DEFAULT_FALLBACK_CONTEXT:
        return None
    return length
298  
299  
300  __all__ = ["DetectionResult", "detect", "lookup_context_length"]