# image_generation_tool.py
#!/usr/bin/env python3
"""
Image Generation Tools Module

Provides image generation via FAL.ai. Multiple FAL models are supported and
selectable via ``hermes tools`` → Image Generation; the active model is
persisted to ``image_gen.model`` in ``config.yaml``.

Architecture:
- ``FAL_MODELS`` is a catalog of supported models with per-model metadata
  (size-style family, defaults, ``supports`` whitelist, upscaler flag).
- ``_build_fal_payload()`` translates the agent's unified inputs (prompt +
  aspect_ratio) into the model-specific payload and filters to the
  ``supports`` whitelist so models never receive rejected keys.
- Upscaling via FAL's Clarity Upscaler is gated per-model via the ``upscale``
  flag — on for FLUX 2 Pro (backward-compat), off for all faster/newer models
  where upscaling would either hurt latency or add marginal quality.

Pricing shown in UI strings is as-of the initial commit; we accept drift and
update when it's noticed.
"""

import json
import logging
import os
import datetime
import threading
import uuid
from typing import Any, Dict, Optional, Union
from urllib.parse import urlencode

import fal_client

from tools.debug_helpers import DebugSession
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import (
    fal_key_is_configured,
    managed_nous_tools_enabled,
    prefers_gateway,
)

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# FAL model catalog
# ---------------------------------------------------------------------------
#
# Each entry declares how to translate our unified inputs into the model's
# native payload shape. Size specification falls into three families:
#
#   "image_size_preset" — preset enum ("square_hd", "landscape_16_9", ...)
#                         used by the flux family, z-image, qwen, recraft,
#                         ideogram.
#   "aspect_ratio"      — aspect ratio enum ("16:9", "1:1", ...) used by
#                         nano-banana (Gemini).
#   "gpt_literal"       — literal dimension strings ("1024x1024", etc.)
#                         used by gpt-image-1.5.
#
# ``supports`` is a whitelist of keys allowed in the outgoing payload — any
# key outside this set is stripped before submission so models never receive
# rejected parameters (each FAL model rejects unknown keys differently).
#
# ``upscale`` controls whether to chain Clarity Upscaler after generation.

FAL_MODELS: Dict[str, Dict[str, Any]] = {
    "fal-ai/flux-2/klein/9b": {
        "display": "FLUX 2 Klein 9B",
        "speed": "<1s",
        "strengths": "Fast, crisp text",
        "price": "$0.006/MP",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "num_inference_steps": 4,
            "output_format": "png",
            "enable_safety_checker": False,
        },
        "supports": {
            "prompt", "image_size", "num_inference_steps", "seed",
            "output_format", "enable_safety_checker",
        },
        "upscale": False,
    },
    "fal-ai/flux-2-pro": {
        "display": "FLUX 2 Pro",
        "speed": "~6s",
        "strengths": "Studio photorealism",
        "price": "$0.03/MP",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "num_inference_steps": 50,
            "guidance_scale": 4.5,
            "num_images": 1,
            "output_format": "png",
            "enable_safety_checker": False,
            "safety_tolerance": "5",
            "sync_mode": True,
        },
        "supports": {
            "prompt", "image_size", "num_inference_steps", "guidance_scale",
            "num_images", "output_format", "enable_safety_checker",
            "safety_tolerance", "sync_mode", "seed",
        },
        "upscale": True,  # Backward-compat: current default behavior.
    },
    "fal-ai/z-image/turbo": {
        "display": "Z-Image Turbo",
        "speed": "~2s",
        "strengths": "Bilingual EN/CN, 6B",
        "price": "$0.005/MP",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "num_inference_steps": 8,
            "num_images": 1,
            "output_format": "png",
            "enable_safety_checker": False,
            "enable_prompt_expansion": False,  # avoid the extra per-request charge
        },
        "supports": {
            "prompt", "image_size", "num_inference_steps", "num_images",
            "seed", "output_format", "enable_safety_checker",
            "enable_prompt_expansion",
        },
        "upscale": False,
    },
    "fal-ai/nano-banana-pro": {
        "display": "Nano Banana Pro (Gemini 3 Pro Image)",
        "speed": "~8s",
        "strengths": "Gemini 3 Pro, reasoning depth, text rendering",
        "price": "$0.15/image (1K)",
        "size_style": "aspect_ratio",
        "sizes": {
            "landscape": "16:9",
            "square": "1:1",
            "portrait": "9:16",
        },
        "defaults": {
            "num_images": 1,
            "output_format": "png",
            "safety_tolerance": "5",
            # "1K" is the cheapest tier; 4K doubles the per-image cost.
            # Users on Nous Subscription should stay at 1K for predictable billing.
            "resolution": "1K",
        },
        "supports": {
            "prompt", "aspect_ratio", "num_images", "output_format",
            "safety_tolerance", "seed", "sync_mode", "resolution",
            "enable_web_search", "limit_generations",
        },
        "upscale": False,
    },
    "fal-ai/gpt-image-1.5": {
        "display": "GPT Image 1.5",
        "speed": "~15s",
        "strengths": "Prompt adherence",
        "price": "$0.034/image",
        "size_style": "gpt_literal",
        "sizes": {
            "landscape": "1536x1024",
            "square": "1024x1024",
            "portrait": "1024x1536",
        },
        "defaults": {
            # Quality is pinned to medium to keep portal billing predictable
            # across all users (low is too rough, high is 4-6x more expensive).
            "quality": "medium",
            "num_images": 1,
            "output_format": "png",
        },
        "supports": {
            "prompt", "image_size", "quality", "num_images", "output_format",
            "background", "sync_mode",
        },
        "upscale": False,
    },
    "fal-ai/gpt-image-2": {
        "display": "GPT Image 2",
        "speed": "~20s",
        "strengths": "SOTA text rendering + CJK, world-aware photorealism",
        "price": "$0.04–0.06/image",
        # GPT Image 2 uses FAL's standard preset enum (unlike 1.5's literal
        # dimensions). We map to the 4:3 variants — the 16:9 presets
        # (1024x576) fall below GPT-Image-2's 655,360 min-pixel requirement
        # and would be rejected. 4:3 keeps us above the minimum on all
        # three aspect ratios.
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_4_3",  # 1024x768
            "square": "square_hd",         # 1024x1024
            "portrait": "portrait_4_3",    # 768x1024
        },
        "defaults": {
            # Same quality pinning as gpt-image-1.5: medium keeps Nous
            # Portal billing predictable. "high" is 3-4x the per-image
            # cost at the same size; "low" is too rough for production use.
            "quality": "medium",
            "num_images": 1,
            "output_format": "png",
        },
        "supports": {
            "prompt", "image_size", "quality", "num_images", "output_format",
            "sync_mode",
            # openai_api_key (BYOK) intentionally omitted — all users go
            # through the shared FAL billing path.
        },
        "upscale": False,
    },
    "fal-ai/ideogram/v3": {
        "display": "Ideogram V3",
        "speed": "~5s",
        "strengths": "Best typography",
        "price": "$0.03-0.09/image",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "rendering_speed": "BALANCED",
            "expand_prompt": True,
            "style": "AUTO",
        },
        "supports": {
            "prompt", "image_size", "rendering_speed", "expand_prompt",
            "style", "seed",
        },
        "upscale": False,
    },
    "fal-ai/recraft/v4/pro/text-to-image": {
        "display": "Recraft V4 Pro",
        "speed": "~8s",
        "strengths": "Design, brand systems, production-ready",
        "price": "$0.25/image",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            # V4 Pro dropped V3's required `style` enum — defaults handle taste now.
            "enable_safety_checker": False,
        },
        "supports": {
            "prompt", "image_size", "enable_safety_checker",
            "colors", "background_color",
        },
        "upscale": False,
    },
    "fal-ai/qwen-image": {
        "display": "Qwen Image",
        "speed": "~12s",
        "strengths": "LLM-based, complex text",
        "price": "$0.02/MP",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "num_inference_steps": 30,
            "guidance_scale": 2.5,
            "num_images": 1,
            "output_format": "png",
            "acceleration": "regular",
        },
        "supports": {
            "prompt", "image_size", "num_inference_steps", "guidance_scale",
            "num_images", "output_format", "acceleration", "seed", "sync_mode",
        },
        "upscale": False,
    },
}

# Default model is the fastest reasonable option. Kept cheap and sub-1s.
DEFAULT_MODEL = "fal-ai/flux-2/klein/9b"

DEFAULT_ASPECT_RATIO = "landscape"
VALID_ASPECT_RATIOS = ("landscape", "square", "portrait")


# ---------------------------------------------------------------------------
# Upscaler (Clarity Upscaler — unchanged from previous implementation)
# ---------------------------------------------------------------------------
UPSCALER_MODEL = "fal-ai/clarity-upscaler"
UPSCALER_FACTOR = 2
UPSCALER_SAFETY_CHECKER = False
UPSCALER_DEFAULT_PROMPT = "masterpiece, best quality, highres"
UPSCALER_NEGATIVE_PROMPT = "(worst quality, low quality, normal quality:2)"
UPSCALER_CREATIVITY = 0.35
UPSCALER_RESEMBLANCE = 0.6
UPSCALER_GUIDANCE_SCALE = 4
UPSCALER_NUM_INFERENCE_STEPS = 18


# Debug session recorder (enabled via the IMAGE_TOOLS_DEBUG env var).
_debug = DebugSession("image_tools", env_var="IMAGE_TOOLS_DEBUG")
# Process-wide managed FAL client cache: one client per (origin, token) pair,
# guarded by _managed_fal_client_lock (see _get_managed_fal_client).
_managed_fal_client = None
_managed_fal_client_config = None
_managed_fal_client_lock = threading.Lock()


# ---------------------------------------------------------------------------
# Managed FAL gateway (Nous Subscription)
# ---------------------------------------------------------------------------
def _resolve_managed_fal_gateway():
    """Return managed fal-queue gateway config when the user prefers the gateway
    or direct FAL credentials are absent.

    Returns None when a direct FAL key is configured and the user has not
    opted into the gateway; otherwise defers to
    ``resolve_managed_tool_gateway("fal-queue")`` (which may itself return a
    falsy value when the gateway is unavailable).
    """
    if fal_key_is_configured() and not prefers_gateway("image_gen"):
        return None
    return resolve_managed_tool_gateway("fal-queue")


def _normalize_fal_queue_url_format(queue_run_origin: str) -> str:
    """Normalize a queue origin to a prefix ending in exactly one slash.

    Raises ValueError when the origin is empty/blank.
    """
    normalized_origin = str(queue_run_origin or "").strip().rstrip("/")
    if not normalized_origin:
        raise ValueError("Managed FAL queue origin is required")
    return f"{normalized_origin}/"


class _ManagedFalSyncClient:
    """Small per-instance wrapper around fal_client.SyncClient for managed queue hosts."""

    def __init__(self, *, key: str, queue_run_origin: str):
        # NOTE: this deliberately reaches into fal_client private internals
        # (SyncClient._client, client._maybe_retry_request, ...) so the
        # managed gateway can reuse the SDK's retry/auth plumbing while
        # pointing at a non-default queue origin. Every hook is fetched
        # defensively with getattr and validated below so an incompatible
        # fal_client release fails loudly at construction time rather than
        # mid-request.
        sync_client_class = getattr(fal_client, "SyncClient", None)
        if sync_client_class is None:
            raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode")

        client_module = getattr(fal_client, "client", None)
        if client_module is None:
            raise RuntimeError("fal_client.client is required for managed FAL gateway mode")

        self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
        self._sync_client = sync_client_class(key=key)
        self._http_client = getattr(self._sync_client, "_client", None)
        self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
        self._raise_for_status = getattr(client_module, "_raise_for_status", None)
        self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
        self._add_hint_header = getattr(client_module, "add_hint_header", None)
        self._add_priority_header = getattr(client_module, "add_priority_header", None)
        self._add_timeout_header = getattr(client_module, "add_timeout_header", None)

        if self._http_client is None:
            raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
        if self._maybe_retry_request is None or self._raise_for_status is None:
            raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
        if self._request_handle_class is None:
            raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")

    def submit(
        self,
        application: str,
        arguments: Dict[str, Any],
        *,
        path: str = "",
        hint: Optional[str] = None,
        webhook_url: Optional[str] = None,
        priority: Any = None,
        headers: Optional[Dict[str, str]] = None,
        start_timeout: Optional[Union[int, float]] = None,
    ):
        """Submit ``arguments`` to ``application`` on the managed queue host.

        Mirrors fal_client's queue-submit semantics: POSTs the payload,
        raises via the SDK's status helper on HTTP errors, and returns a
        SyncRequestHandle whose .get() polls for the result.
        """
        url = self._queue_url_format + application
        if path:
            url += "/" + path.lstrip("/")
        if webhook_url is not None:
            url += "?" + urlencode({"fal_webhook": webhook_url})

        # Optional SDK header helpers are only required when the matching
        # argument is actually supplied (hint is best-effort; priority and
        # start_timeout are hard requirements when requested).
        request_headers = dict(headers or {})
        if hint is not None and self._add_hint_header is not None:
            self._add_hint_header(hint, request_headers)
        if priority is not None:
            if self._add_priority_header is None:
                raise RuntimeError("fal_client.client.add_priority_header is required for priority requests")
            self._add_priority_header(priority, request_headers)
        if start_timeout is not None:
            if self._add_timeout_header is None:
                raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests")
            self._add_timeout_header(start_timeout, request_headers)

        response = self._maybe_retry_request(
            self._http_client,
            "POST",
            url,
            json=arguments,
            timeout=getattr(self._sync_client, "default_timeout", 120.0),
            headers=request_headers,
        )
        self._raise_for_status(response)

        data = response.json()
        return self._request_handle_class(
            request_id=data["request_id"],
            response_url=data["response_url"],
            status_url=data["status_url"],
            cancel_url=data["cancel_url"],
            client=self._http_client,
        )


def _get_managed_fal_client(managed_gateway):
    """Reuse the managed FAL client so its internal httpx.Client is not leaked per call.

    Cache key is (gateway origin, user token); a change in either rebuilds
    the client. The whole check-and-build runs under the lock so concurrent
    callers cannot construct duplicate clients.
    """
    global _managed_fal_client, _managed_fal_client_config

    client_config = (
        managed_gateway.gateway_origin.rstrip("/"),
        managed_gateway.nous_user_token,
    )
    with _managed_fal_client_lock:
        if _managed_fal_client is not None and _managed_fal_client_config == client_config:
            return _managed_fal_client

        _managed_fal_client = _ManagedFalSyncClient(
            key=managed_gateway.nous_user_token,
            queue_run_origin=managed_gateway.gateway_origin,
        )
        _managed_fal_client_config = client_config
        return _managed_fal_client


def _submit_fal_request(model: str, arguments: Dict[str, Any]):
    """Submit a FAL request using direct credentials or the managed queue gateway.

    Returns a request handle exposing ``.get()``. An idempotency key is
    attached to every submission so retries cannot double-bill.
    """
    request_headers = {"x-idempotency-key": str(uuid.uuid4())}
    managed_gateway = _resolve_managed_fal_gateway()
    if managed_gateway is None:
        return fal_client.submit(model, arguments=arguments, headers=request_headers)

    managed_client = _get_managed_fal_client(managed_gateway)
    try:
        return managed_client.submit(
            model,
            arguments=arguments,
            headers=request_headers,
        )
    except Exception as exc:
        # 4xx from the managed gateway typically means the portal doesn't
        # currently proxy this model (allowlist miss, billing gate, etc.)
        # — surface a clearer message with actionable remediation instead
        # of a raw HTTP error from httpx.
        status = _extract_http_status(exc)
        if status is not None and 400 <= status < 500:
            raise ValueError(
                f"Nous Subscription gateway rejected model '{model}' "
                f"(HTTP {status}). This model may not yet be enabled on "
                f"the Nous Portal's FAL proxy. Either:\n"
                f"  • Set FAL_KEY in your environment to use FAL.ai directly, or\n"
                f"  • Pick a different model via `hermes tools` → Image Generation."
            ) from exc
        raise


def _extract_http_status(exc: BaseException) -> Optional[int]:
    """Return an HTTP status code from httpx/fal exceptions, else None.

    Defensive across exception shapes — httpx.HTTPStatusError exposes
    ``.response.status_code`` while fal_client wrappers may expose
    ``.status_code`` directly.
    """
    response = getattr(exc, "response", None)
    if response is not None:
        status = getattr(response, "status_code", None)
        if isinstance(status, int):
            return status
    status = getattr(exc, "status_code", None)
    if isinstance(status, int):
        return status
    return None


# ---------------------------------------------------------------------------
# Model resolution + payload construction
# ---------------------------------------------------------------------------
def _resolve_fal_model() -> "tuple[str, Dict[str, Any]]":
    """Resolve the active FAL model from config.yaml (primary) or default.

    Returns (model_id, metadata_dict). Falls back to DEFAULT_MODEL if the
    configured model is unknown (logged as a warning). Config-load errors
    are swallowed (debug-logged) so a broken config never blocks generation.
    """
    model_id = ""
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
        img_cfg = cfg.get("image_gen") if isinstance(cfg, dict) else None
        if isinstance(img_cfg, dict):
            raw = img_cfg.get("model")
            if isinstance(raw, str):
                model_id = raw.strip()
    except Exception as exc:
        logger.debug("Could not load image_gen.model from config: %s", exc)

    # Env var escape hatch (undocumented; backward-compat for tests/scripts).
    if not model_id:
        model_id = os.getenv("FAL_IMAGE_MODEL", "").strip()

    if not model_id:
        return DEFAULT_MODEL, FAL_MODELS[DEFAULT_MODEL]

    if model_id not in FAL_MODELS:
        logger.warning(
            "Unknown FAL model '%s' in config; falling back to %s",
            model_id, DEFAULT_MODEL,
        )
        return DEFAULT_MODEL, FAL_MODELS[DEFAULT_MODEL]

    return model_id, FAL_MODELS[model_id]


def _build_fal_payload(
    model_id: str,
    prompt: str,
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
    seed: Optional[int] = None,
    overrides: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build a FAL request payload for `model_id` from unified inputs.

    Translates aspect_ratio into the model's native size spec (preset enum,
    aspect-ratio enum, or GPT literal string), merges model defaults, applies
    caller overrides, then filters to the model's ``supports`` whitelist.

    Precedence (last wins): defaults < prompt/size/seed < overrides; the
    whitelist filter runs last, so unsupported overrides are silently dropped.
    Raises KeyError for an unknown model_id and ValueError for an
    unrecognized ``size_style`` in the catalog entry.
    """
    meta = FAL_MODELS[model_id]
    size_style = meta["size_style"]
    sizes = meta["sizes"]

    # Unknown/blank aspect ratios silently fall back to the default.
    aspect = (aspect_ratio or DEFAULT_ASPECT_RATIO).lower().strip()
    if aspect not in sizes:
        aspect = DEFAULT_ASPECT_RATIO

    payload: Dict[str, Any] = dict(meta.get("defaults", {}))
    payload["prompt"] = (prompt or "").strip()

    if size_style in ("image_size_preset", "gpt_literal"):
        payload["image_size"] = sizes[aspect]
    elif size_style == "aspect_ratio":
        payload["aspect_ratio"] = sizes[aspect]
    else:
        raise ValueError(f"Unknown size_style: {size_style!r}")

    # Non-int seeds (e.g. a stray string) are ignored rather than rejected.
    if seed is not None and isinstance(seed, int):
        payload["seed"] = seed

    if overrides:
        for k, v in overrides.items():
            if v is not None:
                payload[k] = v

    supports = meta["supports"]
    return {k: v for k, v in payload.items() if k in supports}


# ---------------------------------------------------------------------------
# Upscaler
# ---------------------------------------------------------------------------
def _upscale_image(image_url: str, original_prompt: str) -> Optional[Dict[str, Any]]:
    """Upscale an image using FAL.ai's Clarity Upscaler.

    Returns upscaled image dict, or None on failure (caller falls back to
    the original image). All exceptions are caught and logged — upscaling
    is strictly best-effort.
    """
    try:
        logger.info("Upscaling image with Clarity Upscaler...")

        upscaler_arguments = {
            "image_url": image_url,
            # Quality-booster prefix plus the original prompt steers the
            # upscaler toward the intended content.
            "prompt": f"{UPSCALER_DEFAULT_PROMPT}, {original_prompt}",
            "upscale_factor": UPSCALER_FACTOR,
            "negative_prompt": UPSCALER_NEGATIVE_PROMPT,
            "creativity": UPSCALER_CREATIVITY,
            "resemblance": UPSCALER_RESEMBLANCE,
            "guidance_scale": UPSCALER_GUIDANCE_SCALE,
            "num_inference_steps": UPSCALER_NUM_INFERENCE_STEPS,
            "enable_safety_checker": UPSCALER_SAFETY_CHECKER,
        }

        handler = _submit_fal_request(UPSCALER_MODEL, arguments=upscaler_arguments)
        result = handler.get()

        if result and "image" in result:
            upscaled_image = result["image"]
            logger.info(
                "Image upscaled successfully to %sx%s",
                upscaled_image.get("width", "unknown"),
                upscaled_image.get("height", "unknown"),
            )
            return {
                "url": upscaled_image["url"],
                "width": upscaled_image.get("width", 0),
                "height": upscaled_image.get("height", 0),
                "upscaled": True,
                "upscale_factor": UPSCALER_FACTOR,
            }
        logger.error("Upscaler returned invalid response")
        return None

    except Exception as e:
        logger.error("Error upscaling image: %s", e, exc_info=True)
        return None


# ---------------------------------------------------------------------------
# Tool entry point
# ---------------------------------------------------------------------------
def image_generate_tool(
    prompt: str,
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
    num_inference_steps: Optional[int] = None,
    guidance_scale: Optional[float] = None,
    num_images: Optional[int] = None,
    output_format: Optional[str] = None,
    seed: Optional[int] = None,
) -> str:
    """Generate an image from a text prompt using the configured FAL model.

    The agent-facing schema exposes only ``prompt`` and ``aspect_ratio``; the
    remaining kwargs are overrides for direct Python callers and are filtered
    per-model via the ``supports`` whitelist (unsupported overrides are
    silently dropped so legacy callers don't break when switching models).

    Returns a JSON string: on success ``{"success": true, "image": url}``;
    on failure ``{"success": false, "image": null, "error": str,
    "error_type": str}``. Never raises — all errors are folded into the
    JSON error payload.
    """
    model_id, meta = _resolve_fal_model()

    # Debug-session record; finalized and persisted on both exit paths.
    debug_call_data = {
        "model": model_id,
        "parameters": {
            "prompt": prompt,
            "aspect_ratio": aspect_ratio,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
            "num_images": num_images,
            "output_format": output_format,
            "seed": seed,
        },
        "error": None,
        "success": False,
        "images_generated": 0,
        "generation_time": 0,
    }

    start_time = datetime.datetime.now()

    try:
        if not prompt or not isinstance(prompt, str) or len(prompt.strip()) == 0:
            raise ValueError("Prompt is required and must be a non-empty string")

        if not (fal_key_is_configured() or _resolve_managed_fal_gateway()):
            message = "FAL_KEY environment variable not set"
            if managed_nous_tools_enabled():
                message += " and managed FAL gateway is unavailable"
            raise ValueError(message)

        aspect_lc = (aspect_ratio or DEFAULT_ASPECT_RATIO).lower().strip()
        if aspect_lc not in VALID_ASPECT_RATIOS:
            logger.warning(
                "Invalid aspect_ratio '%s', defaulting to '%s'",
                aspect_ratio, DEFAULT_ASPECT_RATIO,
            )
            aspect_lc = DEFAULT_ASPECT_RATIO

        # Only pass explicitly-set overrides; None means "use model default".
        overrides: Dict[str, Any] = {}
        if num_inference_steps is not None:
            overrides["num_inference_steps"] = num_inference_steps
        if guidance_scale is not None:
            overrides["guidance_scale"] = guidance_scale
        if num_images is not None:
            overrides["num_images"] = num_images
        if output_format is not None:
            overrides["output_format"] = output_format

        arguments = _build_fal_payload(
            model_id, prompt, aspect_lc, seed=seed, overrides=overrides,
        )

        logger.info(
            "Generating image with %s (%s) — prompt: %s",
            meta.get("display", model_id), model_id, prompt[:80],
        )

        handler = _submit_fal_request(model_id, arguments=arguments)
        result = handler.get()

        generation_time = (datetime.datetime.now() - start_time).total_seconds()

        if not result or "images" not in result:
            raise ValueError("Invalid response from FAL.ai API — no images returned")

        images = result.get("images", [])
        if not images:
            raise ValueError("No images were generated")

        should_upscale = bool(meta.get("upscale", False))

        # Normalize each image entry; optionally chain the upscaler, falling
        # back to the original image when upscaling fails.
        formatted_images = []
        for img in images:
            if not (isinstance(img, dict) and "url" in img):
                continue
            original_image = {
                "url": img["url"],
                "width": img.get("width", 0),
                "height": img.get("height", 0),
            }

            if should_upscale:
                upscaled_image = _upscale_image(img["url"], prompt.strip())
                if upscaled_image:
                    formatted_images.append(upscaled_image)
                    continue
                logger.warning("Using original image as fallback (upscale failed)")

            original_image["upscaled"] = False
            formatted_images.append(original_image)

        if not formatted_images:
            raise ValueError("No valid image URLs returned from API")

        upscaled_count = sum(1 for img in formatted_images if img.get("upscaled"))
        logger.info(
            "Generated %s image(s) in %.1fs (%s upscaled) via %s",
            len(formatted_images), generation_time, upscaled_count, model_id,
        )

        # Only the first image URL is surfaced to the agent.
        response_data = {
            "success": True,
            "image": formatted_images[0]["url"] if formatted_images else None,
        }

        debug_call_data["success"] = True
        debug_call_data["images_generated"] = len(formatted_images)
        debug_call_data["generation_time"] = generation_time
        _debug.log_call("image_generate_tool", debug_call_data)
        _debug.save()

        return json.dumps(response_data, indent=2, ensure_ascii=False)

    except Exception as e:
        generation_time = (datetime.datetime.now() - start_time).total_seconds()
        error_msg = f"Error generating image: {str(e)}"
        logger.error("%s", error_msg, exc_info=True)

        response_data = {
            "success": False,
            "image": None,
            "error": str(e),
            "error_type": type(e).__name__,
        }

        debug_call_data["error"] = error_msg
        debug_call_data["generation_time"] = generation_time
        _debug.log_call("image_generate_tool", debug_call_data)
        _debug.save()

        return json.dumps(response_data, indent=2, ensure_ascii=False)


def check_fal_api_key() -> bool:
    """True if the FAL.ai API key (direct or managed gateway) is available."""
    return bool(fal_key_is_configured() or _resolve_managed_fal_gateway())


def check_image_generation_requirements() -> bool:
    """True if any image gen backend is available.

    Providers are considered in this order:

    1. The in-tree FAL backend (FAL_KEY or managed gateway).
    2. Any plugin-registered provider whose ``is_available()`` returns True.

    Plugins win only when the in-tree FAL path is NOT ready, which matches
    the historical behavior: shipping hermes with a FAL key configured
    should still expose the tool. The active selection among ready
    providers is resolved per-call by ``image_gen.provider``.
    """
    try:
        if check_fal_api_key():
            fal_client  # noqa: F401 — SDK presence check
            return True
    except ImportError:
        pass

    # Probe plugin providers. Discovery is idempotent and cheap.
    try:
        from agent.image_gen_registry import list_providers
        from hermes_cli.plugins import _ensure_plugins_discovered

        _ensure_plugins_discovered()
        for provider in list_providers():
            try:
                if provider.is_available():
                    return True
            except Exception:
                # A single broken provider must not mask the others.
                continue
    except Exception:
        pass

    return False


# ---------------------------------------------------------------------------
# Demo / CLI entry point
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    print("🎨 Image Generation Tools — FAL.ai multi-model support")
    print("=" * 60)

    if not check_fal_api_key():
        print("❌ FAL_KEY environment variable not set")
        print("   Set it via: export FAL_KEY='your-key-here'")
        print("   Get a key: https://fal.ai/")
        raise SystemExit(1)
    print("✅ FAL.ai API key found")

    try:
        import fal_client  # noqa: F401
        print("✅ fal_client library available")
    except ImportError:
        print("❌ fal_client library not found — pip install fal-client")
        raise SystemExit(1)

    model_id, meta = _resolve_fal_model()
    print(f"🤖 Active model: {meta.get('display', model_id)} ({model_id})")
    print(f"   Speed: {meta.get('speed', '?')} · Price: {meta.get('price', '?')}")
    print(f"   Upscaler: {'on' if meta.get('upscale') else 'off'}")

    print("\nAvailable models:")
    for mid, m in FAL_MODELS.items():
        marker = " ← active" if mid == model_id else ""
        print(f"  {mid:<32} {m.get('speed', '?'):<6} {m.get('price', '?')}{marker}")

    if _debug.active:
        print(f"\n🐛 Debug mode enabled — session {_debug.session_id}")


# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
from tools.registry import registry, tool_error

IMAGE_GENERATE_SCHEMA = {
    "name": "image_generate",
    "description": (
        "Generate high-quality images from text prompts. The underlying "
        "backend (FAL, OpenAI, etc.) and model are user-configured and not "
        "selectable by the agent. Returns either a URL or an absolute file "
        "path in the `image` field; display it with markdown "
        " and the gateway will deliver it."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The text prompt describing the desired image. Be detailed and descriptive.",
            },
            "aspect_ratio": {
                "type": "string",
                "enum": list(VALID_ASPECT_RATIOS),
                "description": "The aspect ratio of the generated image. 'landscape' is 16:9 wide, 'portrait' is 16:9 tall, 'square' is 1:1.",
                "default": DEFAULT_ASPECT_RATIO,
            },
        },
        "required": ["prompt"],
    },
}


def _read_configured_image_provider():
    """Return the value of ``image_gen.provider`` from config.yaml, or None.

    We only consult the plugin registry when this is explicitly set — an
    unset value keeps users on the legacy in-tree FAL path even when other
    providers happen to be registered (e.g. a user has OPENAI_API_KEY set
    for other features but never asked for OpenAI image gen).
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
        if isinstance(section, dict):
            value = section.get("provider")
            if isinstance(value, str) and value.strip():
                return value.strip()
    except Exception as exc:
        logger.debug("Could not read image_gen.provider: %s", exc)
    return None


def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
    """Route the call to a plugin-registered provider when one is selected.

    Returns a JSON string on dispatch, or ``None`` to fall through to the
    built-in FAL path.

    Dispatch only fires when ``image_gen.provider`` is explicitly set AND
    it does not point to ``fal`` (FAL still lives in-tree in this PR;
    a later PR ports it into ``plugins/image_gen/fal/``). Any other value
    that matches a registered plugin provider wins.
    """
    configured = _read_configured_image_provider()
    if not configured or configured == "fal":
        return None

    try:
        # Import locally so plugin discovery isn't triggered just by
        # importing this module (tests rely on that).
        from agent.image_gen_registry import get_provider
        from hermes_cli.plugins import _ensure_plugins_discovered

        _ensure_plugins_discovered()
        provider = get_provider(configured)
    except Exception as exc:
        logger.debug("image_gen plugin dispatch skipped: %s", exc)
        return None

    if provider is None:
        try:
            # Long-lived sessions may have discovered plugins before a bundled
            # backend was patched in or before config changed. Retry once with
            # a forced refresh before surfacing a missing-provider error.
            _ensure_plugins_discovered(force=True)
            provider = get_provider(configured)
        except Exception as exc:
            logger.debug("image_gen plugin force-refresh skipped: %s", exc)

    if provider is None:
        return json.dumps({
            "success": False,
            "image": None,
            "error": (
                f"image_gen.provider='{configured}' is set but no plugin "
                f"registered that name. Run `hermes plugins list` to see "
                f"available image gen backends."
            ),
            "error_type": "provider_not_registered",
        })

    try:
        result = provider.generate(prompt=prompt, aspect_ratio=aspect_ratio)
    except Exception as exc:
        logger.warning(
            "Image gen provider '%s' raised: %s",
            getattr(provider, "name", "?"), exc,
        )
        return json.dumps({
            "success": False,
            "image": None,
            "error": f"Provider '{getattr(provider, 'name', '?')}' error: {exc}",
            "error_type": "provider_exception",
        })
    if not isinstance(result, dict):
        return json.dumps({
            "success": False,
            "image": None,
            "error": "Provider returned a non-dict result",
            "error_type": "provider_contract",
        })
    return json.dumps(result)


def _handle_image_generate(args, **kw):
    """Registry handler: validate args, try plugin dispatch, else in-tree FAL."""
    prompt = args.get("prompt", "")
    if not prompt:
        return tool_error("prompt is required for image generation")
    aspect_ratio = args.get("aspect_ratio", DEFAULT_ASPECT_RATIO)

    # Route to a plugin-registered provider if one is active (and it's
    # not the in-tree FAL path).
    dispatched = _dispatch_to_plugin_provider(prompt, aspect_ratio)
    if dispatched is not None:
        return dispatched

    return image_generate_tool(
        prompt=prompt,
        aspect_ratio=aspect_ratio,
    )


registry.register(
    name="image_generate",
    toolset="image_gen",
    schema=IMAGE_GENERATE_SCHEMA,
    handler=_handle_image_generate,
    check_fn=check_image_generation_requirements,
    requires_env=[],
    is_async=False,  # sync fal_client API to avoid "Event loop is closed" in gateway
    emoji="🎨",
)