# image_generation_tool.py
#!/usr/bin/env python3
"""
Image Generation Tools Module

Provides image generation via FAL.ai. Multiple FAL models are supported and
selectable via ``hermes tools`` → Image Generation; the active model is
persisted to ``image_gen.model`` in ``config.yaml``.

Architecture:
- ``FAL_MODELS`` is a catalog of supported models with per-model metadata
  (size-style family, defaults, ``supports`` whitelist, upscaler flag).
- ``_build_fal_payload()`` translates the agent's unified inputs (prompt +
  aspect_ratio) into the model-specific payload and filters to the
  ``supports`` whitelist so models never receive rejected keys.
- Upscaling via FAL's Clarity Upscaler is gated per-model via the ``upscale``
  flag — on for FLUX 2 Pro (backward-compat), off for all faster/newer models
  where upscaling would either hurt latency or add marginal quality.

Pricing shown in UI strings is as-of the initial commit; we accept drift and
update when it's noticed.
"""

import json
import logging
import os
import datetime
import threading
import uuid
from typing import Any, Dict, Optional, Union
from urllib.parse import urlencode

import fal_client

from tools.debug_helpers import DebugSession
from tools.managed_tool_gateway import resolve_managed_tool_gateway
from tools.tool_backend_helpers import (
    fal_key_is_configured,
    managed_nous_tools_enabled,
    prefers_gateway,
)

logger = logging.getLogger(__name__)


# ---------------------------------------------------------------------------
# FAL model catalog
# ---------------------------------------------------------------------------
#
# Each entry declares how to translate our unified inputs into the model's
# native payload shape. Size specification falls into three families:
#
#   "image_size_preset" — preset enum ("square_hd", "landscape_16_9", ...)
#                         used by the flux family, z-image, qwen, recraft,
#                         ideogram.
#   "aspect_ratio"      — aspect ratio enum ("16:9", "1:1", ...) used by
#                         nano-banana (Gemini).
#   "gpt_literal"       — literal dimension strings ("1024x1024", etc.)
#                         used by gpt-image-1.5.
#
# ``supports`` is a whitelist of keys allowed in the outgoing payload — any
# key outside this set is stripped before submission so models never receive
# rejected parameters (each FAL model rejects unknown keys differently).
#
# ``upscale`` controls whether to chain Clarity Upscaler after generation.

FAL_MODELS: Dict[str, Dict[str, Any]] = {
    "fal-ai/flux-2/klein/9b": {
        "display": "FLUX 2 Klein 9B",
        "speed": "<1s",
        "strengths": "Fast, crisp text",
        "price": "$0.006/MP",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "num_inference_steps": 4,
            "output_format": "png",
            "enable_safety_checker": False,
        },
        "supports": {
            "prompt", "image_size", "num_inference_steps", "seed",
            "output_format", "enable_safety_checker",
        },
        "upscale": False,
    },
    "fal-ai/flux-2-pro": {
        "display": "FLUX 2 Pro",
        "speed": "~6s",
        "strengths": "Studio photorealism",
        "price": "$0.03/MP",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "num_inference_steps": 50,
            "guidance_scale": 4.5,
            "num_images": 1,
            "output_format": "png",
            "enable_safety_checker": False,
            "safety_tolerance": "5",
            "sync_mode": True,
        },
        "supports": {
            "prompt", "image_size", "num_inference_steps", "guidance_scale",
            "num_images", "output_format", "enable_safety_checker",
            "safety_tolerance", "sync_mode", "seed",
        },
        "upscale": True,  # Backward-compat: current default behavior.
    },
    "fal-ai/z-image/turbo": {
        "display": "Z-Image Turbo",
        "speed": "~2s",
        "strengths": "Bilingual EN/CN, 6B",
        "price": "$0.005/MP",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "num_inference_steps": 8,
            "num_images": 1,
            "output_format": "png",
            "enable_safety_checker": False,
            "enable_prompt_expansion": False,  # avoid the extra per-request charge
        },
        "supports": {
            "prompt", "image_size", "num_inference_steps", "num_images",
            "seed", "output_format", "enable_safety_checker",
            "enable_prompt_expansion",
        },
        "upscale": False,
    },
    "fal-ai/nano-banana-pro": {
        "display": "Nano Banana Pro (Gemini 3 Pro Image)",
        "speed": "~8s",
        "strengths": "Gemini 3 Pro, reasoning depth, text rendering",
        "price": "$0.15/image (1K)",
        "size_style": "aspect_ratio",
        "sizes": {
            "landscape": "16:9",
            "square": "1:1",
            "portrait": "9:16",
        },
        "defaults": {
            "num_images": 1,
            "output_format": "png",
            "safety_tolerance": "5",
            # "1K" is the cheapest tier; 4K doubles the per-image cost.
            # Users on Nous Subscription should stay at 1K for predictable billing.
            "resolution": "1K",
        },
        "supports": {
            "prompt", "aspect_ratio", "num_images", "output_format",
            "safety_tolerance", "seed", "sync_mode", "resolution",
            "enable_web_search", "limit_generations",
        },
        "upscale": False,
    },
    "fal-ai/gpt-image-1.5": {
        "display": "GPT Image 1.5",
        "speed": "~15s",
        "strengths": "Prompt adherence",
        "price": "$0.034/image",
        "size_style": "gpt_literal",
        "sizes": {
            "landscape": "1536x1024",
            "square": "1024x1024",
            "portrait": "1024x1536",
        },
        "defaults": {
            # Quality is pinned to medium to keep portal billing predictable
            # across all users (low is too rough, high is 4-6x more expensive).
            "quality": "medium",
            "num_images": 1,
            "output_format": "png",
        },
        "supports": {
            "prompt", "image_size", "quality", "num_images", "output_format",
            "background", "sync_mode",
        },
        "upscale": False,
    },
    "fal-ai/gpt-image-2": {
        "display": "GPT Image 2",
        "speed": "~20s",
        "strengths": "SOTA text rendering + CJK, world-aware photorealism",
        "price": "$0.04–0.06/image",
        # GPT Image 2 uses FAL's standard preset enum (unlike 1.5's literal
        # dimensions). We map to the 4:3 variants — the 16:9 presets
        # (1024x576) fall below GPT-Image-2's 655,360 min-pixel requirement
        # and would be rejected. 4:3 keeps us above the minimum on all
        # three aspect ratios.
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_4_3",  # 1024x768
            "square": "square_hd",         # 1024x1024
            "portrait": "portrait_4_3",    # 768x1024
        },
        "defaults": {
            # Same quality pinning as gpt-image-1.5: medium keeps Nous
            # Portal billing predictable. "high" is 3-4x the per-image
            # cost at the same size; "low" is too rough for production use.
            "quality": "medium",
            "num_images": 1,
            "output_format": "png",
        },
        "supports": {
            "prompt", "image_size", "quality", "num_images", "output_format",
            "sync_mode",
            # openai_api_key (BYOK) intentionally omitted — all users go
            # through the shared FAL billing path.
        },
        "upscale": False,
    },
    "fal-ai/ideogram/v3": {
        "display": "Ideogram V3",
        "speed": "~5s",
        "strengths": "Best typography",
        "price": "$0.03-0.09/image",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "rendering_speed": "BALANCED",
            "expand_prompt": True,
            "style": "AUTO",
        },
        "supports": {
            "prompt", "image_size", "rendering_speed", "expand_prompt",
            "style", "seed",
        },
        "upscale": False,
    },
    "fal-ai/recraft/v4/pro/text-to-image": {
        "display": "Recraft V4 Pro",
        "speed": "~8s",
        "strengths": "Design, brand systems, production-ready",
        "price": "$0.25/image",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            # V4 Pro dropped V3's required `style` enum — defaults handle taste now.
            "enable_safety_checker": False,
        },
        "supports": {
            "prompt", "image_size", "enable_safety_checker",
            "colors", "background_color",
        },
        "upscale": False,
    },
    "fal-ai/qwen-image": {
        "display": "Qwen Image",
        "speed": "~12s",
        "strengths": "LLM-based, complex text",
        "price": "$0.02/MP",
        "size_style": "image_size_preset",
        "sizes": {
            "landscape": "landscape_16_9",
            "square": "square_hd",
            "portrait": "portrait_16_9",
        },
        "defaults": {
            "num_inference_steps": 30,
            "guidance_scale": 2.5,
            "num_images": 1,
            "output_format": "png",
            "acceleration": "regular",
        },
        "supports": {
            "prompt", "image_size", "num_inference_steps", "guidance_scale",
            "num_images", "output_format", "acceleration", "seed", "sync_mode",
        },
        "upscale": False,
    },
}

# Default model is the fastest reasonable option. Kept cheap and sub-1s.
DEFAULT_MODEL = "fal-ai/flux-2/klein/9b"

DEFAULT_ASPECT_RATIO = "landscape"
VALID_ASPECT_RATIOS = ("landscape", "square", "portrait")


# ---------------------------------------------------------------------------
# Upscaler (Clarity Upscaler — unchanged from previous implementation)
# ---------------------------------------------------------------------------
UPSCALER_MODEL = "fal-ai/clarity-upscaler"
UPSCALER_FACTOR = 2
UPSCALER_SAFETY_CHECKER = False
UPSCALER_DEFAULT_PROMPT = "masterpiece, best quality, highres"
UPSCALER_NEGATIVE_PROMPT = "(worst quality, low quality, normal quality:2)"
UPSCALER_CREATIVITY = 0.35
UPSCALER_RESEMBLANCE = 0.6
UPSCALER_GUIDANCE_SCALE = 4
UPSCALER_NUM_INFERENCE_STEPS = 18


# Debug session recorder (enabled via the IMAGE_TOOLS_DEBUG env var).
_debug = DebugSession("image_tools", env_var="IMAGE_TOOLS_DEBUG")
# Process-wide managed FAL client cache: one client per (origin, token) pair,
# guarded by _managed_fal_client_lock (see _get_managed_fal_client).
_managed_fal_client = None
_managed_fal_client_config = None
_managed_fal_client_lock = threading.Lock()


# ---------------------------------------------------------------------------
# Managed FAL gateway (Nous Subscription)
# ---------------------------------------------------------------------------
def _resolve_managed_fal_gateway():
    """Return managed fal-queue gateway config when the user prefers the gateway
    or direct FAL credentials are absent.

    Returns None when a direct FAL key is configured and the user has not
    opted into the gateway; otherwise defers to
    ``resolve_managed_tool_gateway("fal-queue")`` (which may itself return a
    falsy value when the gateway is unavailable).
    """
    if fal_key_is_configured() and not prefers_gateway("image_gen"):
        return None
    return resolve_managed_tool_gateway("fal-queue")


def _normalize_fal_queue_url_format(queue_run_origin: str) -> str:
    """Normalize a queue origin to a prefix ending in exactly one slash.

    Raises ValueError when the origin is empty/blank.
    """
    normalized_origin = str(queue_run_origin or "").strip().rstrip("/")
    if not normalized_origin:
        raise ValueError("Managed FAL queue origin is required")
    return f"{normalized_origin}/"


class _ManagedFalSyncClient:
    """Small per-instance wrapper around fal_client.SyncClient for managed queue hosts."""

    def __init__(self, *, key: str, queue_run_origin: str):
        # NOTE: this deliberately reaches into fal_client private internals
        # (SyncClient._client, client._maybe_retry_request, ...) so the
        # managed gateway can reuse the SDK's retry/auth plumbing while
        # pointing at a non-default queue origin. Every hook is fetched
        # defensively with getattr and validated below so an incompatible
        # fal_client release fails loudly at construction time rather than
        # mid-request.
        sync_client_class = getattr(fal_client, "SyncClient", None)
        if sync_client_class is None:
            raise RuntimeError("fal_client.SyncClient is required for managed FAL gateway mode")

        client_module = getattr(fal_client, "client", None)
        if client_module is None:
            raise RuntimeError("fal_client.client is required for managed FAL gateway mode")

        self._queue_url_format = _normalize_fal_queue_url_format(queue_run_origin)
        self._sync_client = sync_client_class(key=key)
        self._http_client = getattr(self._sync_client, "_client", None)
        self._maybe_retry_request = getattr(client_module, "_maybe_retry_request", None)
        self._raise_for_status = getattr(client_module, "_raise_for_status", None)
        self._request_handle_class = getattr(client_module, "SyncRequestHandle", None)
        self._add_hint_header = getattr(client_module, "add_hint_header", None)
        self._add_priority_header = getattr(client_module, "add_priority_header", None)
        self._add_timeout_header = getattr(client_module, "add_timeout_header", None)

        if self._http_client is None:
            raise RuntimeError("fal_client.SyncClient._client is required for managed FAL gateway mode")
        if self._maybe_retry_request is None or self._raise_for_status is None:
            raise RuntimeError("fal_client.client request helpers are required for managed FAL gateway mode")
        if self._request_handle_class is None:
            raise RuntimeError("fal_client.client.SyncRequestHandle is required for managed FAL gateway mode")

    def submit(
        self,
        application: str,
        arguments: Dict[str, Any],
        *,
        path: str = "",
        hint: Optional[str] = None,
        webhook_url: Optional[str] = None,
        priority: Any = None,
        headers: Optional[Dict[str, str]] = None,
        start_timeout: Optional[Union[int, float]] = None,
    ):
        """Submit ``arguments`` to ``application`` on the managed queue host.

        Mirrors fal_client's queue-submit semantics: POSTs the payload,
        raises via the SDK's status helper on HTTP errors, and returns a
        SyncRequestHandle whose .get() polls for the result.
        """
        url = self._queue_url_format + application
        if path:
            url += "/" + path.lstrip("/")
        if webhook_url is not None:
            url += "?" + urlencode({"fal_webhook": webhook_url})

        # Optional SDK header helpers are only required when the matching
        # argument is actually supplied (hint is best-effort; priority and
        # start_timeout are hard requirements when requested).
        request_headers = dict(headers or {})
        if hint is not None and self._add_hint_header is not None:
            self._add_hint_header(hint, request_headers)
        if priority is not None:
            if self._add_priority_header is None:
                raise RuntimeError("fal_client.client.add_priority_header is required for priority requests")
            self._add_priority_header(priority, request_headers)
        if start_timeout is not None:
            if self._add_timeout_header is None:
                raise RuntimeError("fal_client.client.add_timeout_header is required for timeout requests")
            self._add_timeout_header(start_timeout, request_headers)

        response = self._maybe_retry_request(
            self._http_client,
            "POST",
            url,
            json=arguments,
            timeout=getattr(self._sync_client, "default_timeout", 120.0),
            headers=request_headers,
        )
        self._raise_for_status(response)

        data = response.json()
        return self._request_handle_class(
            request_id=data["request_id"],
            response_url=data["response_url"],
            status_url=data["status_url"],
            cancel_url=data["cancel_url"],
            client=self._http_client,
        )


def _get_managed_fal_client(managed_gateway):
    """Reuse the managed FAL client so its internal httpx.Client is not leaked per call.

    Cache key is (gateway origin, user token); a change in either rebuilds
    the client. The whole check-and-build runs under the lock so concurrent
    callers cannot construct duplicate clients.
    """
    global _managed_fal_client, _managed_fal_client_config

    client_config = (
        managed_gateway.gateway_origin.rstrip("/"),
        managed_gateway.nous_user_token,
    )
    with _managed_fal_client_lock:
        if _managed_fal_client is not None and _managed_fal_client_config == client_config:
            return _managed_fal_client

        _managed_fal_client = _ManagedFalSyncClient(
            key=managed_gateway.nous_user_token,
            queue_run_origin=managed_gateway.gateway_origin,
        )
        _managed_fal_client_config = client_config
        return _managed_fal_client


def _submit_fal_request(model: str, arguments: Dict[str, Any]):
    """Submit a FAL request using direct credentials or the managed queue gateway.

    Returns a request handle exposing ``.get()``. An idempotency key is
    attached to every submission so retries cannot double-bill.
    """
    request_headers = {"x-idempotency-key": str(uuid.uuid4())}
    managed_gateway = _resolve_managed_fal_gateway()
    if managed_gateway is None:
        return fal_client.submit(model, arguments=arguments, headers=request_headers)

    managed_client = _get_managed_fal_client(managed_gateway)
    try:
        return managed_client.submit(
            model,
            arguments=arguments,
            headers=request_headers,
        )
    except Exception as exc:
        # 4xx from the managed gateway typically means the portal doesn't
        # currently proxy this model (allowlist miss, billing gate, etc.)
        # — surface a clearer message with actionable remediation instead
        # of a raw HTTP error from httpx.
        status = _extract_http_status(exc)
        if status is not None and 400 <= status < 500:
            raise ValueError(
                f"Nous Subscription gateway rejected model '{model}' "
                f"(HTTP {status}). This model may not yet be enabled on "
                f"the Nous Portal's FAL proxy. Either:\n"
                f"  • Set FAL_KEY in your environment to use FAL.ai directly, or\n"
                f"  • Pick a different model via `hermes tools` → Image Generation."
            ) from exc
        raise


def _extract_http_status(exc: BaseException) -> Optional[int]:
    """Return an HTTP status code from httpx/fal exceptions, else None.

    Defensive across exception shapes — httpx.HTTPStatusError exposes
    ``.response.status_code`` while fal_client wrappers may expose
    ``.status_code`` directly.
    """
    response = getattr(exc, "response", None)
    if response is not None:
        status = getattr(response, "status_code", None)
        if isinstance(status, int):
            return status
    status = getattr(exc, "status_code", None)
    if isinstance(status, int):
        return status
    return None


# ---------------------------------------------------------------------------
# Model resolution + payload construction
# ---------------------------------------------------------------------------
def _resolve_fal_model() -> "tuple[str, Dict[str, Any]]":
    """Resolve the active FAL model from config.yaml (primary) or default.

    Returns (model_id, metadata_dict). Falls back to DEFAULT_MODEL if the
    configured model is unknown (logged as a warning). Config-load errors
    are swallowed (debug-logged) so a broken config never blocks generation.
    """
    model_id = ""
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
        img_cfg = cfg.get("image_gen") if isinstance(cfg, dict) else None
        if isinstance(img_cfg, dict):
            raw = img_cfg.get("model")
            if isinstance(raw, str):
                model_id = raw.strip()
    except Exception as exc:
        logger.debug("Could not load image_gen.model from config: %s", exc)

    # Env var escape hatch (undocumented; backward-compat for tests/scripts).
    if not model_id:
        model_id = os.getenv("FAL_IMAGE_MODEL", "").strip()

    if not model_id:
        return DEFAULT_MODEL, FAL_MODELS[DEFAULT_MODEL]

    if model_id not in FAL_MODELS:
        logger.warning(
            "Unknown FAL model '%s' in config; falling back to %s",
            model_id, DEFAULT_MODEL,
        )
        return DEFAULT_MODEL, FAL_MODELS[DEFAULT_MODEL]

    return model_id, FAL_MODELS[model_id]


def _build_fal_payload(
    model_id: str,
    prompt: str,
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
    seed: Optional[int] = None,
    overrides: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build a FAL request payload for `model_id` from unified inputs.

    Translates aspect_ratio into the model's native size spec (preset enum,
    aspect-ratio enum, or GPT literal string), merges model defaults, applies
    caller overrides, then filters to the model's ``supports`` whitelist.

    Precedence (last wins): defaults < prompt/size/seed < overrides; the
    whitelist filter runs last, so unsupported overrides are silently dropped.
    Raises KeyError for an unknown model_id and ValueError for an
    unrecognized ``size_style`` in the catalog entry.
    """
    meta = FAL_MODELS[model_id]
    size_style = meta["size_style"]
    sizes = meta["sizes"]

    # Unknown/blank aspect ratios silently fall back to the default.
    aspect = (aspect_ratio or DEFAULT_ASPECT_RATIO).lower().strip()
    if aspect not in sizes:
        aspect = DEFAULT_ASPECT_RATIO

    payload: Dict[str, Any] = dict(meta.get("defaults", {}))
    payload["prompt"] = (prompt or "").strip()

    if size_style in ("image_size_preset", "gpt_literal"):
        payload["image_size"] = sizes[aspect]
    elif size_style == "aspect_ratio":
        payload["aspect_ratio"] = sizes[aspect]
    else:
        raise ValueError(f"Unknown size_style: {size_style!r}")

    # Non-int seeds (e.g. a stray string) are ignored rather than rejected.
    if seed is not None and isinstance(seed, int):
        payload["seed"] = seed

    if overrides:
        for k, v in overrides.items():
            if v is not None:
                payload[k] = v

    supports = meta["supports"]
    return {k: v for k, v in payload.items() if k in supports}


# ---------------------------------------------------------------------------
# Upscaler
# ---------------------------------------------------------------------------
def _upscale_image(image_url: str, original_prompt: str) -> Optional[Dict[str, Any]]:
    """Upscale an image using FAL.ai's Clarity Upscaler.

    Returns upscaled image dict, or None on failure (caller falls back to
    the original image). All exceptions are caught and logged — upscaling
    is strictly best-effort.
    """
    try:
        logger.info("Upscaling image with Clarity Upscaler...")

        upscaler_arguments = {
            "image_url": image_url,
            # Quality-booster prefix plus the original prompt steers the
            # upscaler toward the intended content.
            "prompt": f"{UPSCALER_DEFAULT_PROMPT}, {original_prompt}",
            "upscale_factor": UPSCALER_FACTOR,
            "negative_prompt": UPSCALER_NEGATIVE_PROMPT,
            "creativity": UPSCALER_CREATIVITY,
            "resemblance": UPSCALER_RESEMBLANCE,
            "guidance_scale": UPSCALER_GUIDANCE_SCALE,
            "num_inference_steps": UPSCALER_NUM_INFERENCE_STEPS,
            "enable_safety_checker": UPSCALER_SAFETY_CHECKER,
        }

        handler = _submit_fal_request(UPSCALER_MODEL, arguments=upscaler_arguments)
        result = handler.get()

        if result and "image" in result:
            upscaled_image = result["image"]
            logger.info(
                "Image upscaled successfully to %sx%s",
                upscaled_image.get("width", "unknown"),
                upscaled_image.get("height", "unknown"),
            )
            return {
                "url": upscaled_image["url"],
                "width": upscaled_image.get("width", 0),
                "height": upscaled_image.get("height", 0),
                "upscaled": True,
                "upscale_factor": UPSCALER_FACTOR,
            }
        logger.error("Upscaler returned invalid response")
        return None

    except Exception as e:
        logger.error("Error upscaling image: %s", e, exc_info=True)
        return None


# ---------------------------------------------------------------------------
# Tool entry point
# ---------------------------------------------------------------------------
def image_generate_tool(
    prompt: str,
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
    num_inference_steps: Optional[int] = None,
    guidance_scale: Optional[float] = None,
    num_images: Optional[int] = None,
    output_format: Optional[str] = None,
    seed: Optional[int] = None,
) -> str:
    """Generate an image from a text prompt using the configured FAL model.

    The agent-facing schema exposes only ``prompt`` and ``aspect_ratio``; the
    remaining kwargs are overrides for direct Python callers and are filtered
    per-model via the ``supports`` whitelist (unsupported overrides are
    silently dropped so legacy callers don't break when switching models).

    Returns a JSON string: on success ``{"success": true, "image": url}``;
    on failure ``{"success": false, "image": null, "error": str,
    "error_type": str}``. Never raises — all errors are folded into the
    JSON error payload.
    """
    model_id, meta = _resolve_fal_model()

    # Debug-session record; finalized and persisted on both exit paths.
    debug_call_data = {
        "model": model_id,
        "parameters": {
            "prompt": prompt,
            "aspect_ratio": aspect_ratio,
            "num_inference_steps": num_inference_steps,
            "guidance_scale": guidance_scale,
            "num_images": num_images,
            "output_format": output_format,
            "seed": seed,
        },
        "error": None,
        "success": False,
        "images_generated": 0,
        "generation_time": 0,
    }

    start_time = datetime.datetime.now()

    try:
        if not prompt or not isinstance(prompt, str) or len(prompt.strip()) == 0:
            raise ValueError("Prompt is required and must be a non-empty string")

        if not (fal_key_is_configured() or _resolve_managed_fal_gateway()):
            message = "FAL_KEY environment variable not set"
            if managed_nous_tools_enabled():
                message += " and managed FAL gateway is unavailable"
            raise ValueError(message)

        aspect_lc = (aspect_ratio or DEFAULT_ASPECT_RATIO).lower().strip()
        if aspect_lc not in VALID_ASPECT_RATIOS:
            logger.warning(
                "Invalid aspect_ratio '%s', defaulting to '%s'",
                aspect_ratio, DEFAULT_ASPECT_RATIO,
            )
            aspect_lc = DEFAULT_ASPECT_RATIO

        # Only pass explicitly-set overrides; None means "use model default".
        overrides: Dict[str, Any] = {}
        if num_inference_steps is not None:
            overrides["num_inference_steps"] = num_inference_steps
        if guidance_scale is not None:
            overrides["guidance_scale"] = guidance_scale
        if num_images is not None:
            overrides["num_images"] = num_images
        if output_format is not None:
            overrides["output_format"] = output_format

        arguments = _build_fal_payload(
            model_id, prompt, aspect_lc, seed=seed, overrides=overrides,
        )

        logger.info(
            "Generating image with %s (%s) — prompt: %s",
            meta.get("display", model_id), model_id, prompt[:80],
        )

        handler = _submit_fal_request(model_id, arguments=arguments)
        result = handler.get()

        generation_time = (datetime.datetime.now() - start_time).total_seconds()

        if not result or "images" not in result:
            raise ValueError("Invalid response from FAL.ai API — no images returned")

        images = result.get("images", [])
        if not images:
            raise ValueError("No images were generated")

        should_upscale = bool(meta.get("upscale", False))

        # Normalize each image entry; optionally chain the upscaler, falling
        # back to the original image when upscaling fails.
        formatted_images = []
        for img in images:
            if not (isinstance(img, dict) and "url" in img):
                continue
            original_image = {
                "url": img["url"],
                "width": img.get("width", 0),
                "height": img.get("height", 0),
            }

            if should_upscale:
                upscaled_image = _upscale_image(img["url"], prompt.strip())
                if upscaled_image:
                    formatted_images.append(upscaled_image)
                    continue
                logger.warning("Using original image as fallback (upscale failed)")

            original_image["upscaled"] = False
            formatted_images.append(original_image)

        if not formatted_images:
            raise ValueError("No valid image URLs returned from API")

        upscaled_count = sum(1 for img in formatted_images if img.get("upscaled"))
        logger.info(
            "Generated %s image(s) in %.1fs (%s upscaled) via %s",
            len(formatted_images), generation_time, upscaled_count, model_id,
        )

        # Only the first image URL is surfaced to the agent.
        response_data = {
            "success": True,
            "image": formatted_images[0]["url"] if formatted_images else None,
        }

        debug_call_data["success"] = True
        debug_call_data["images_generated"] = len(formatted_images)
        debug_call_data["generation_time"] = generation_time
        _debug.log_call("image_generate_tool", debug_call_data)
        _debug.save()

        return json.dumps(response_data, indent=2, ensure_ascii=False)

    except Exception as e:
        generation_time = (datetime.datetime.now() - start_time).total_seconds()
        error_msg = f"Error generating image: {str(e)}"
        logger.error("%s", error_msg, exc_info=True)

        response_data = {
            "success": False,
            "image": None,
            "error": str(e),
            "error_type": type(e).__name__,
        }

        debug_call_data["error"] = error_msg
        debug_call_data["generation_time"] = generation_time
        _debug.log_call("image_generate_tool", debug_call_data)
        _debug.save()

        return json.dumps(response_data, indent=2, ensure_ascii=False)


def check_fal_api_key() -> bool:
    """True if the FAL.ai API key (direct or managed gateway) is available."""
    return bool(fal_key_is_configured() or _resolve_managed_fal_gateway())


def check_image_generation_requirements() -> bool:
    """True if any image gen backend is available.

    Providers are considered in this order:

    1. The in-tree FAL backend (FAL_KEY or managed gateway).
    2. Any plugin-registered provider whose ``is_available()`` returns True.

    Plugins win only when the in-tree FAL path is NOT ready, which matches
    the historical behavior: shipping hermes with a FAL key configured
    should still expose the tool. The active selection among ready
    providers is resolved per-call by ``image_gen.provider``.
    """
    try:
        if check_fal_api_key():
            fal_client  # noqa: F401 — SDK presence check
            return True
    except ImportError:
        pass

    # Probe plugin providers. Discovery is idempotent and cheap.
    try:
        from agent.image_gen_registry import list_providers
        from hermes_cli.plugins import _ensure_plugins_discovered

        _ensure_plugins_discovered()
        for provider in list_providers():
            try:
                if provider.is_available():
                    return True
            except Exception:
                # A single broken provider must not mask the others.
                continue
    except Exception:
        pass

    return False


# ---------------------------------------------------------------------------
# Demo / CLI entry point
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    print("🎨 Image Generation Tools — FAL.ai multi-model support")
    print("=" * 60)

    if not check_fal_api_key():
        print("❌ FAL_KEY environment variable not set")
        print("   Set it via: export FAL_KEY='your-key-here'")
        print("   Get a key: https://fal.ai/")
        raise SystemExit(1)
    print("✅ FAL.ai API key found")

    try:
        import fal_client  # noqa: F401
        print("✅ fal_client library available")
    except ImportError:
        print("❌ fal_client library not found — pip install fal-client")
        raise SystemExit(1)

    model_id, meta = _resolve_fal_model()
    print(f"🤖 Active model: {meta.get('display', model_id)} ({model_id})")
    print(f"   Speed: {meta.get('speed', '?')} · Price: {meta.get('price', '?')}")
    print(f"   Upscaler: {'on' if meta.get('upscale') else 'off'}")

    print("\nAvailable models:")
    for mid, m in FAL_MODELS.items():
        marker = " ← active" if mid == model_id else ""
        print(f"  {mid:<32} {m.get('speed', '?'):<6} {m.get('price', '?')}{marker}")

    if _debug.active:
        print(f"\n🐛 Debug mode enabled — session {_debug.session_id}")


# ---------------------------------------------------------------------------
# Registry
# ---------------------------------------------------------------------------
from tools.registry import registry, tool_error

IMAGE_GENERATE_SCHEMA = {
    "name": "image_generate",
    "description": (
        "Generate high-quality images from text prompts. The underlying "
        "backend (FAL, OpenAI, etc.) and model are user-configured and not "
        "selectable by the agent. Returns either a URL or an absolute file "
        "path in the `image` field; display it with markdown "
        " and the gateway will deliver it."
    ),
    "parameters": {
        "type": "object",
        "properties": {
            "prompt": {
                "type": "string",
                "description": "The text prompt describing the desired image. Be detailed and descriptive.",
            },
            "aspect_ratio": {
                "type": "string",
                "enum": list(VALID_ASPECT_RATIOS),
                "description": "The aspect ratio of the generated image. 'landscape' is 16:9 wide, 'portrait' is 16:9 tall, 'square' is 1:1.",
                "default": DEFAULT_ASPECT_RATIO,
            },
        },
        "required": ["prompt"],
    },
}


def _read_configured_image_provider():
    """Return the value of ``image_gen.provider`` from config.yaml, or None.

    We only consult the plugin registry when this is explicitly set — an
    unset value keeps users on the legacy in-tree FAL path even when other
    providers happen to be registered (e.g. a user has OPENAI_API_KEY set
    for other features but never asked for OpenAI image gen).
    """
    try:
        from hermes_cli.config import load_config
        cfg = load_config()
        section = cfg.get("image_gen") if isinstance(cfg, dict) else None
        if isinstance(section, dict):
            value = section.get("provider")
            if isinstance(value, str) and value.strip():
                return value.strip()
    except Exception as exc:
        logger.debug("Could not read image_gen.provider: %s", exc)
    return None


def _dispatch_to_plugin_provider(prompt: str, aspect_ratio: str):
    """Route the call to a plugin-registered provider when one is selected.

    Returns a JSON string on dispatch, or ``None`` to fall through to the
    built-in FAL path.

    Dispatch only fires when ``image_gen.provider`` is explicitly set AND
    it does not point to ``fal`` (FAL still lives in-tree in this PR;
    a later PR ports it into ``plugins/image_gen/fal/``). Any other value
    that matches a registered plugin provider wins.
    """
    configured = _read_configured_image_provider()
    if not configured or configured == "fal":
        return None

    try:
        # Import locally so plugin discovery isn't triggered just by
        # importing this module (tests rely on that).
        from agent.image_gen_registry import get_provider
        from hermes_cli.plugins import _ensure_plugins_discovered

        _ensure_plugins_discovered()
        provider = get_provider(configured)
    except Exception as exc:
        logger.debug("image_gen plugin dispatch skipped: %s", exc)
        return None

    if provider is None:
        try:
            # Long-lived sessions may have discovered plugins before a bundled
            # backend was patched in or before config changed. Retry once with
            # a forced refresh before surfacing a missing-provider error.
            _ensure_plugins_discovered(force=True)
            provider = get_provider(configured)
        except Exception as exc:
            logger.debug("image_gen plugin force-refresh skipped: %s", exc)

    if provider is None:
        return json.dumps({
            "success": False,
            "image": None,
            "error": (
                f"image_gen.provider='{configured}' is set but no plugin "
                f"registered that name. Run `hermes plugins list` to see "
                f"available image gen backends."
            ),
            "error_type": "provider_not_registered",
        })

    try:
        result = provider.generate(prompt=prompt, aspect_ratio=aspect_ratio)
    except Exception as exc:
        logger.warning(
            "Image gen provider '%s' raised: %s",
            getattr(provider, "name", "?"), exc,
        )
        return json.dumps({
            "success": False,
            "image": None,
            "error": f"Provider '{getattr(provider, 'name', '?')}' error: {exc}",
            "error_type": "provider_exception",
        })
    if not isinstance(result, dict):
        return json.dumps({
            "success": False,
            "image": None,
            "error": "Provider returned a non-dict result",
            "error_type": "provider_contract",
        })
    return json.dumps(result)


def _handle_image_generate(args, **kw):
    """Registry handler: validate args, try plugin dispatch, else in-tree FAL."""
    prompt = args.get("prompt", "")
    if not prompt:
        return tool_error("prompt is required for image generation")
    aspect_ratio = args.get("aspect_ratio", DEFAULT_ASPECT_RATIO)

    # Route to a plugin-registered provider if one is active (and it's
    # not the in-tree FAL path).
    dispatched = _dispatch_to_plugin_provider(prompt, aspect_ratio)
    if dispatched is not None:
        return dispatched

    return image_generate_tool(
        prompt=prompt,
        aspect_ratio=aspect_ratio,
    )


registry.register(
    name="image_generate",
    toolset="image_gen",
    schema=IMAGE_GENERATE_SCHEMA,
    handler=_handle_image_generate,
    check_fn=check_image_generation_requirements,
    requires_env=[],
    is_async=False,  # sync fal_client API to avoid "Event loop is closed" in gateway
    emoji="🎨",
)