# image_gen_provider.py
"""
Image Generation Provider ABC
=============================

Defines the pluggable-backend interface for image generation. Providers register
instances via ``PluginContext.register_image_gen_provider()``; the active one
(selected via ``image_gen.provider`` in ``config.yaml``) services every
``image_generate`` tool call.

Providers live in ``<repo>/plugins/image_gen/<name>/`` (built-in, auto-loaded
as ``kind: backend``) or ``~/.hermes/plugins/image_gen/<name>/`` (user, opt-in
via ``plugins.enabled``).

Response shape
--------------
All providers return a dict that :func:`success_response` / :func:`error_response`
produce. The tool wrapper JSON-serializes it. Keys:

    success        bool
    image          str | None     URL or absolute file path
    model          str            provider-specific model identifier
    prompt         str            echoed prompt
    aspect_ratio   str            "landscape" | "square" | "portrait"
    provider       str            provider name (for diagnostics)
    error          str            only when success=False
    error_type     str            only when success=False
"""

from __future__ import annotations

import abc
import base64
import datetime
import logging
import uuid
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)


# Closed set of aspect-ratio names; anything else is coerced to the default
# by ``resolve_aspect_ratio``.
VALID_ASPECT_RATIOS: Tuple[str, ...] = ("landscape", "square", "portrait")
DEFAULT_ASPECT_RATIO = "landscape"


# ---------------------------------------------------------------------------
# ABC
# ---------------------------------------------------------------------------


class ImageGenProvider(abc.ABC):
    """Abstract base class for an image generation backend.

    Subclasses must implement the :attr:`name` property and :meth:`generate`
    (both are abstract). Everything else has sane defaults — override only
    what your provider needs.
    """

    @property
    @abc.abstractmethod
    def name(self) -> str:
        """Stable short identifier used in ``image_gen.provider`` config.

        Lowercase, no spaces. Examples: ``fal``, ``openai``, ``replicate``.
        """

    @property
    def display_name(self) -> str:
        """Human-readable label shown in ``hermes tools``. Defaults to ``name.title()``."""
        return self.name.title()

    def is_available(self) -> bool:
        """Return True when this provider can service calls.

        Typically checks for a required API key. Default: True
        (providers with no external dependencies are always available).
        """
        return True

    def list_models(self) -> List[Dict[str, Any]]:
        """Return catalog entries for ``hermes tools`` model picker.

        Each entry::

            {
                "id": "gpt-image-1.5",       # required
                "display": "GPT Image 1.5",  # optional; defaults to id
                "speed": "~10s",             # optional
                "strengths": "...",          # optional
                "price": "$...",             # optional
            }

        Default: empty list (provider has no user-selectable models).
        """
        return []

    def get_setup_schema(self) -> Dict[str, Any]:
        """Return provider metadata for the ``hermes tools`` picker.

        Used by ``tools_config.py`` to inject this provider as a row in
        the Image Generation provider list. Shape::

            {
                "name": "OpenAI",                      # picker label
                "badge": "paid",                       # optional short tag
                "tag": "One-line description...",      # optional subtitle
                "env_vars": [                          # keys to prompt for
                    {"key": "OPENAI_API_KEY",
                     "prompt": "OpenAI API key",
                     "url": "https://platform.openai.com/api-keys"},
                ],
            }

        Default: minimal entry derived from ``display_name``. Override to
        expose API key prompts and custom badges.
        """
        return {
            "name": self.display_name,
            "badge": "",
            "tag": "",
            "env_vars": [],
        }

    def default_model(self) -> Optional[str]:
        """Return the default model id, or None if not applicable.

        The default is the ``"id"`` of the first entry returned by
        :meth:`list_models`; ``None`` when that list is empty (or when the
        first entry carries no ``"id"`` key).
        """
        models = self.list_models()
        if models:
            return models[0].get("id")
        return None

    @abc.abstractmethod
    def generate(
        self,
        prompt: str,
        aspect_ratio: str = DEFAULT_ASPECT_RATIO,
        **kwargs: Any,
    ) -> Dict[str, Any]:
        """Generate an image.

        Implementations should return the dict from :func:`success_response`
        or :func:`error_response`. ``kwargs`` may contain forward-compat
        parameters future versions of the schema will expose — implementations
        should ignore unknown keys.
        """


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------


def resolve_aspect_ratio(value: Optional[str]) -> str:
    """Clamp an aspect_ratio value to the valid set, defaulting to landscape.

    Invalid values are coerced rather than rejected so the tool surface is
    forgiving of agent mistakes. Matching ignores surrounding whitespace and
    letter case; non-string input (including ``None``) yields the default.
    """
    if not isinstance(value, str):
        return DEFAULT_ASPECT_RATIO
    v = value.strip().lower()
    if v in VALID_ASPECT_RATIOS:
        return v
    return DEFAULT_ASPECT_RATIO


def _images_cache_dir() -> Path:
    """Return ``$HERMES_HOME/cache/images/``, creating parents as needed."""
    # Imported lazily so this module can be imported without pulling in
    # ``hermes_constants`` until an image actually has to be written.
    from hermes_constants import get_hermes_home

    path = get_hermes_home() / "cache" / "images"
    path.mkdir(parents=True, exist_ok=True)
    return path


def save_b64_image(
    b64_data: str,
    *,
    prefix: str = "image",
    extension: str = "png",
) -> Path:
    """Decode base64 image data and write it under ``$HERMES_HOME/cache/images/``.

    ``b64_data`` may be a bare base64 payload or a full data URI
    (``data:image/png;base64,<payload>``); in the latter case everything up
    to and including the first comma is dropped before decoding. ``extension``
    may be given with or without a leading dot.

    Returns the :class:`Path` to the saved file (absolute as long as
    ``get_hermes_home()`` returns an absolute path).

    Filename format: ``<prefix>_<YYYYMMDD_HHMMSS>_<short-uuid>.<ext>``.

    Raises:
        binascii.Error: if the payload is incorrectly padded base64.
        OSError: if the cache directory or the file cannot be written.
    """
    payload = b64_data
    # Without this, the alphabet characters of a data-URI header
    # ("dataimage/pngbase64") would be fed to the decoder as if they were
    # image data, yielding corrupt bytes or a spurious padding error.
    if isinstance(payload, str) and payload.lstrip()[:5].lower() == "data:":
        _header, sep, body = payload.partition(",")
        if sep:
            payload = body
    raw = base64.b64decode(payload)

    ts = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    # Short random suffix keeps names unique for images saved within the
    # same second.
    short = uuid.uuid4().hex[:8]
    # Accept ".png" as well as "png" so the name never contains "..".
    ext = extension.lstrip(".")
    path = _images_cache_dir() / f"{prefix}_{ts}_{short}.{ext}"
    path.write_bytes(raw)
    return path


def success_response(
    *,
    image: str,
    model: str,
    prompt: str,
    aspect_ratio: str,
    provider: str,
    extra: Optional[Dict[str, Any]] = None,
) -> Dict[str, Any]:
    """Build a uniform success response dict.

    ``image`` may be an HTTP URL or an absolute filesystem path (for b64
    providers like OpenAI). Callers that need to pass through additional
    backend-specific fields can supply ``extra``; keys in ``extra`` that
    collide with the core response keys are ignored, so the core fields
    always win.
    """
    payload: Dict[str, Any] = {
        "success": True,
        "image": image,
        "model": model,
        "prompt": prompt,
        "aspect_ratio": aspect_ratio,
        "provider": provider,
    }
    if extra:
        for k, v in extra.items():
            # setdefault: never let backend extras overwrite a core field.
            payload.setdefault(k, v)
    return payload


def error_response(
    *,
    error: str,
    error_type: str = "provider_error",
    provider: str = "",
    model: str = "",
    prompt: str = "",
    aspect_ratio: str = DEFAULT_ASPECT_RATIO,
) -> Dict[str, Any]:
    """Build a uniform error response dict.

    The result always has ``success=False`` and ``image=None`` and carries
    the same ``model`` / ``prompt`` / ``aspect_ratio`` / ``provider`` keys as
    :func:`success_response`, plus ``error`` and ``error_type``.
    """
    return {
        "success": False,
        "image": None,
        "error": error,
        "error_type": error_type,
        "model": model,
        "prompt": prompt,
        "aspect_ratio": aspect_ratio,
        "provider": provider,
    }