/ model_tools.py
model_tools.py
1 #!/usr/bin/env python3 2 """ 3 Model Tools Module 4 5 Thin orchestration layer over the tool registry. Each tool file in tools/ 6 self-registers its schema, handler, and metadata via tools.registry.register(). 7 This module triggers discovery (by importing all tool modules), then provides 8 the public API that run_agent.py, cli.py, batch_runner.py, and the RL 9 environments consume. 10 11 Public API (signatures preserved from the original 2,400-line version): 12 get_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode) -> list 13 handle_function_call(function_name, function_args, task_id, user_task) -> str 14 TOOL_TO_TOOLSET_MAP: dict (for batch_runner.py) 15 TOOLSET_REQUIREMENTS: dict (for cli.py, doctor.py) 16 get_all_tool_names() -> list 17 get_toolset_for_tool(name) -> str 18 get_available_toolsets() -> dict 19 check_toolset_requirements() -> dict 20 check_tool_availability(quiet) -> tuple 21 """ 22 23 import json 24 import asyncio 25 import logging 26 import threading 27 import time 28 from typing import Dict, Any, List, Optional, Tuple 29 30 from tools.registry import discover_builtin_tools, registry 31 from toolsets import resolve_toolset, validate_toolset 32 33 logger = logging.getLogger(__name__) 34 35 36 # ============================================================================= 37 # Async Bridging (single source of truth -- used by registry.dispatch too) 38 # ============================================================================= 39 40 _tool_loop = None # persistent loop for the main (CLI) thread 41 _tool_loop_lock = threading.Lock() 42 _worker_thread_local = threading.local() # per-worker-thread persistent loops 43 44 45 def _get_tool_loop(): 46 """Return a long-lived event loop for running async tool handlers. 47 48 Using a persistent loop (instead of asyncio.run() which creates and 49 *closes* a fresh loop every time) prevents "Event loop is closed" 50 errors that occur when cached httpx/AsyncOpenAI clients attempt to 51 close their transport on a dead loop during garbage collection. 52 """ 53 global _tool_loop 54 with _tool_loop_lock: 55 if _tool_loop is None or _tool_loop.is_closed(): 56 _tool_loop = asyncio.new_event_loop() 57 return _tool_loop 58 59 60 def _get_worker_loop(): 61 """Return a persistent event loop for the current worker thread. 62 63 Each worker thread (e.g., delegate_task's ThreadPoolExecutor threads) 64 gets its own long-lived loop stored in thread-local storage. This 65 prevents the "Event loop is closed" errors that occurred when 66 asyncio.run() was used per-call: asyncio.run() creates a loop, runs 67 the coroutine, then *closes* the loop — but cached httpx/AsyncOpenAI 68 clients remain bound to that now-dead loop and raise RuntimeError 69 during garbage collection or subsequent use. 70 71 By keeping the loop alive for the thread's lifetime, cached clients 72 stay valid and their cleanup runs on a live loop. 73 """ 74 loop = getattr(_worker_thread_local, 'loop', None) 75 if loop is None or loop.is_closed(): 76 loop = asyncio.new_event_loop() 77 asyncio.set_event_loop(loop) 78 _worker_thread_local.loop = loop 79 return loop 80 81 82 def _run_async(coro): 83 """Run an async coroutine from a sync context. 84 85 If the current thread already has a running event loop (e.g., inside 86 the gateway's async stack or Atropos's event loop), we spin up a 87 disposable thread so asyncio.run() can create its own loop without 88 conflicting. 89 90 For the common CLI path (no running loop), we use a persistent event 91 loop so that cached async clients (httpx / AsyncOpenAI) remain bound 92 to a live loop and don't trigger "Event loop is closed" on GC. 93 94 When called from a worker thread (parallel tool execution), we use a 95 per-thread persistent loop to avoid both contention with the main 96 thread's shared loop AND the "Event loop is closed" errors caused by 97 asyncio.run()'s create-and-destroy lifecycle. 98 99 This is the single source of truth for sync->async bridging in tool 100 handlers. The RL paths (agent_loop.py, tool_context.py) also provide 101 outer thread-pool wrapping as defense-in-depth, but each handler is 102 self-protecting via this function. 103 """ 104 try: 105 loop = asyncio.get_running_loop() 106 except RuntimeError: 107 loop = None 108 109 if loop and loop.is_running(): 110 # Inside an async context (gateway, RL env) — run in a fresh thread 111 # with its own event loop we own a reference to, so on timeout we 112 # can cancel the task inside that loop (ThreadPoolExecutor.cancel() 113 # only works on not-yet-started futures — it's a no-op on a running 114 # worker, which previously leaked the thread on every 300 s timeout). 115 import concurrent.futures 116 117 worker_loop: Optional[asyncio.AbstractEventLoop] = None 118 loop_ready = threading.Event() 119 120 def _run_in_worker(): 121 nonlocal worker_loop 122 worker_loop = asyncio.new_event_loop() 123 loop_ready.set() 124 try: 125 asyncio.set_event_loop(worker_loop) 126 return worker_loop.run_until_complete(coro) 127 finally: 128 try: 129 # Cancel anything still pending (e.g. task cancelled 130 # externally via call_soon_threadsafe on timeout). 131 pending = asyncio.all_tasks(worker_loop) 132 for t in pending: 133 t.cancel() 134 if pending: 135 worker_loop.run_until_complete( 136 asyncio.gather(*pending, return_exceptions=True) 137 ) 138 except Exception: 139 pass 140 worker_loop.close() 141 142 pool = concurrent.futures.ThreadPoolExecutor(max_workers=1) 143 future = pool.submit(_run_in_worker) 144 try: 145 return future.result(timeout=300) 146 except concurrent.futures.TimeoutError: 147 # Cancel the coroutine inside its own loop so the worker thread 148 # can wind down instead of running forever. 149 if loop_ready.wait(timeout=1.0) and worker_loop is not None: 150 try: 151 for t in asyncio.all_tasks(worker_loop): 152 worker_loop.call_soon_threadsafe(t.cancel) 153 except RuntimeError: 154 # Loop already closed — nothing to cancel. 155 pass 156 raise 157 finally: 158 # wait=False: don't block the caller on a stuck coroutine. We've 159 # already requested cancellation above; the worker will exit 160 # once the coroutine observes it (usually at the next await). 161 pool.shutdown(wait=False) 162 163 # If we're on a worker thread (e.g., parallel tool execution in 164 # delegate_task), use a per-thread persistent loop. This avoids 165 # contention with the main thread's shared loop while keeping cached 166 # httpx/AsyncOpenAI clients bound to a live loop for the thread's 167 # lifetime — preventing "Event loop is closed" on GC cleanup. 168 if threading.current_thread() is not threading.main_thread(): 169 worker_loop = _get_worker_loop() 170 return worker_loop.run_until_complete(coro) 171 172 tool_loop = _get_tool_loop() 173 return tool_loop.run_until_complete(coro) 174 175 176 # ============================================================================= 177 # Tool Discovery (importing each module triggers its registry.register calls) 178 # ============================================================================= 179 180 discover_builtin_tools() 181 182 # MCP tool discovery (external MCP servers from config) used to run here as 183 # a module-level side effect. It was removed because discover_mcp_tools() 184 # internally uses a blocking future.result(timeout=120) wait, and the 185 # gateway lazy-imports this module from inside the asyncio event loop on 186 # the first user message — freezing Discord/Telegram heartbeats for up to 187 # 120s whenever any configured MCP server was slow or unreachable (#16856). 188 # 189 # Each entry point now runs discovery explicitly at its own startup: 190 # - gateway/run.py -> start_gateway() uses run_in_executor 191 # - cli.py, hermes_cli/* -> inline on startup (no event loop) 192 # - tui_gateway/server.py -> inline on startup (no event loop) 193 # - acp_adapter/server.py -> asyncio.to_thread on session init 194 195 # Plugin tool discovery (user/project/pip plugins) 196 try: 197 from hermes_cli.plugins import discover_plugins 198 discover_plugins() 199 except Exception as e: 200 logger.debug("Plugin discovery failed: %s", e) 201 202 203 # ============================================================================= 204 # Backward-compat constants (built once after discovery) 205 # ============================================================================= 206 207 TOOL_TO_TOOLSET_MAP: Dict[str, str] = registry.get_tool_to_toolset_map() 208 209 TOOLSET_REQUIREMENTS: Dict[str, dict] = registry.get_toolset_requirements() 210 211 # Resolved tool names from the last get_tool_definitions() call. 212 # Used by code_execution_tool to know which tools are available in this session. 213 _last_resolved_tool_names: List[str] = [] 214 215 216 # ============================================================================= 217 # Legacy toolset name mapping (old _tools-suffixed names -> tool name lists) 218 # ============================================================================= 219 220 _LEGACY_TOOLSET_MAP = { 221 "web_tools": ["web_search", "web_extract"], 222 "terminal_tools": ["terminal"], 223 "vision_tools": ["vision_analyze"], 224 "moa_tools": ["mixture_of_agents"], 225 "image_tools": ["image_generate"], 226 "skills_tools": ["skills_list", "skill_view", "skill_manage"], 227 "browser_tools": [ 228 "browser_navigate", "browser_snapshot", "browser_click", 229 "browser_type", "browser_scroll", "browser_back", 230 "browser_press", "browser_get_images", 231 "browser_vision", "browser_console" 232 ], 233 "cronjob_tools": ["cronjob"], 234 "rl_tools": [ 235 "rl_list_environments", "rl_select_environment", 236 "rl_get_current_config", "rl_edit_config", 237 "rl_start_training", "rl_check_status", 238 "rl_stop_training", "rl_get_results", 239 "rl_list_runs", "rl_test_inference" 240 ], 241 "file_tools": ["read_file", "write_file", "patch", "search_files"], 242 "tts_tools": ["text_to_speech"], 243 } 244 245 246 # ============================================================================= 247 # get_tool_definitions (the main schema provider) 248 # ============================================================================= 249 250 # Module-level memoization for get_tool_definitions(). Keyed on 251 # (frozenset(enabled_toolsets), frozenset(disabled_toolsets), registry._generation). 252 # Hot callers (gateway runner, AIAgent.__init__) invoke this on every turn 253 # with quiet_mode=True; caching avoids ~7 ms of registry walking + schema 254 # filtering + check_fn probing per call. Only active when quiet_mode=True 255 # because quiet_mode=False has stdout side effects (tool-selection prints). 256 # 257 # Invalidation happens transparently via the registry's _generation counter, 258 # which bumps on register() / deregister() / register_toolset_alias(). The 259 # inner check_fn TTL cache in registry.py handles environment drift (Docker 260 # daemon start/stop, env var changes, etc.) on a 30 s horizon. 261 _tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {} 262 263 264 def _clear_tool_defs_cache() -> None: 265 """Drop memoized get_tool_definitions() results. Called when dynamic 266 schema dependencies change (e.g. discord capability cache reset, 267 execute_code sandbox reconfigured).""" 268 _tool_defs_cache.clear() 269 270 271 def get_tool_definitions( 272 enabled_toolsets: List[str] = None, 273 disabled_toolsets: List[str] = None, 274 quiet_mode: bool = False, 275 ) -> List[Dict[str, Any]]: 276 """ 277 Get tool definitions for model API calls with toolset-based filtering. 278 279 All tools must be part of a toolset to be accessible. 280 281 Args: 282 enabled_toolsets: Only include tools from these toolsets. 283 disabled_toolsets: Exclude tools from these toolsets (if enabled_toolsets is None). 284 quiet_mode: Suppress status prints. 285 286 Returns: 287 Filtered list of OpenAI-format tool definitions. 288 """ 289 # Fast path: memoized result when the caller doesn't need stdout prints. 290 # The cache key captures every argument-level input; the registry 291 # generation captures registry mutations (MCP refresh, plugin load). 292 # check_fn results are TTL-cached one level down, inside 293 # registry.get_definitions. The config-mtime fingerprint below captures 294 # user-visible config edits that affect dynamic schemas (execute_code 295 # mode, discord action allowlist, etc.) without needing an explicit 296 # invalidate hook on every config-writer. 297 if quiet_mode: 298 try: 299 from hermes_cli.config import get_config_path 300 cfg_path = get_config_path() 301 cfg_stat = cfg_path.stat() 302 cfg_fp = (cfg_stat.st_mtime_ns, cfg_stat.st_size) 303 except (FileNotFoundError, OSError, ImportError): 304 cfg_fp = None 305 cache_key = ( 306 frozenset(enabled_toolsets) if enabled_toolsets is not None else None, 307 frozenset(disabled_toolsets) if disabled_toolsets else None, 308 registry._generation, 309 cfg_fp, 310 ) 311 cached = _tool_defs_cache.get(cache_key) 312 if cached is not None: 313 # Update _last_resolved_tool_names so downstream callers see 314 # consistent state even on a cache hit. 315 global _last_resolved_tool_names 316 _last_resolved_tool_names = [t["function"]["name"] for t in cached] 317 # Return a shallow copy of the list but share the dict references — 318 # schemas are treated as read-only by all known callers. 319 return list(cached) 320 321 result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode) 322 if quiet_mode: 323 # Cache the freshly-computed list, but hand callers a shallow copy so 324 # downstream mutations (e.g. run_agent appending memory/LCM tool 325 # schemas to self.tools) don't poison the cache. Without this, a 326 # long-lived Gateway process accumulates duplicate tool names across 327 # agent inits and providers that enforce unique tool names 328 # (DeepSeek, Xiaomi MiMo, Moonshot Kimi) reject the request with 329 # HTTP 400. Mirrors the cache-hit path above. (issue #17335) 330 _tool_defs_cache[cache_key] = result 331 return list(result) 332 return result 333 334 335 def _compute_tool_definitions( 336 enabled_toolsets: List[str] = None, 337 disabled_toolsets: List[str] = None, 338 quiet_mode: bool = False, 339 ) -> List[Dict[str, Any]]: 340 """Uncached implementation of :func:`get_tool_definitions`.""" 341 # Determine which tool names the caller wants 342 tools_to_include: set = set() 343 344 if enabled_toolsets is not None: 345 for toolset_name in enabled_toolsets: 346 if validate_toolset(toolset_name): 347 resolved = resolve_toolset(toolset_name) 348 tools_to_include.update(resolved) 349 if not quiet_mode: 350 print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved) if resolved else 'no tools'}") 351 elif toolset_name in _LEGACY_TOOLSET_MAP: 352 legacy_tools = _LEGACY_TOOLSET_MAP[toolset_name] 353 tools_to_include.update(legacy_tools) 354 if not quiet_mode: 355 print(f"✅ Enabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}") 356 else: 357 if not quiet_mode: 358 print(f"⚠️ Unknown toolset: {toolset_name}") 359 else: 360 # Default: start with everything 361 from toolsets import get_all_toolsets 362 for ts_name in get_all_toolsets(): 363 tools_to_include.update(resolve_toolset(ts_name)) 364 365 # Always apply disabled toolsets as a subtraction step at the end. 366 # This ensures that even if a composite toolset (like hermes-cli) 367 # is enabled, any tools belonging to a disabled toolset are strictly 368 # stripped out. See issue #17309. 369 if disabled_toolsets: 370 for toolset_name in disabled_toolsets: 371 if validate_toolset(toolset_name): 372 resolved = resolve_toolset(toolset_name) 373 tools_to_include.difference_update(resolved) 374 if not quiet_mode: 375 print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved) if resolved else 'no tools'}") 376 elif toolset_name in _LEGACY_TOOLSET_MAP: 377 legacy_tools = _LEGACY_TOOLSET_MAP[toolset_name] 378 tools_to_include.difference_update(legacy_tools) 379 if not quiet_mode: 380 print(f"🚫 Disabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}") 381 else: 382 if not quiet_mode: 383 print(f"⚠️ Unknown toolset: {toolset_name}") 384 385 # Plugin-registered tools are now resolved through the normal toolset 386 # path — validate_toolset() / resolve_toolset() / get_all_toolsets() 387 # all check the tool registry for plugin-provided toolsets. No bypass 388 # needed; plugins respect enabled_toolsets / disabled_toolsets like any 389 # other toolset. 390 391 # Ask the registry for schemas (only returns tools whose check_fn passes) 392 filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode) 393 394 # The set of tool names that actually passed check_fn filtering. 395 # Use this (not tools_to_include) for any downstream schema that references 396 # other tools by name — otherwise the model sees tools mentioned in 397 # descriptions that don't actually exist, and hallucinates calls to them. 398 available_tool_names = {t["function"]["name"] for t in filtered_tools} 399 400 # Rebuild execute_code schema to only list sandbox tools that are actually 401 # available. Without this, the model sees "web_search is available in 402 # execute_code" even when the API key isn't configured or the toolset is 403 # disabled (#560-discord). 404 if "execute_code" in available_tool_names: 405 from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema, _get_execution_mode 406 sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names 407 dynamic_schema = build_execute_code_schema(sandbox_enabled, mode=_get_execution_mode()) 408 for i, td in enumerate(filtered_tools): 409 if td.get("function", {}).get("name") == "execute_code": 410 filtered_tools[i] = {"type": "function", "function": dynamic_schema} 411 break 412 413 # Rebuild discord / discord_admin schemas based on the bot's privileged 414 # intents (detected from GET /applications/@me) and the user's action 415 # allowlist in config. Hides actions the bot's intents don't support so 416 # the model never attempts them, and annotates fetch_messages when the 417 # MESSAGE_CONTENT intent is missing. 418 _discord_schema_fns = { 419 "discord": "get_dynamic_schema_core", 420 "discord_admin": "get_dynamic_schema_admin", 421 } 422 for discord_tool_name in _discord_schema_fns: 423 if discord_tool_name in available_tool_names: 424 try: 425 from tools import discord_tool as _dt 426 schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name]) 427 dynamic = schema_fn() 428 except Exception: 429 dynamic = None 430 if dynamic is None: 431 filtered_tools = [ 432 t for t in filtered_tools 433 if t.get("function", {}).get("name") != discord_tool_name 434 ] 435 available_tool_names.discard(discord_tool_name) 436 else: 437 for i, td in enumerate(filtered_tools): 438 if td.get("function", {}).get("name") == discord_tool_name: 439 filtered_tools[i] = {"type": "function", "function": dynamic} 440 break 441 442 # Strip web tool cross-references from browser_navigate description when 443 # web_search / web_extract are not available. The static schema says 444 # "prefer web_search or web_extract" which causes the model to hallucinate 445 # those tools when they're missing. 446 if "browser_navigate" in available_tool_names: 447 web_tools_available = {"web_search", "web_extract"} & available_tool_names 448 if not web_tools_available: 449 for i, td in enumerate(filtered_tools): 450 if td.get("function", {}).get("name") == "browser_navigate": 451 desc = td["function"].get("description", "") 452 desc = desc.replace( 453 " For simple information retrieval, prefer web_search or web_extract (faster, cheaper).", 454 "", 455 ) 456 filtered_tools[i] = { 457 "type": "function", 458 "function": {**td["function"], "description": desc}, 459 } 460 break 461 462 if not quiet_mode: 463 if filtered_tools: 464 tool_names = [t["function"]["name"] for t in filtered_tools] 465 print(f"🛠️ Final tool selection ({len(filtered_tools)} tools): {', '.join(tool_names)}") 466 else: 467 print("🛠️ No tools selected (all filtered out or unavailable)") 468 469 global _last_resolved_tool_names 470 _last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools] 471 472 # Sanitize schemas for broad backend compatibility. llama.cpp's 473 # json-schema-to-grammar converter (used by its OAI server to build 474 # GBNF tool-call parsers) rejects some shapes that cloud providers 475 # silently accept — bare "type": "object" with no properties, 476 # string-valued schema nodes from malformed MCP servers, etc. This 477 # is a no-op for schemas that are already well-formed. 478 try: 479 from tools.schema_sanitizer import sanitize_tool_schemas 480 filtered_tools = sanitize_tool_schemas(filtered_tools) 481 except Exception as e: # pragma: no cover — defensive 482 logger.warning("Schema sanitization skipped: %s", e) 483 484 return filtered_tools 485 486 487 # ============================================================================= 488 # handle_function_call (the main dispatcher) 489 # ============================================================================= 490 491 # Tools whose execution is intercepted by the agent loop (run_agent.py) 492 # because they need agent-level state (TodoStore, MemoryStore, etc.). 493 # The registry still holds their schemas; dispatch just returns a stub error 494 # so if something slips through, the LLM sees a sensible message. 495 _AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"} 496 _READ_SEARCH_TOOLS = {"read_file", "search_files"} 497 498 499 # ========================================================================= 500 # Tool argument type coercion 501 # ========================================================================= 502 503 def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]: 504 """Coerce tool call arguments to match their JSON Schema types. 505 506 LLMs frequently return numbers as strings (``"42"`` instead of ``42``) 507 and booleans as strings (``"true"`` instead of ``true``). This compares 508 each argument value against the tool's registered JSON Schema and attempts 509 safe coercion when the value is a string but the schema expects a different 510 type. Original values are preserved when coercion fails. 511 512 Handles ``"type": "integer"``, ``"type": "number"``, ``"type": "boolean"``, 513 and union types (``"type": ["integer", "string"]``). 514 515 Also wraps bare scalar values in a single-element list when the schema 516 declares ``"type": "array"``. Open-weight models (DeepSeek, Qwen, GLM) 517 sometimes emit ``{"urls": "https://a.com"}`` when the tool expects 518 ``{"urls": ["https://a.com"]}``; wrapping here avoids a confusing tool 519 failure on what is otherwise a well-formed call. 520 """ 521 if not args or not isinstance(args, dict): 522 return args 523 524 schema = registry.get_schema(tool_name) 525 if not schema: 526 return args 527 528 properties = (schema.get("parameters") or {}).get("properties") 529 if not properties: 530 return args 531 532 for key, value in list(args.items()): 533 prop_schema = properties.get(key) 534 if not prop_schema: 535 continue 536 expected = prop_schema.get("type") 537 538 # Wrap bare non-list values when the schema declares ``array``. 539 # Strings still go through _coerce_value first so JSON-encoded 540 # arrays (``'["a","b"]'``) get parsed and nullable ``"null"`` 541 # becomes ``None`` rather than ``["null"]``. 542 # ``None`` itself is preserved — we don't know whether the model 543 # meant "omit" or "empty list", and tools with sensible defaults 544 # (e.g. read_file's normalize_read_pagination) already handle it. 545 if expected == "array" and value is not None and not isinstance(value, (list, tuple)): 546 if isinstance(value, str): 547 coerced = _coerce_value(value, expected, schema=prop_schema) 548 if coerced is not value: 549 # _coerce_value handled it (JSON-parsed list or 550 # nullable "null" → None). 551 args[key] = coerced 552 continue 553 args[key] = [value] 554 logger.info( 555 "coerce_tool_args: wrapped bare string in list for %s.%s", 556 tool_name, key, 557 ) 558 continue 559 args[key] = [value] 560 logger.info( 561 "coerce_tool_args: wrapped bare %s in list for %s.%s", 562 type(value).__name__, tool_name, key, 563 ) 564 continue 565 566 if not isinstance(value, str): 567 continue 568 if not expected and not _schema_allows_null(prop_schema): 569 continue 570 coerced = _coerce_value(value, expected, schema=prop_schema) 571 if coerced is not value: 572 args[key] = coerced 573 574 return args 575 576 577 def _coerce_value(value: str, expected_type, schema: dict | None = None): 578 """Attempt to coerce a string *value* to *expected_type*. 579 580 Returns the original string when coercion is not applicable or fails. 581 """ 582 if _schema_allows_null(schema) and value.strip().lower() == "null": 583 return None 584 585 if isinstance(expected_type, list): 586 # Union type — try each in order, return first successful coercion 587 for t in expected_type: 588 result = _coerce_value(value, t, schema=schema) 589 if result is not value: 590 return result 591 return value 592 593 if expected_type in ("integer", "number"): 594 return _coerce_number(value, integer_only=(expected_type == "integer")) 595 if expected_type == "boolean": 596 return _coerce_boolean(value) 597 if expected_type == "array": 598 return _coerce_json(value, list) 599 if expected_type == "object": 600 return _coerce_json(value, dict) 601 if expected_type == "null" and value.strip().lower() == "null": 602 return None 603 return value 604 605 606 def _schema_allows_null(schema: dict | None) -> bool: 607 """Return True when a JSON Schema fragment explicitly permits null.""" 608 if not isinstance(schema, dict): 609 return False 610 611 schema_type = schema.get("type") 612 if schema_type == "null": 613 return True 614 if isinstance(schema_type, list) and "null" in schema_type: 615 return True 616 if schema.get("nullable") is True: 617 return True 618 619 for union_key in ("anyOf", "oneOf"): 620 variants = schema.get(union_key) 621 if not isinstance(variants, list): 622 continue 623 for variant in variants: 624 if isinstance(variant, dict) and variant.get("type") == "null": 625 return True 626 627 return False 628 629 630 def _coerce_json(value: str, expected_python_type: type): 631 """Parse *value* as JSON when the schema expects an array or object. 632 633 Handles model output drift where a complex oneOf/discriminated-union schema 634 causes the LLM to emit the array/object as a JSON string instead of a native 635 structure. Returns the original string if parsing fails or yields the wrong 636 Python type. 637 """ 638 try: 639 parsed = json.loads(value) 640 except (ValueError, TypeError): 641 return value 642 if isinstance(parsed, expected_python_type): 643 logger.debug( 644 "coerce_tool_args: coerced string to %s via json.loads", 645 expected_python_type.__name__, 646 ) 647 return parsed 648 return value 649 650 651 def _coerce_number(value: str, integer_only: bool = False): 652 """Try to parse *value* as a number. Returns original string on failure.""" 653 try: 654 f = float(value) 655 except (ValueError, OverflowError): 656 return value 657 # Guard against inf/nan — not JSON-serializable, keep original string 658 if f != f or f == float("inf") or f == float("-inf"): 659 return value 660 # If it looks like an integer (no fractional part), return int 661 if f == int(f): 662 return int(f) 663 if integer_only: 664 # Schema wants an integer but value has decimals — keep as string 665 return value 666 return f 667 668 669 def _coerce_boolean(value: str): 670 """Try to parse *value* as a boolean. Returns original string on failure.""" 671 low = value.strip().lower() 672 if low == "true": 673 return True 674 if low == "false": 675 return False 676 return value 677 678 679 def handle_function_call( 680 function_name: str, 681 function_args: Dict[str, Any], 682 task_id: Optional[str] = None, 683 tool_call_id: Optional[str] = None, 684 session_id: Optional[str] = None, 685 user_task: Optional[str] = None, 686 enabled_tools: Optional[List[str]] = None, 687 skip_pre_tool_call_hook: bool = False, 688 ) -> str: 689 """ 690 Main function call dispatcher that routes calls to the tool registry. 691 692 Args: 693 function_name: Name of the function to call. 694 function_args: Arguments for the function. 695 task_id: Unique identifier for terminal/browser session isolation. 696 user_task: The user's original task (for browser_snapshot context). 697 enabled_tools: Tool names enabled for this session. When provided, 698 execute_code uses this list to determine which sandbox 699 tools to generate. Falls back to the process-global 700 ``_last_resolved_tool_names`` for backward compat. 701 702 Returns: 703 Function result as a JSON string. 704 """ 705 # Coerce string arguments to their schema-declared types (e.g. "42"→42) 706 function_args = coerce_tool_args(function_name, function_args) 707 708 try: 709 if function_name in _AGENT_LOOP_TOOLS: 710 return json.dumps({"error": f"{function_name} must be handled by the agent loop"}) 711 712 # Check plugin hooks for a block directive (unless caller already 713 # checked — e.g. run_agent._invoke_tool passes skip=True to 714 # avoid double-firing the hook). 715 # 716 # Single-fire contract: pre_tool_call fires exactly once per tool 717 # execution. get_pre_tool_call_block_message() internally calls 718 # invoke_hook("pre_tool_call", ...) and returns the first block 719 # directive (if any), so observer plugins see the hook on that same 720 # pass. When skip=True, the caller already fired it — do nothing 721 # here. 722 if not skip_pre_tool_call_hook: 723 block_message: Optional[str] = None 724 try: 725 from hermes_cli.plugins import get_pre_tool_call_block_message 726 block_message = get_pre_tool_call_block_message( 727 function_name, 728 function_args, 729 task_id=task_id or "", 730 session_id=session_id or "", 731 tool_call_id=tool_call_id or "", 732 ) 733 except Exception: 734 pass 735 736 if block_message is not None: 737 return json.dumps({"error": block_message}, ensure_ascii=False) 738 739 # Notify the read-loop tracker when a non-read/search tool runs, 740 # so the *consecutive* counter resets (reads after other work are fine). 741 if function_name not in _READ_SEARCH_TOOLS: 742 try: 743 from tools.file_tools import notify_other_tool_call 744 notify_other_tool_call(task_id or "default") 745 except Exception: 746 pass # file_tools may not be loaded yet 747 748 # Measure tool dispatch latency so post_tool_call and 749 # transform_tool_result hooks can observe per-tool duration. 750 # Inspired by Claude Code 2.1.119, which added ``duration_ms`` to 751 # PostToolUse hook inputs so plugin authors can build latency 752 # dashboards, budget alerts, and regression canaries without having 753 # to wrap every tool manually. We use monotonic() so the value is 754 # unaffected by wall-clock adjustments during the call. 755 _dispatch_start = time.monotonic() 756 if function_name == "execute_code": 757 # Prefer the caller-provided list so subagents can't overwrite 758 # the parent's tool set via the process-global. 759 sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names 760 result = registry.dispatch( 761 function_name, function_args, 762 task_id=task_id, 763 enabled_tools=sandbox_enabled, 764 ) 765 else: 766 result = registry.dispatch( 767 function_name, function_args, 768 task_id=task_id, 769 user_task=user_task, 770 ) 771 duration_ms = int((time.monotonic() - _dispatch_start) * 1000) 772 773 try: 774 from hermes_cli.plugins import invoke_hook 775 invoke_hook( 776 "post_tool_call", 777 tool_name=function_name, 778 args=function_args, 779 result=result, 780 task_id=task_id or "", 781 session_id=session_id or "", 782 tool_call_id=tool_call_id or "", 783 duration_ms=duration_ms, 784 ) 785 except Exception: 786 pass 787 788 # Generic tool-result canonicalization seam: plugins receive the 789 # final result string (JSON, usually) and may replace it by 790 # returning a string from transform_tool_result. Runs after 791 # post_tool_call (which stays observational) and before the result 792 # is appended back into conversation context. Fail-open; the first 793 # valid string return wins; non-string returns are ignored. 794 try: 795 from hermes_cli.plugins import invoke_hook 796 hook_results = invoke_hook( 797 "transform_tool_result", 798 tool_name=function_name, 799 args=function_args, 800 result=result, 801 task_id=task_id or "", 802 session_id=session_id or "", 803 tool_call_id=tool_call_id or "", 804 duration_ms=duration_ms, 805 ) 806 for hook_result in hook_results: 807 if isinstance(hook_result, str): 808 result = hook_result 809 break 810 except Exception: 811 pass 812 813 return result 814 815 except Exception as e: 816 error_msg = f"Error executing {function_name}: {str(e)}" 817 logger.exception(error_msg) 818 return json.dumps({"error": error_msg}, ensure_ascii=False) 819 820 821 # ============================================================================= 822 # Backward-compat wrapper functions 823 # ============================================================================= 824 825 def get_all_tool_names() -> List[str]: 826 """Return all registered tool names.""" 827 return registry.get_all_tool_names() 828 829 830 def get_toolset_for_tool(tool_name: str) -> Optional[str]: 831 """Return the toolset a tool belongs to.""" 832 return registry.get_toolset_for_tool(tool_name) 833 834 835 def get_available_toolsets() -> Dict[str, dict]: 836 """Return toolset availability info for UI display.""" 837 return registry.get_available_toolsets() 838 839 840 def check_toolset_requirements() -> Dict[str, bool]: 841 """Return {toolset: available_bool} for every registered toolset.""" 842 return registry.check_toolset_requirements() 843 844 845 def check_tool_availability(quiet: bool = False) -> Tuple[List[str], List[dict]]: 846 """Return (available_toolsets, unavailable_info).""" 847 return registry.check_tool_availability(quiet=quiet)