  1  #!/usr/bin/env python3
  2  """
  3  Model Tools Module
  4  
  5  Thin orchestration layer over the tool registry. Each tool file in tools/
  6  self-registers its schema, handler, and metadata via tools.registry.register().
  7  This module triggers discovery (by importing all tool modules), then provides
  8  the public API that run_agent.py, cli.py, batch_runner.py, and the RL
  9  environments consume.
 10  
 11  Public API (signatures preserved from the original 2,400-line version):
 12      get_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode) -> list
 13      handle_function_call(function_name, function_args, task_id, user_task) -> str
 14      TOOL_TO_TOOLSET_MAP: dict          (for batch_runner.py)
 15      TOOLSET_REQUIREMENTS: dict         (for cli.py, doctor.py)
 16      get_all_tool_names() -> list
 17      get_toolset_for_tool(name) -> str
 18      get_available_toolsets() -> dict
 19      check_toolset_requirements() -> dict
 20      check_tool_availability(quiet) -> tuple
 21  """
 22  
 23  import json
 24  import asyncio
 25  import logging
 26  import threading
 27  import time
 28  from typing import Dict, Any, List, Optional, Tuple
 29  
 30  from tools.registry import discover_builtin_tools, registry
 31  from toolsets import resolve_toolset, validate_toolset
 32  
 33  logger = logging.getLogger(__name__)
 34  
 35  
 36  # =============================================================================
 37  # Async Bridging  (single source of truth -- used by registry.dispatch too)
 38  # =============================================================================
 39  
# Persistent event-loop state for sync->async bridging (see _run_async).
# Created lazily by _get_tool_loop(); creation is guarded by _tool_loop_lock.
_tool_loop: Optional[asyncio.AbstractEventLoop] = None          # persistent loop for the main (CLI) thread
_tool_loop_lock = threading.Lock()        # guards creation/replacement of _tool_loop
_worker_thread_local = threading.local()  # per-worker-thread persistent loops
 43  
 44  
 45  def _get_tool_loop():
 46      """Return a long-lived event loop for running async tool handlers.
 47  
 48      Using a persistent loop (instead of asyncio.run() which creates and
 49      *closes* a fresh loop every time) prevents "Event loop is closed"
 50      errors that occur when cached httpx/AsyncOpenAI clients attempt to
 51      close their transport on a dead loop during garbage collection.
 52      """
 53      global _tool_loop
 54      with _tool_loop_lock:
 55          if _tool_loop is None or _tool_loop.is_closed():
 56              _tool_loop = asyncio.new_event_loop()
 57          return _tool_loop
 58  
 59  
 60  def _get_worker_loop():
 61      """Return a persistent event loop for the current worker thread.
 62  
 63      Each worker thread (e.g., delegate_task's ThreadPoolExecutor threads)
 64      gets its own long-lived loop stored in thread-local storage.  This
 65      prevents the "Event loop is closed" errors that occurred when
 66      asyncio.run() was used per-call: asyncio.run() creates a loop, runs
 67      the coroutine, then *closes* the loop — but cached httpx/AsyncOpenAI
 68      clients remain bound to that now-dead loop and raise RuntimeError
 69      during garbage collection or subsequent use.
 70  
 71      By keeping the loop alive for the thread's lifetime, cached clients
 72      stay valid and their cleanup runs on a live loop.
 73      """
 74      loop = getattr(_worker_thread_local, 'loop', None)
 75      if loop is None or loop.is_closed():
 76          loop = asyncio.new_event_loop()
 77          asyncio.set_event_loop(loop)
 78          _worker_thread_local.loop = loop
 79      return loop
 80  
 81  
 82  def _run_async(coro):
 83      """Run an async coroutine from a sync context.
 84  
 85      If the current thread already has a running event loop (e.g., inside
 86      the gateway's async stack or Atropos's event loop), we spin up a
 87      disposable thread so asyncio.run() can create its own loop without
 88      conflicting.
 89  
 90      For the common CLI path (no running loop), we use a persistent event
 91      loop so that cached async clients (httpx / AsyncOpenAI) remain bound
 92      to a live loop and don't trigger "Event loop is closed" on GC.
 93  
 94      When called from a worker thread (parallel tool execution), we use a
 95      per-thread persistent loop to avoid both contention with the main
 96      thread's shared loop AND the "Event loop is closed" errors caused by
 97      asyncio.run()'s create-and-destroy lifecycle.
 98  
 99      This is the single source of truth for sync->async bridging in tool
100      handlers. The RL paths (agent_loop.py, tool_context.py) also provide
101      outer thread-pool wrapping as defense-in-depth, but each handler is
102      self-protecting via this function.
103      """
104      try:
105          loop = asyncio.get_running_loop()
106      except RuntimeError:
107          loop = None
108  
109      if loop and loop.is_running():
110          # Inside an async context (gateway, RL env) — run in a fresh thread
111          # with its own event loop we own a reference to, so on timeout we
112          # can cancel the task inside that loop (ThreadPoolExecutor.cancel()
113          # only works on not-yet-started futures — it's a no-op on a running
114          # worker, which previously leaked the thread on every 300 s timeout).
115          import concurrent.futures
116  
117          worker_loop: Optional[asyncio.AbstractEventLoop] = None
118          loop_ready = threading.Event()
119  
120          def _run_in_worker():
121              nonlocal worker_loop
122              worker_loop = asyncio.new_event_loop()
123              loop_ready.set()
124              try:
125                  asyncio.set_event_loop(worker_loop)
126                  return worker_loop.run_until_complete(coro)
127              finally:
128                  try:
129                      # Cancel anything still pending (e.g. task cancelled
130                      # externally via call_soon_threadsafe on timeout).
131                      pending = asyncio.all_tasks(worker_loop)
132                      for t in pending:
133                          t.cancel()
134                      if pending:
135                          worker_loop.run_until_complete(
136                              asyncio.gather(*pending, return_exceptions=True)
137                          )
138                  except Exception:
139                      pass
140                  worker_loop.close()
141  
142          pool = concurrent.futures.ThreadPoolExecutor(max_workers=1)
143          future = pool.submit(_run_in_worker)
144          try:
145              return future.result(timeout=300)
146          except concurrent.futures.TimeoutError:
147              # Cancel the coroutine inside its own loop so the worker thread
148              # can wind down instead of running forever.
149              if loop_ready.wait(timeout=1.0) and worker_loop is not None:
150                  try:
151                      for t in asyncio.all_tasks(worker_loop):
152                          worker_loop.call_soon_threadsafe(t.cancel)
153                  except RuntimeError:
154                      # Loop already closed — nothing to cancel.
155                      pass
156              raise
157          finally:
158              # wait=False: don't block the caller on a stuck coroutine. We've
159              # already requested cancellation above; the worker will exit
160              # once the coroutine observes it (usually at the next await).
161              pool.shutdown(wait=False)
162  
163      # If we're on a worker thread (e.g., parallel tool execution in
164      # delegate_task), use a per-thread persistent loop.  This avoids
165      # contention with the main thread's shared loop while keeping cached
166      # httpx/AsyncOpenAI clients bound to a live loop for the thread's
167      # lifetime — preventing "Event loop is closed" on GC cleanup.
168      if threading.current_thread() is not threading.main_thread():
169          worker_loop = _get_worker_loop()
170          return worker_loop.run_until_complete(coro)
171  
172      tool_loop = _get_tool_loop()
173      return tool_loop.run_until_complete(coro)
174  
175  
176  # =============================================================================
177  # Tool Discovery  (importing each module triggers its registry.register calls)
178  # =============================================================================
179  
# Importing each builtin tool module triggers its registry.register() calls.
discover_builtin_tools()

# MCP tool discovery (external MCP servers from config) used to run here as
# a module-level side effect.  It was removed because discover_mcp_tools()
# internally uses a blocking future.result(timeout=120) wait, and the
# gateway lazy-imports this module from inside the asyncio event loop on
# the first user message — freezing Discord/Telegram heartbeats for up to
# 120s whenever any configured MCP server was slow or unreachable (#16856).
#
# Each entry point now runs discovery explicitly at its own startup:
#   - gateway/run.py            -> start_gateway() uses run_in_executor
#   - cli.py, hermes_cli/*      -> inline on startup (no event loop)
#   - tui_gateway/server.py     -> inline on startup (no event loop)
#   - acp_adapter/server.py     -> asyncio.to_thread on session init

# Plugin tool discovery (user/project/pip plugins).  Best-effort by design:
# a broken or absent plugin package must never prevent the builtin toolset
# from loading, so any failure is logged at debug level and ignored.
try:
    from hermes_cli.plugins import discover_plugins
    discover_plugins()
except Exception as e:
    logger.debug("Plugin discovery failed: %s", e)
201  
202  
203  # =============================================================================
204  # Backward-compat constants  (built once after discovery)
205  # =============================================================================
206  
# Tool name -> owning toolset name, snapshotted once after discovery
# (consumed by batch_runner.py).
TOOL_TO_TOOLSET_MAP: Dict[str, str] = registry.get_tool_to_toolset_map()

# Toolset name -> requirement metadata (consumed by cli.py and doctor.py).
TOOLSET_REQUIREMENTS: Dict[str, dict] = registry.get_toolset_requirements()

# Resolved tool names from the last get_tool_definitions() call.
# Used by code_execution_tool to know which tools are available in this session.
_last_resolved_tool_names: List[str] = []
214  
215  
216  # =============================================================================
217  # Legacy toolset name mapping  (old _tools-suffixed names -> tool name lists)
218  # =============================================================================
219  
# Old "*_tools"-suffixed toolset identifiers mapped to the concrete tool names
# they historically expanded to, so configs written before the registry
# refactor keep resolving.  Consulted only when validate_toolset() rejects a
# name (see _compute_tool_definitions).
_LEGACY_TOOLSET_MAP: Dict[str, List[str]] = {
    "web_tools": ["web_search", "web_extract"],
    "terminal_tools": ["terminal"],
    "vision_tools": ["vision_analyze"],
    "moa_tools": ["mixture_of_agents"],
    "image_tools": ["image_generate"],
    "skills_tools": ["skills_list", "skill_view", "skill_manage"],
    "browser_tools": [
        "browser_navigate", "browser_snapshot", "browser_click",
        "browser_type", "browser_scroll", "browser_back",
        "browser_press", "browser_get_images",
        "browser_vision", "browser_console"
    ],
    "cronjob_tools": ["cronjob"],
    "rl_tools": [
        "rl_list_environments", "rl_select_environment",
        "rl_get_current_config", "rl_edit_config",
        "rl_start_training", "rl_check_status",
        "rl_stop_training", "rl_get_results",
        "rl_list_runs", "rl_test_inference"
    ],
    "file_tools": ["read_file", "write_file", "patch", "search_files"],
    "tts_tools": ["text_to_speech"],
}
244  
245  
246  # =============================================================================
247  # get_tool_definitions  (the main schema provider)
248  # =============================================================================
249  
250  # Module-level memoization for get_tool_definitions(). Keyed on
251  # (frozenset(enabled_toolsets), frozenset(disabled_toolsets), registry._generation).
252  # Hot callers (gateway runner, AIAgent.__init__) invoke this on every turn
253  # with quiet_mode=True; caching avoids ~7 ms of registry walking + schema
254  # filtering + check_fn probing per call. Only active when quiet_mode=True
255  # because quiet_mode=False has stdout side effects (tool-selection prints).
256  #
257  # Invalidation happens transparently via the registry's _generation counter,
258  # which bumps on register() / deregister() / register_toolset_alias(). The
259  # inner check_fn TTL cache in registry.py handles environment drift (Docker
260  # daemon start/stop, env var changes, etc.) on a 30 s horizon.
_tool_defs_cache: Dict[tuple, List[Dict[str, Any]]] = {}  # cache_key -> cached schema list (invalidation notes above)
262  
263  
def _clear_tool_defs_cache() -> None:
    """Invalidate every memoized get_tool_definitions() result.

    Invoked when a dynamic-schema dependency changes out from under the
    cache (e.g. the discord capability cache is reset, or the execute_code
    sandbox is reconfigured) so the next lookup recomputes from scratch.
    """
    _tool_defs_cache.clear()
269  
270  
def get_tool_definitions(
    enabled_toolsets: Optional[List[str]] = None,
    disabled_toolsets: Optional[List[str]] = None,
    quiet_mode: bool = False,
) -> List[Dict[str, Any]]:
    """
    Get tool definitions for model API calls with toolset-based filtering.

    All tools must be part of a toolset to be accessible.

    Results are memoized only when ``quiet_mode=True``; the
    ``quiet_mode=False`` path always recomputes because it prints
    tool-selection status to stdout, a side effect that must not be
    skipped on a cache hit.

    Args:
        enabled_toolsets: Only include tools from these toolsets.
            ``None`` means "all toolsets"; an empty list means "no tools" —
            the two are deliberately distinct in the cache key below.
        disabled_toolsets: Exclude tools from these toolsets (if enabled_toolsets is None).
        quiet_mode: Suppress status prints.

    Returns:
        Filtered list of OpenAI-format tool definitions.
    """
    # Fast path: memoized result when the caller doesn't need stdout prints.
    # The cache key captures every argument-level input; the registry
    # generation captures registry mutations (MCP refresh, plugin load).
    # check_fn results are TTL-cached one level down, inside
    # registry.get_definitions. The config-mtime fingerprint below captures
    # user-visible config edits that affect dynamic schemas (execute_code
    # mode, discord action allowlist, etc.) without needing an explicit
    # invalidate hook on every config-writer.
    if quiet_mode:
        try:
            from hermes_cli.config import get_config_path
            cfg_path = get_config_path()
            cfg_stat = cfg_path.stat()
            cfg_fp = (cfg_stat.st_mtime_ns, cfg_stat.st_size)
        except (FileNotFoundError, OSError, ImportError):
            cfg_fp = None
        cache_key = (
            frozenset(enabled_toolsets) if enabled_toolsets is not None else None,
            # An empty disabled list is a no-op, so it intentionally folds
            # into the same key as None (unlike enabled_toolsets above,
            # where [] and None mean different things).
            frozenset(disabled_toolsets) if disabled_toolsets else None,
            registry._generation,
            cfg_fp,
        )
        cached = _tool_defs_cache.get(cache_key)
        if cached is not None:
            # Update _last_resolved_tool_names so downstream callers see
            # consistent state even on a cache hit.
            global _last_resolved_tool_names
            _last_resolved_tool_names = [t["function"]["name"] for t in cached]
            # Return a shallow copy of the list but share the dict references —
            # schemas are treated as read-only by all known callers.
            return list(cached)

    result = _compute_tool_definitions(enabled_toolsets, disabled_toolsets, quiet_mode)
    if quiet_mode:
        # Cache the freshly-computed list, but hand callers a shallow copy so
        # downstream mutations (e.g. run_agent appending memory/LCM tool
        # schemas to self.tools) don't poison the cache. Without this, a
        # long-lived Gateway process accumulates duplicate tool names across
        # agent inits and providers that enforce unique tool names
        # (DeepSeek, Xiaomi MiMo, Moonshot Kimi) reject the request with
        # HTTP 400. Mirrors the cache-hit path above. (issue #17335)
        _tool_defs_cache[cache_key] = result
        return list(result)
    return result
333  
334  
def _compute_tool_definitions(
    enabled_toolsets: Optional[List[str]] = None,
    disabled_toolsets: Optional[List[str]] = None,
    quiet_mode: bool = False,
) -> List[Dict[str, Any]]:
    """Uncached implementation of :func:`get_tool_definitions`.

    Pipeline (order matters):
      1. Expand ``enabled_toolsets`` (or every known toolset when ``None``)
         into a flat set of tool names, honoring legacy ``*_tools`` names.
      2. Subtract ``disabled_toolsets`` as a strict final filter.
      3. Ask the registry for schemas; tools whose check_fn fails are dropped.
      4. Patch dynamic schemas (execute_code, discord, browser_navigate) so
         descriptions only reference tools that actually survived filtering.
      5. Sanitize all schemas for strict backends (e.g. llama.cpp GBNF).

    Side effects: prints selection status unless ``quiet_mode``; updates the
    module-global ``_last_resolved_tool_names``.
    """
    # Determine which tool names the caller wants
    tools_to_include: set = set()

    if enabled_toolsets is not None:
        for toolset_name in enabled_toolsets:
            if validate_toolset(toolset_name):
                resolved = resolve_toolset(toolset_name)
                tools_to_include.update(resolved)
                if not quiet_mode:
                    print(f"✅ Enabled toolset '{toolset_name}': {', '.join(resolved) if resolved else 'no tools'}")
            elif toolset_name in _LEGACY_TOOLSET_MAP:
                legacy_tools = _LEGACY_TOOLSET_MAP[toolset_name]
                tools_to_include.update(legacy_tools)
                if not quiet_mode:
                    print(f"✅ Enabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
            else:
                if not quiet_mode:
                    print(f"⚠️  Unknown toolset: {toolset_name}")
    else:
        # Default: start with everything
        from toolsets import get_all_toolsets
        for ts_name in get_all_toolsets():
            tools_to_include.update(resolve_toolset(ts_name))

    # Always apply disabled toolsets as a subtraction step at the end.
    # This ensures that even if a composite toolset (like hermes-cli)
    # is enabled, any tools belonging to a disabled toolset are strictly
    # stripped out. See issue #17309.
    if disabled_toolsets:
        for toolset_name in disabled_toolsets:
            if validate_toolset(toolset_name):
                resolved = resolve_toolset(toolset_name)
                tools_to_include.difference_update(resolved)
                if not quiet_mode:
                    print(f"🚫 Disabled toolset '{toolset_name}': {', '.join(resolved) if resolved else 'no tools'}")
            elif toolset_name in _LEGACY_TOOLSET_MAP:
                legacy_tools = _LEGACY_TOOLSET_MAP[toolset_name]
                tools_to_include.difference_update(legacy_tools)
                if not quiet_mode:
                    print(f"🚫 Disabled legacy toolset '{toolset_name}': {', '.join(legacy_tools)}")
            else:
                if not quiet_mode:
                    print(f"⚠️  Unknown toolset: {toolset_name}")

    # Plugin-registered tools are now resolved through the normal toolset
    # path — validate_toolset() / resolve_toolset() / get_all_toolsets()
    # all check the tool registry for plugin-provided toolsets.  No bypass
    # needed; plugins respect enabled_toolsets / disabled_toolsets like any
    # other toolset.

    # Ask the registry for schemas (only returns tools whose check_fn passes)
    filtered_tools = registry.get_definitions(tools_to_include, quiet=quiet_mode)

    # The set of tool names that actually passed check_fn filtering.
    # Use this (not tools_to_include) for any downstream schema that references
    # other tools by name — otherwise the model sees tools mentioned in
    # descriptions that don't actually exist, and hallucinates calls to them.
    available_tool_names = {t["function"]["name"] for t in filtered_tools}

    # Rebuild execute_code schema to only list sandbox tools that are actually
    # available.  Without this, the model sees "web_search is available in
    # execute_code" even when the API key isn't configured or the toolset is
    # disabled (#560-discord).
    if "execute_code" in available_tool_names:
        from tools.code_execution_tool import SANDBOX_ALLOWED_TOOLS, build_execute_code_schema, _get_execution_mode
        sandbox_enabled = SANDBOX_ALLOWED_TOOLS & available_tool_names
        dynamic_schema = build_execute_code_schema(sandbox_enabled, mode=_get_execution_mode())
        for i, td in enumerate(filtered_tools):
            if td.get("function", {}).get("name") == "execute_code":
                filtered_tools[i] = {"type": "function", "function": dynamic_schema}
                break

    # Rebuild discord / discord_admin schemas based on the bot's privileged
    # intents (detected from GET /applications/@me) and the user's action
    # allowlist in config.  Hides actions the bot's intents don't support so
    # the model never attempts them, and annotates fetch_messages when the
    # MESSAGE_CONTENT intent is missing.
    _discord_schema_fns = {
        "discord": "get_dynamic_schema_core",
        "discord_admin": "get_dynamic_schema_admin",
    }
    for discord_tool_name in _discord_schema_fns:
        if discord_tool_name in available_tool_names:
            try:
                from tools import discord_tool as _dt
                schema_fn = getattr(_dt, _discord_schema_fns[discord_tool_name])
                dynamic = schema_fn()
            except Exception:
                dynamic = None
            if dynamic is None:
                # Schema builder failed or unavailable — drop the tool rather
                # than expose a static schema that may advertise unsupported
                # actions.
                filtered_tools = [
                    t for t in filtered_tools
                    if t.get("function", {}).get("name") != discord_tool_name
                ]
                available_tool_names.discard(discord_tool_name)
            else:
                for i, td in enumerate(filtered_tools):
                    if td.get("function", {}).get("name") == discord_tool_name:
                        filtered_tools[i] = {"type": "function", "function": dynamic}
                        break

    # Strip web tool cross-references from browser_navigate description when
    # web_search / web_extract are not available.  The static schema says
    # "prefer web_search or web_extract" which causes the model to hallucinate
    # those tools when they're missing.
    if "browser_navigate" in available_tool_names:
        web_tools_available = {"web_search", "web_extract"} & available_tool_names
        if not web_tools_available:
            for i, td in enumerate(filtered_tools):
                if td.get("function", {}).get("name") == "browser_navigate":
                    desc = td["function"].get("description", "")
                    desc = desc.replace(
                        " For simple information retrieval, prefer web_search or web_extract (faster, cheaper).",
                        "",
                    )
                    filtered_tools[i] = {
                        "type": "function",
                        "function": {**td["function"], "description": desc},
                    }
                    break

    if not quiet_mode:
        if filtered_tools:
            tool_names = [t["function"]["name"] for t in filtered_tools]
            print(f"🛠️  Final tool selection ({len(filtered_tools)} tools): {', '.join(tool_names)}")
        else:
            print("🛠️  No tools selected (all filtered out or unavailable)")

    global _last_resolved_tool_names
    _last_resolved_tool_names = [t["function"]["name"] for t in filtered_tools]

    # Sanitize schemas for broad backend compatibility. llama.cpp's
    # json-schema-to-grammar converter (used by its OAI server to build
    # GBNF tool-call parsers) rejects some shapes that cloud providers
    # silently accept — bare "type": "object" with no properties,
    # string-valued schema nodes from malformed MCP servers, etc. This
    # is a no-op for schemas that are already well-formed.
    try:
        from tools.schema_sanitizer import sanitize_tool_schemas
        filtered_tools = sanitize_tool_schemas(filtered_tools)
    except Exception as e:  # pragma: no cover — defensive
        logger.warning("Schema sanitization skipped: %s", e)

    return filtered_tools
485  
486  
487  # =============================================================================
488  # handle_function_call  (the main dispatcher)
489  # =============================================================================
490  
# Tools whose execution is intercepted by the agent loop (run_agent.py)
# because they need agent-level state (TodoStore, MemoryStore, etc.).
# The registry still holds their schemas; dispatch just returns a stub error
# so if something slips through, the LLM sees a sensible message.
_AGENT_LOOP_TOOLS = {"todo", "memory", "session_search", "delegate_task"}
# NOTE(review): read/search file-tool grouping — consumers are not visible in
# this chunk; presumably used by the dispatcher below.  Verify before relying.
_READ_SEARCH_TOOLS = {"read_file", "search_files"}
497  
498  
499  # =========================================================================
500  # Tool argument type coercion
501  # =========================================================================
502  
def coerce_tool_args(tool_name: str, args: Dict[str, Any]) -> Dict[str, Any]:
    """Normalize LLM-provided tool arguments against the tool's JSON Schema.

    Models frequently emit numbers and booleans as strings (``"42"``,
    ``"true"``) and bare scalars where the schema declares an array
    (``{"urls": "https://a.com"}`` instead of ``{"urls": ["https://a.com"]}``
    — seen with DeepSeek, Qwen, GLM).  Each argument is compared against the
    registered schema for *tool_name* and safely converted when it is a
    string but a different type is expected; values that cannot be converted
    are left untouched.  Bare non-list values are wrapped in a one-element
    list for ``array`` properties.  Supports ``integer``, ``number``,
    ``boolean``, and union types such as ``["integer", "string"]``.

    The dict is mutated in place and also returned.
    """
    if not isinstance(args, dict) or not args:
        return args

    schema = registry.get_schema(tool_name)
    if not schema:
        return args

    properties = (schema.get("parameters") or {}).get("properties")
    if not properties:
        return args

    for key, value in list(args.items()):
        prop_schema = properties.get(key)
        if not prop_schema:
            continue
        expected = prop_schema.get("type")

        needs_list_wrap = (
            expected == "array"
            and value is not None
            and not isinstance(value, (list, tuple))
        )
        if needs_list_wrap:
            # Strings get a shot at _coerce_value first so that JSON-encoded
            # arrays ('["a","b"]') parse into real lists and a nullable
            # "null" becomes None rather than ["null"].  None itself is
            # preserved — it may mean "omit" rather than "empty list", and
            # tools with sensible defaults already handle it.
            if isinstance(value, str):
                converted = _coerce_value(value, expected, schema=prop_schema)
                if converted is not value:
                    args[key] = converted
                    continue
                args[key] = [value]
                logger.info(
                    "coerce_tool_args: wrapped bare string in list for %s.%s",
                    tool_name, key,
                )
            else:
                args[key] = [value]
                logger.info(
                    "coerce_tool_args: wrapped bare %s in list for %s.%s",
                    type(value).__name__, tool_name, key,
                )
            continue

        if not isinstance(value, str):
            continue
        if not expected and not _schema_allows_null(prop_schema):
            continue
        converted = _coerce_value(value, expected, schema=prop_schema)
        if converted is not value:
            args[key] = converted

    return args
575  
576  
def _coerce_value(value: str, expected_type, schema: dict | None = None):
    """Coerce the string *value* toward *expected_type*.

    Returns the original string object when no safe conversion applies.
    ``expected_type`` may be a single JSON Schema type name or a union
    (list of names); unions are tried left-to-right and the first
    successful conversion wins.
    """
    # Nullable properties accept the literal string "null" as None.
    if _schema_allows_null(schema) and value.strip().lower() == "null":
        return None

    if isinstance(expected_type, list):
        for candidate in expected_type:
            attempt = _coerce_value(value, candidate, schema=schema)
            if attempt is not value:
                return attempt
        return value

    if expected_type == "integer":
        return _coerce_number(value, integer_only=True)
    if expected_type == "number":
        return _coerce_number(value, integer_only=False)
    if expected_type == "boolean":
        return _coerce_boolean(value)
    if expected_type == "array":
        return _coerce_json(value, list)
    if expected_type == "object":
        return _coerce_json(value, dict)
    if expected_type == "null" and value.strip().lower() == "null":
        return None
    return value
604  
605  
606  def _schema_allows_null(schema: dict | None) -> bool:
607      """Return True when a JSON Schema fragment explicitly permits null."""
608      if not isinstance(schema, dict):
609          return False
610  
611      schema_type = schema.get("type")
612      if schema_type == "null":
613          return True
614      if isinstance(schema_type, list) and "null" in schema_type:
615          return True
616      if schema.get("nullable") is True:
617          return True
618  
619      for union_key in ("anyOf", "oneOf"):
620          variants = schema.get(union_key)
621          if not isinstance(variants, list):
622              continue
623          for variant in variants:
624              if isinstance(variant, dict) and variant.get("type") == "null":
625                  return True
626  
627      return False
628  
629  
630  def _coerce_json(value: str, expected_python_type: type):
631      """Parse *value* as JSON when the schema expects an array or object.
632  
633      Handles model output drift where a complex oneOf/discriminated-union schema
634      causes the LLM to emit the array/object as a JSON string instead of a native
635      structure.  Returns the original string if parsing fails or yields the wrong
636      Python type.
637      """
638      try:
639          parsed = json.loads(value)
640      except (ValueError, TypeError):
641          return value
642      if isinstance(parsed, expected_python_type):
643          logger.debug(
644              "coerce_tool_args: coerced string to %s via json.loads",
645              expected_python_type.__name__,
646          )
647          return parsed
648      return value
649  
650  
651  def _coerce_number(value: str, integer_only: bool = False):
652      """Try to parse *value* as a number.  Returns original string on failure."""
653      try:
654          f = float(value)
655      except (ValueError, OverflowError):
656          return value
657      # Guard against inf/nan — not JSON-serializable, keep original string
658      if f != f or f == float("inf") or f == float("-inf"):
659          return value
660      # If it looks like an integer (no fractional part), return int
661      if f == int(f):
662          return int(f)
663      if integer_only:
664          # Schema wants an integer but value has decimals — keep as string
665          return value
666      return f
667  
668  
669  def _coerce_boolean(value: str):
670      """Try to parse *value* as a boolean.  Returns original string on failure."""
671      low = value.strip().lower()
672      if low == "true":
673          return True
674      if low == "false":
675          return False
676      return value
677  
678  
def handle_function_call(
    function_name: str,
    function_args: Dict[str, Any],
    task_id: Optional[str] = None,
    tool_call_id: Optional[str] = None,
    session_id: Optional[str] = None,
    user_task: Optional[str] = None,
    enabled_tools: Optional[List[str]] = None,
    skip_pre_tool_call_hook: bool = False,
) -> str:
    """
    Main function call dispatcher that routes calls to the tool registry.

    Pipeline: coerce string args to their schema-declared types -> fire the
    pre_tool_call plugin hook (unless the caller already did) -> reset the
    read-loop tracker for non-read/search tools -> dispatch via the registry
    (timed) -> fire post_tool_call -> let transform_tool_result plugins
    replace the result string.  All plugin-hook failures are swallowed
    (fail-open); any other exception is returned as an error JSON string.

    Args:
        function_name: Name of the function to call.
        function_args: Arguments for the function.
        task_id: Unique identifier for terminal/browser session isolation.
        tool_call_id: Provider-assigned id of this tool call; forwarded to
                      plugin hooks (empty string when absent).
        session_id: Session identifier forwarded to plugin hooks (empty
                    string when absent).
        user_task: The user's original task (for browser_snapshot context).
        enabled_tools: Tool names enabled for this session.  When provided,
                       execute_code uses this list to determine which sandbox
                       tools to generate.  Falls back to the process-global
                       ``_last_resolved_tool_names`` for backward compat.
        skip_pre_tool_call_hook: True when the caller (e.g.
                                 run_agent._invoke_tool) has already fired
                                 pre_tool_call; prevents double-firing.

    Returns:
        Function result as a JSON string (an ``{"error": ...}`` JSON object
        when the tool is blocked, must be loop-handled, or raises).
    """
    # Coerce string arguments to their schema-declared types (e.g. "42"→42)
    function_args = coerce_tool_args(function_name, function_args)

    try:
        # Tools the agent loop itself must interpret (e.g. subagent spawning)
        # are never dispatched here.
        if function_name in _AGENT_LOOP_TOOLS:
            return json.dumps({"error": f"{function_name} must be handled by the agent loop"})

        # Check plugin hooks for a block directive (unless caller already
        # checked — e.g. run_agent._invoke_tool passes skip=True to
        # avoid double-firing the hook).
        #
        # Single-fire contract: pre_tool_call fires exactly once per tool
        # execution. get_pre_tool_call_block_message() internally calls
        # invoke_hook("pre_tool_call", ...) and returns the first block
        # directive (if any), so observer plugins see the hook on that same
        # pass. When skip=True, the caller already fired it — do nothing
        # here.
        if not skip_pre_tool_call_hook:
            block_message: Optional[str] = None
            try:
                from hermes_cli.plugins import get_pre_tool_call_block_message
                block_message = get_pre_tool_call_block_message(
                    function_name,
                    function_args,
                    task_id=task_id or "",
                    session_id=session_id or "",
                    tool_call_id=tool_call_id or "",
                )
            except Exception:
                pass  # fail-open: a broken plugin must not block tool calls

            if block_message is not None:
                return json.dumps({"error": block_message}, ensure_ascii=False)

        # Notify the read-loop tracker when a non-read/search tool runs,
        # so the *consecutive* counter resets (reads after other work are fine).
        if function_name not in _READ_SEARCH_TOOLS:
            try:
                from tools.file_tools import notify_other_tool_call
                notify_other_tool_call(task_id or "default")
            except Exception:
                pass  # file_tools may not be loaded yet

        # Measure tool dispatch latency so post_tool_call and
        # transform_tool_result hooks can observe per-tool duration.
        # Inspired by Claude Code 2.1.119, which added ``duration_ms`` to
        # PostToolUse hook inputs so plugin authors can build latency
        # dashboards, budget alerts, and regression canaries without having
        # to wrap every tool manually.  We use monotonic() so the value is
        # unaffected by wall-clock adjustments during the call.
        _dispatch_start = time.monotonic()
        if function_name == "execute_code":
            # Prefer the caller-provided list so subagents can't overwrite
            # the parent's tool set via the process-global.
            sandbox_enabled = enabled_tools if enabled_tools is not None else _last_resolved_tool_names
            result = registry.dispatch(
                function_name, function_args,
                task_id=task_id,
                enabled_tools=sandbox_enabled,
            )
        else:
            result = registry.dispatch(
                function_name, function_args,
                task_id=task_id,
                user_task=user_task,
            )
        duration_ms = int((time.monotonic() - _dispatch_start) * 1000)

        # Observational hook: plugins see name/args/result/timing but cannot
        # change the result here.
        try:
            from hermes_cli.plugins import invoke_hook
            invoke_hook(
                "post_tool_call",
                tool_name=function_name,
                args=function_args,
                result=result,
                task_id=task_id or "",
                session_id=session_id or "",
                tool_call_id=tool_call_id or "",
                duration_ms=duration_ms,
            )
        except Exception:
            pass

        # Generic tool-result canonicalization seam: plugins receive the
        # final result string (JSON, usually) and may replace it by
        # returning a string from transform_tool_result. Runs after
        # post_tool_call (which stays observational) and before the result
        # is appended back into conversation context. Fail-open; the first
        # valid string return wins; non-string returns are ignored.
        try:
            from hermes_cli.plugins import invoke_hook
            hook_results = invoke_hook(
                "transform_tool_result",
                tool_name=function_name,
                args=function_args,
                result=result,
                task_id=task_id or "",
                session_id=session_id or "",
                tool_call_id=tool_call_id or "",
                duration_ms=duration_ms,
            )
            for hook_result in hook_results:
                if isinstance(hook_result, str):
                    result = hook_result
                    break
        except Exception:
            pass

        return result

    except Exception as e:
        # Catch-all boundary: tool failures become an error payload for the
        # model rather than crashing the agent loop.
        error_msg = f"Error executing {function_name}: {str(e)}"
        logger.exception(error_msg)
        return json.dumps({"error": error_msg}, ensure_ascii=False)
819  
820  
821  # =============================================================================
822  # Backward-compat wrapper functions
823  # =============================================================================
824  
def get_all_tool_names() -> List[str]:
    """Backward-compat shim: list every tool name known to the registry."""
    names = registry.get_all_tool_names()
    return names
828  
829  
def get_toolset_for_tool(tool_name: str) -> Optional[str]:
    """Backward-compat shim: look up the toolset owning *tool_name*."""
    toolset = registry.get_toolset_for_tool(tool_name)
    return toolset
833  
834  
def get_available_toolsets() -> Dict[str, dict]:
    """Backward-compat shim: toolset availability info for UI display."""
    info = registry.get_available_toolsets()
    return info
838  
839  
def check_toolset_requirements() -> Dict[str, bool]:
    """Backward-compat shim: {toolset: available_bool} for every toolset."""
    availability = registry.check_toolset_requirements()
    return availability
843  
844  
def check_tool_availability(quiet: bool = False) -> Tuple[List[str], List[dict]]:
    """Backward-compat shim: (available_toolsets, unavailable_info) pair."""
    availability = registry.check_tool_availability(quiet=quiet)
    return availability