Cradicle Explorer

/ tools / terminal_tool.py
terminal_tool.py
   1  #!/usr/bin/env python3
   2  """
   3  Terminal Tool Module
   4  
   5  A terminal tool that executes commands in local, Docker, Modal, SSH,
   6  Singularity, Daytona, and Vercel Sandbox environments. Supports local
   7  execution, containerized backends, and cloud sandboxes, including managed
   8  Modal mode.
   9  
  10  Environment Selection (via TERMINAL_ENV environment variable):
  11  - "local": Execute directly on the host machine (default, fastest)
  12  - "docker": Execute in Docker containers (isolated, requires Docker)
  13  - "modal": Execute in Modal cloud sandboxes (direct Modal or managed gateway)
  14  - "vercel_sandbox": Execute in Vercel Sandbox cloud sandboxes
  15  
  16  Features:
  17  - Multiple execution backends (local, docker, modal, vercel_sandbox)
  18  - Background task support
  19  - VM/container lifecycle management
  20  - Automatic cleanup after inactivity
  21  
  22  Cloud sandbox note:
  23  - Persistent filesystems preserve working state across sandbox recreation
  24  - Persistent filesystems do NOT guarantee the same live sandbox or long-running processes survive cleanup, idle reaping, or Hermes exit
  25  
  26  Usage:
  27      from terminal_tool import terminal_tool
  28  
  29      # Execute a simple command
  30      result = terminal_tool("ls -la")
  31  
  32      # Execute in background
  33      result = terminal_tool("python server.py", background=True)
  34  """
  35  
  36  import importlib.util
  37  import json
  38  import logging
  39  import os
  40  import platform
  41  import re
  42  import time
  43  import threading
  44  import atexit
  45  import shutil
  46  import subprocess
  47  from pathlib import Path
  48  from typing import Optional, Dict, Any, List
  49  
  50  logger = logging.getLogger(__name__)
  51  
  52  
  53  # ---------------------------------------------------------------------------
  54  # Global interrupt event: set by the agent when a user interrupt arrives.
  55  # The terminal tool polls this during command execution so it can kill
  56  # long-running subprocesses immediately instead of blocking until timeout.
  57  # ---------------------------------------------------------------------------
  58  from tools.interrupt import is_interrupted, _interrupt_event  # noqa: F401 — re-exported
  59  # display_hermes_home imported lazily at call site (stale-module safety during hermes update)
  60  
  61  
  62  
  63  
  64  # =============================================================================
  65  # Custom Singularity Environment with more space
  66  # =============================================================================
  67  
  68  # Singularity helpers (scratch dir, SIF cache) now live in tools/environments/singularity.py
  69  from tools.environments.singularity import _get_scratch_dir
  70  from tools.tool_backend_helpers import (
  71      coerce_modal_mode,
  72      has_direct_modal_credentials,
  73      managed_nous_tools_enabled,
  74      resolve_modal_backend_state,
  75  )
  76  
  77  
  78  def _safe_parse_import_env(
  79      name: str,
  80      default: Any,
  81      converter,
  82      type_label: str,
  83  ):
  84      """Parse module-level numeric env vars without breaking import.
  85  
  86      Terminal tool is imported by CLI, ACP, tests, and tool discovery. A single
  87      malformed env var must not make the whole module unloadable at import time.
  88      """
  89      raw = os.getenv(name)
  90      if raw is None or raw == "":
  91          return default
  92      try:
  93          return converter(raw)
  94      except (TypeError, ValueError):
  95          logger.warning(
  96              "Invalid value for %s: %r (expected %s). Falling back to %r.",
  97              name,
  98              raw,
  99              type_label,
 100              default,
 101          )
 102          return default
 103  
 104  
 105  # Hard cap on foreground timeout; override via TERMINAL_MAX_FOREGROUND_TIMEOUT env var.
 106  FOREGROUND_MAX_TIMEOUT = _safe_parse_import_env(
 107      "TERMINAL_MAX_FOREGROUND_TIMEOUT",
 108      600,
 109      int,
 110      "integer",
 111  )
 112  
 113  # Disk usage warning threshold (in GB)
 114  DISK_USAGE_WARNING_THRESHOLD_GB = _safe_parse_import_env(
 115      "TERMINAL_DISK_WARNING_GB",
 116      500.0,
 117      float,
 118      "number",
 119  )
 120  _VERCEL_SANDBOX_DEFAULT_CWD = "/vercel/sandbox"
 121  _SUPPORTED_VERCEL_RUNTIMES = ("node24", "node22", "python3.13")
 122  
 123  
 124  def _is_supported_vercel_runtime(runtime: str) -> bool:
 125      return not runtime or runtime in _SUPPORTED_VERCEL_RUNTIMES
 126  
 127  
 128  def _check_vercel_sandbox_requirements(config: dict[str, Any]) -> bool:
 129      """Validate Vercel Sandbox terminal backend requirements."""
 130      runtime = (config.get("vercel_runtime") or "").strip()
 131      if not _is_supported_vercel_runtime(runtime):
 132          supported = ", ".join(_SUPPORTED_VERCEL_RUNTIMES)
 133          logger.error(
 134              "Vercel Sandbox runtime %r is not supported. "
 135              "Set TERMINAL_VERCEL_RUNTIME to one of: %s.",
 136              runtime,
 137              supported,
 138          )
 139          return False
 140  
 141      disk = config.get("container_disk", 51200)
 142      if disk not in (0, 51200):
 143          logger.error(
 144              "Vercel Sandbox does not support custom TERMINAL_CONTAINER_DISK=%s. "
 145              "Use the default shared setting (51200 MB).",
 146              disk,
 147          )
 148          return False
 149  
 150      if importlib.util.find_spec("vercel") is None:
 151          logger.error(
 152              "vercel is required for the Vercel Sandbox terminal backend: pip install vercel"
 153          )
 154          return False
 155  
 156      has_oidc = bool(os.getenv("VERCEL_OIDC_TOKEN"))
 157      has_token = bool(os.getenv("VERCEL_TOKEN"))
 158      has_project = bool(os.getenv("VERCEL_PROJECT_ID"))
 159      has_team = bool(os.getenv("VERCEL_TEAM_ID"))
 160  
 161      if has_oidc:
 162          return True
 163  
 164      if has_token or has_project or has_team:
 165          if has_token and has_project and has_team:
 166              return True
 167          logger.error(
 168              "Vercel Sandbox backend selected with token auth, but "
 169              "VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID must all "
 170              "be set together. VERCEL_OIDC_TOKEN is supported for one-off "
 171              "local development only."
 172          )
 173          return False
 174  
 175      logger.error(
 176          "Vercel Sandbox backend selected but no supported auth configuration "
 177          "was found. Set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID "
 178          "for normal use. VERCEL_OIDC_TOKEN is supported for one-off local "
 179          "development only."
 180      )
 181      return False
 182  
 183  
 184  def _check_disk_usage_warning():
 185      """Check if total disk usage exceeds warning threshold."""
 186      try:
 187          scratch_dir = _get_scratch_dir()
 188  
 189          # Get total size of hermes directories
 190          total_bytes = 0
 191          import glob
 192          for path in glob.glob(str(scratch_dir / "hermes-*")):
 193              for f in Path(path).rglob('*'):
 194                  if f.is_file():
 195                      try:
 196                          total_bytes += f.stat().st_size
 197                      except OSError as e:
 198                          logger.debug("Could not stat file %s: %s", f, e)
 199          
 200          total_gb = total_bytes / (1024 ** 3)
 201          
 202          if total_gb > DISK_USAGE_WARNING_THRESHOLD_GB:
 203              logger.warning("Disk usage (%.1fGB) exceeds threshold (%.0fGB). Consider running cleanup_all_environments().",
 204                             total_gb, DISK_USAGE_WARNING_THRESHOLD_GB)
 205              return True
 206          
 207          return False
 208      except Exception as e:
 209          logger.debug("Disk usage warning check failed: %s", e, exc_info=True)
 210          return False
 211  
 212  
 213  # Interactive sudo password cache.
 214  #
 215  # Scope the cache to the active session when a session key is available, then
 216  # fall back to callback identity (ACP / CLI interactive callbacks), then the
 217  # current thread. This prevents one interactive session from reusing another
 218  # session's cached sudo password inside the same long-lived process.
 219  _sudo_password_cache: dict[str, str] = {}
 220  _sudo_password_cache_lock = threading.Lock()
 221  
 222  # Optional UI callbacks for interactive prompts. When set, these are called
 223  # instead of the default /dev/tty or input() readers. The CLI registers these
 224  # so prompts route through prompt_toolkit's event loop.
 225  # Callback slots used by the approval prompt and sudo password prompt
 226  # routines. Stored in thread-local state so overlapping ACP sessions —
 227  # each running in its own ThreadPoolExecutor thread — don't stomp on
 228  # each other's callbacks. See GHSA-qg5c-hvr5-hjgr.
 229  #
 230  # CLI mode is single-threaded, so each thread (the only one) holds its
 231  # own callback exactly like before. Gateway mode resolves approvals via
 232  # the per-session queue in tools.approval, not through these callbacks,
 233  # so it's unaffected.
 234  import threading
 235  _callback_tls = threading.local()
 236  
 237  
 238  def _get_sudo_password_callback():
 239      return getattr(_callback_tls, "sudo_password", None)
 240  
 241  
 242  def _get_approval_callback():
 243      return getattr(_callback_tls, "approval", None)
 244  
 245  
 246  def set_sudo_password_callback(cb):
 247      """Register a callback for sudo password prompts (used by CLI).
 248  
 249      Per-thread scope — ACP sessions that run concurrently in a
 250      ThreadPoolExecutor each have their own callback slot.
 251      """
 252      _callback_tls.sudo_password = cb
 253  
 254  
 255  def set_approval_callback(cb):
 256      """Register a callback for dangerous command approval prompts.
 257  
 258      Per-thread scope — ACP sessions that run concurrently in a
 259      ThreadPoolExecutor each have their own callback slot. See
 260      GHSA-qg5c-hvr5-hjgr.
 261      """
 262      _callback_tls.approval = cb
 263  
 264  
 265  def _get_sudo_password_cache_scope() -> str:
 266      """Return the cache scope for interactive sudo passwords."""
 267      try:
 268          from gateway.session_context import get_session_env
 269  
 270          session_key = get_session_env("HERMES_SESSION_KEY", "")
 271      except Exception:
 272          session_key = os.getenv("HERMES_SESSION_KEY", "")
 273      if session_key:
 274          return f"session:{session_key}"
 275  
 276      callback = _get_sudo_password_callback()
 277      if callback is not None:
 278          owner = getattr(callback, "__self__", None)
 279          func = getattr(callback, "__func__", None)
 280          if owner is not None and func is not None:
 281              return f"callback-owner:{id(owner)}:{id(func)}"
 282          return f"callback:{id(callback)}"
 283  
 284      return f"thread:{threading.get_ident()}"
 285  
 286  
 287  def _get_cached_sudo_password() -> str:
 288      """Return the cached sudo password for the current scope."""
 289      scope = _get_sudo_password_cache_scope()
 290      with _sudo_password_cache_lock:
 291          return _sudo_password_cache.get(scope, "")
 292  
 293  
 294  def _set_cached_sudo_password(password: str) -> None:
 295      """Persist a sudo password for the current scope."""
 296      scope = _get_sudo_password_cache_scope()
 297      with _sudo_password_cache_lock:
 298          if password:
 299              _sudo_password_cache[scope] = password
 300          else:
 301              _sudo_password_cache.pop(scope, None)
 302  
 303  
 304  def _reset_cached_sudo_passwords() -> None:
 305      """Clear all cached sudo passwords.
 306  
 307      Internal helper for tests and process teardown paths.
 308      """
 309      with _sudo_password_cache_lock:
 310          _sudo_password_cache.clear()
 311  
 312  # =============================================================================
 313  # Dangerous Command Approval System
 314  # =============================================================================
 315  
 316  # Dangerous command detection + approval now consolidated in tools/approval.py
 317  from tools.approval import (
 318      check_all_command_guards as _check_all_guards_impl,
 319  )
 320  
 321  
 322  def _check_all_guards(command: str, env_type: str) -> dict:
 323      """Delegate to consolidated guard (tirith + dangerous cmd) with CLI callback."""
 324      return _check_all_guards_impl(command, env_type,
 325                                    approval_callback=_get_approval_callback())
 326  
 327  
 328  # Allowlist: characters that can legitimately appear in directory paths.
 329  # Covers alphanumeric, path separators, Windows drive/UNC separators, tilde,
 330  # dot, hyphen, underscore, space, plus, at, equals, and comma.  Everything
 331  # else is rejected.
 332  _WORKDIR_SAFE_RE = re.compile(r'^[A-Za-z0-9/\\:_\-.~ +@=,]+$')
 333  
 334  
 335  def _validate_workdir(workdir: str) -> str | None:
 336      """Reject workdir values that don't look like a filesystem path.
 337  
 338      Uses an allowlist of safe characters rather than a deny-list, so novel
 339      shell metacharacters can't slip through.
 340  
 341      Returns None if safe, or an error message string if dangerous.
 342      """
 343      if not workdir:
 344          return None
 345      if not _WORKDIR_SAFE_RE.match(workdir):
 346          # Find the first offending character for a helpful message.
 347          for ch in workdir:
 348              if not _WORKDIR_SAFE_RE.match(ch):
 349                  return (
 350                      f"Blocked: workdir contains disallowed character {repr(ch)}. "
 351                      "Use a simple filesystem path without shell metacharacters."
 352                  )
 353          return "Blocked: workdir contains disallowed characters."
 354      return None
 355  
 356  
 357  def _handle_sudo_failure(output: str, env_type: str) -> str:
 358      """
 359      Check for sudo failure and add helpful message for messaging contexts.
 360      
 361      Returns enhanced output if sudo failed in messaging context, else original.
 362      """
 363      is_gateway = os.getenv("HERMES_GATEWAY_SESSION")
 364      
 365      if not is_gateway:
 366          return output
 367      
 368      # Check for sudo failure indicators
 369      sudo_failures = [
 370          "sudo: a password is required",
 371          "sudo: no tty present",
 372          "sudo: a terminal is required",
 373      ]
 374      
 375      for failure in sudo_failures:
 376          if failure in output:
 377              from hermes_constants import display_hermes_home as _dhh
 378              return output + f"\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to {_dhh()}/.env on the agent machine."
 379      
 380      return output
 381  
 382  
 383  def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str:
 384      """
 385      Prompt user for sudo password with timeout.
 386      
 387      Returns the password if entered, or empty string if:
 388      - User presses Enter without input (skip)
 389      - Timeout expires (45s default)
 390      - Any error occurs
 391      
 392      Only works in interactive mode (HERMES_INTERACTIVE=1).
 393      If a _sudo_password_callback is registered (by the CLI), delegates to it
 394      so the prompt integrates with prompt_toolkit's UI.  Otherwise reads
 395      directly from /dev/tty with echo disabled.
 396      """
 397      import sys
 398      
 399      # Use the registered callback when available (prompt_toolkit-compatible)
 400      _sudo_cb = _get_sudo_password_callback()
 401      if _sudo_cb is not None:
 402          try:
 403              return _sudo_cb() or ""
 404          except Exception:
 405              return ""
 406  
 407      result = {"password": None, "done": False}
 408      
 409      def read_password_thread():
 410          """Read password with echo disabled. Uses msvcrt on Windows, /dev/tty on Unix."""
 411          tty_fd = None
 412          old_attrs = None
 413          try:
 414              if platform.system() == "Windows":
 415                  import msvcrt
 416                  chars = []
 417                  while True:
 418                      c = msvcrt.getwch()
 419                      if c in ("\r", "\n"):
 420                          break
 421                      if c == "\x03":
 422                          raise KeyboardInterrupt
 423                      chars.append(c)
 424                  result["password"] = "".join(chars)
 425              else:
 426                  import termios
 427                  tty_fd = os.open("/dev/tty", os.O_RDONLY)
 428                  old_attrs = termios.tcgetattr(tty_fd)
 429                  new_attrs = termios.tcgetattr(tty_fd)
 430                  new_attrs[3] = new_attrs[3] & ~termios.ECHO
 431                  termios.tcsetattr(tty_fd, termios.TCSAFLUSH, new_attrs)
 432                  chars = []
 433                  while True:
 434                      b = os.read(tty_fd, 1)
 435                      if not b or b in (b"\n", b"\r"):
 436                          break
 437                      chars.append(b)
 438                  result["password"] = b"".join(chars).decode("utf-8", errors="replace")
 439          except (EOFError, KeyboardInterrupt, OSError):
 440              result["password"] = ""
 441          except Exception:
 442              result["password"] = ""
 443          finally:
 444              if tty_fd is not None and old_attrs is not None:
 445                  try:
 446                      import termios as _termios
 447                      _termios.tcsetattr(tty_fd, _termios.TCSAFLUSH, old_attrs)
 448                  except Exception as e:
 449                      logger.debug("Failed to restore terminal attributes: %s", e)
 450              if tty_fd is not None:
 451                  try:
 452                      os.close(tty_fd)
 453                  except Exception as e:
 454                      logger.debug("Failed to close tty fd: %s", e)
 455              result["done"] = True
 456      
 457      try:
 458          os.environ["HERMES_SPINNER_PAUSE"] = "1"
 459          time.sleep(0.2)
 460          
 461          print()
 462          print("┌" + "─" * 58 + "┐")
 463          print("│  🔐 SUDO PASSWORD REQUIRED" + " " * 30 + "│")
 464          print("├" + "─" * 58 + "┤")
 465          print("│  Enter password below (input is hidden), or:            │")
 466          print("│    • Press Enter to skip (command fails gracefully)     │")
 467          print(f"│    • Wait {timeout_seconds}s to auto-skip" + " " * 27 + "│")
 468          print("└" + "─" * 58 + "┘")
 469          print()
 470          print("  Password (hidden): ", end="", flush=True)
 471          
 472          password_thread = threading.Thread(target=read_password_thread, daemon=True)
 473          password_thread.start()
 474          password_thread.join(timeout=timeout_seconds)
 475          
 476          if result["done"]:
 477              password = result["password"] or ""
 478              print()  # newline after hidden input
 479              if password:
 480                  print("  ✓ Password received (cached for this session)")
 481              else:
 482                  print("  ⏭ Skipped - continuing without sudo")
 483              print()
 484              sys.stdout.flush()
 485              return password
 486          else:
 487              print("\n  ⏱ Timeout - continuing without sudo")
 488              print("    (Press Enter to dismiss)")
 489              print()
 490              sys.stdout.flush()
 491              return ""
 492              
 493      except (EOFError, KeyboardInterrupt):
 494          print()
 495          print("  ⏭ Cancelled - continuing without sudo")
 496          print()
 497          sys.stdout.flush()
 498          return ""
 499      except Exception as e:
 500          print(f"\n  [sudo prompt error: {e}] - continuing without sudo\n")
 501          sys.stdout.flush()
 502          return ""
 503      finally:
 504          if "HERMES_SPINNER_PAUSE" in os.environ:
 505              del os.environ["HERMES_SPINNER_PAUSE"]
 506  
 507  def _safe_command_preview(command: Any, limit: int = 200) -> str:
 508      """Return a log-safe preview for possibly-invalid command values."""
 509      if command is None:
 510          return "<None>"
 511      if isinstance(command, str):
 512          return command[:limit]
 513      try:
 514          return repr(command)[:limit]
 515      except Exception:
 516          return f"<{type(command).__name__}>"
 517  
 518  def _looks_like_env_assignment(token: str) -> bool:
 519      """Return True when *token* is a leading shell environment assignment."""
 520      if "=" not in token or token.startswith("="):
 521          return False
 522      name, _value = token.split("=", 1)
 523      return bool(re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", name))
 524  
 525  
 526  def _read_shell_token(command: str, start: int) -> tuple[str, int]:
 527      """Read one shell token, preserving quotes/escapes, starting at *start*."""
 528      i = start
 529      n = len(command)
 530  
 531      while i < n:
 532          ch = command[i]
 533          if ch.isspace() or ch in ";|&()":
 534              break
 535          if ch == "'":
 536              i += 1
 537              while i < n and command[i] != "'":
 538                  i += 1
 539              if i < n:
 540                  i += 1
 541              continue
 542          if ch == '"':
 543              i += 1
 544              while i < n:
 545                  inner = command[i]
 546                  if inner == "\\" and i + 1 < n:
 547                      i += 2
 548                      continue
 549                  if inner == '"':
 550                      i += 1
 551                      break
 552                  i += 1
 553              continue
 554          if ch == "\\" and i + 1 < n:
 555              i += 2
 556              continue
 557          i += 1
 558  
 559      return command[start:i], i
 560  
 561  
 562  def _rewrite_real_sudo_invocations(command: str) -> tuple[str, bool]:
 563      """Rewrite only real unquoted sudo command words, not plain text mentions."""
 564      out: list[str] = []
 565      i = 0
 566      n = len(command)
 567      command_start = True
 568      found = False
 569  
 570      while i < n:
 571          ch = command[i]
 572  
 573          if ch.isspace():
 574              out.append(ch)
 575              if ch == "\n":
 576                  command_start = True
 577              i += 1
 578              continue
 579  
 580          if ch == "#" and command_start:
 581              comment_end = command.find("\n", i)
 582              if comment_end == -1:
 583                  out.append(command[i:])
 584                  break
 585              out.append(command[i:comment_end])
 586              i = comment_end
 587              continue
 588  
 589          if command.startswith("&&", i) or command.startswith("||", i) or command.startswith(";;", i):
 590              out.append(command[i:i + 2])
 591              i += 2
 592              command_start = True
 593              continue
 594  
 595          if ch in ";|&(":
 596              out.append(ch)
 597              i += 1
 598              command_start = True
 599              continue
 600  
 601          if ch == ")":
 602              out.append(ch)
 603              i += 1
 604              command_start = False
 605              continue
 606  
 607          token, next_i = _read_shell_token(command, i)
 608          if command_start and token == "sudo":
 609              out.append("sudo -S -p ''")
 610              found = True
 611          else:
 612              out.append(token)
 613  
 614          if command_start and _looks_like_env_assignment(token):
 615              command_start = True
 616          else:
 617              command_start = False
 618          i = next_i
 619  
 620      return "".join(out), found
 621  
 622  
 623  def _sudo_nopasswd_works() -> bool:
 624      """Return True when local sudo currently works without prompting.
 625  
 626      Only probes for the `local` terminal backend; Docker/SSH/Modal/etc. must
 627      not inherit the host's sudo state. Re-probes every call (no process-level
 628      cache) so an expired sudo timestamp cannot make a later command silently
 629      block waiting for a password.
 630      """
 631      terminal_env = os.getenv("TERMINAL_ENV", "local").strip().lower() or "local"
 632      if terminal_env != "local":
 633          return False
 634  
 635      try:
 636          probe = subprocess.run(
 637              ["sudo", "-n", "true"],
 638              stdin=subprocess.DEVNULL,
 639              stdout=subprocess.DEVNULL,
 640              stderr=subprocess.DEVNULL,
 641              timeout=3,
 642              check=False,
 643          )
 644          return probe.returncode == 0
 645      except Exception:
 646          return False
 647  
 648  
 649  def _rewrite_compound_background(command: str) -> str:
 650      """Wrap `A && B &` (or `A || B &`) to `A && { B & }` at depth 0.
 651  
 652      Bash parses ``A && B &`` with `&&` tighter than `&`, so it forks a
 653      subshell for the whole `A && B` compound and backgrounds it. Inside
 654      the subshell, `B` runs foreground, so the subshell waits for `B` to
 655      finish. When `B` is a long-running process (`python3 -m http.server`,
 656      `yes > /dev/null`, anything that doesn't naturally exit), the subshell
 657      never exits. It leaks as a process stuck in ``wait4`` forever — and
 658      on the way, its open stdout pipe can prevent the terminal tool from
 659      returning promptly.
 660  
 661      Rewriting the tail to `A && { B & }` preserves `&&`'s error semantics
 662      (skip B if A fails) while replacing the subshell with a brace group.
 663      The brace group runs in the current shell (no fork), backgrounds B as
 664      a simple command (bash doesn't wait for it in non-interactive mode),
 665      and exits immediately. B runs as a normal backgrounded child, orphaned
 666      when the parent shell exits.
 667  
 668      Handles redirects (``&>``, ``2>&1``) and skips content inside quoted
 669      strings and parenthesised subshells. Leaves simple ``cmd &`` alone —
 670      that construct doesn't have the subshell-wait bug.
 671      """
 672      n = len(command)
 673      i = 0
 674      paren_depth = 0
 675      brace_depth = 0
 676      # Position in *command* just after the most recent `&&` / `||` at depth 0
 677      # in the current statement; -1 when no chain operator is active.
 678      last_chain_op_end = -1
 679      rewrites: list[tuple[int, int]] = []  # (chain_op_end, amp_pos)
 680  
 681      while i < n:
 682          ch = command[i]
 683  
 684          # Newline terminates a statement at depth 0 — reset chain state.
 685          # Checked before the whitespace skip so we don't miss it.
 686          if ch == "\n" and paren_depth == 0 and brace_depth == 0:
 687              last_chain_op_end = -1
 688              i += 1
 689              continue
 690  
 691          if ch.isspace():
 692              i += 1
 693              continue
 694  
 695          # Comments (only at statement start — conservative: any `#` not inside
 696          # a token ends the line). `_read_shell_token` handles quoted strings
 697          # below so `#` inside quotes is safe.
 698          if ch == "#":
 699              nl = command.find("\n", i)
 700              if nl == -1:
 701                  break
 702              i = nl
 703              continue
 704  
 705          if ch == "\\" and i + 1 < n:
 706              i += 2
 707              continue
 708  
 709          # Quoted tokens — consume whole string via the shared tokenizer.
 710          if ch in ("'", '"'):
 711              _, next_i = _read_shell_token(command, i)
 712              i = max(next_i, i + 1)
 713              continue
 714  
 715          if ch == "(":
 716              paren_depth += 1
 717              i += 1
 718              continue
 719  
 720          if ch == ")":
 721              paren_depth = max(0, paren_depth - 1)
 722              i += 1
 723              continue
 724  
 725          # Brace groups: `{ ... }` is a group (no subshell fork), and bash
 726          # requires whitespace after `{`. We track depth so already-rewritten
 727          # output (`A && { B & }`) is idempotent — the inner `&` is part of
 728          # the group, not a new compound to rewrite. Also skip content inside
 729          # the group since `A && B &` there is separately well-formed.
 730          if ch == "{" and i + 1 < n and (command[i + 1].isspace() or command[i + 1] == "\n"):
 731              brace_depth += 1
 732              i += 1
 733              continue
 734          if ch == "}" and brace_depth > 0:
 735              brace_depth -= 1
 736              # Closing a group completes a compound statement; reset chain.
 737              last_chain_op_end = -1
 738              i += 1
 739              continue
 740  
 741          # Inside parens or brace groups, skip operators — they parse in their
 742          # own scope. `(...)` subshells have the same bug class but are not the
 743          # common agent pattern; leave for a follow-up.
 744          if paren_depth > 0 or brace_depth > 0:
 745              i += 1
 746              continue
 747  
 748          # Chain operators at depth 0
 749          if command.startswith("&&", i) or command.startswith("||", i):
 750              last_chain_op_end = i + 2
 751              i += 2
 752              continue
 753  
 754          # Statement terminators reset the chain state
 755          if ch == ";":
 756              last_chain_op_end = -1
 757              i += 1
 758              continue
 759  
 760          # Single `|` (pipe) starts a new pipeline stage; don't rewrite
 761          # across it. `||` handled above.
 762          if ch == "|":
 763              last_chain_op_end = -1
 764              i += 1
 765              continue
 766  
 767          # `&` handling: distinguish `&&`, `&>`, fd redirect (`>&`, `<&`),
 768          # and a true backgrounding `&`.
 769          if ch == "&":
 770              # `&&` handled above; won't reach here
 771              if i + 1 < n and command[i + 1] == ">":
 772                  # `&>` redirect — consume
 773                  i += 2
 774                  continue
 775              # `>&` / `<&` fd target — look back past whitespace
 776              j = i - 1
 777              while j >= 0 and command[j].isspace():
 778                  j -= 1
 779              if j >= 0 and command[j] in "<>":
 780                  i += 1
 781                  continue
 782              # Real background operator
 783              if last_chain_op_end >= 0:
 784                  rewrites.append((last_chain_op_end, i))
 785              last_chain_op_end = -1
 786              i += 1
 787              continue
 788  
 789          # Regular unquoted token — advance past it via the shared tokenizer
 790          _, next_i = _read_shell_token(command, i)
 791          i = max(next_i, i + 1)
 792  
 793      if not rewrites:
 794          return command
 795  
 796      # Apply rewrites back-to-front so earlier indices remain valid.
 797      result = command
 798      for chain_end, amp_pos in reversed(rewrites):
 799          # Skip whitespace right after the `&&`/`||` so the brace group
 800          # opens flush against the inner command.
 801          insert_pos = chain_end
 802          while insert_pos < amp_pos and result[insert_pos].isspace():
 803              insert_pos += 1
 804          prefix = result[:insert_pos]
 805          middle = result[insert_pos:amp_pos]  # inner command + trailing space
 806          suffix = result[amp_pos + 1 :]
 807          # `{` needs a trailing space in bash; the closing `}` needs to be
 808          # preceded by `;` or `&` — we're providing `&` from the backgrounding.
 809          result = prefix + "{ " + middle + "& }" + suffix
 810  
 811      return result
 812  
 813  
 814  def _transform_sudo_command(command: str | None) -> tuple[str | None, str | None]:
 815      """
 816      Transform sudo commands to use -S flag if SUDO_PASSWORD is available.
 817  
 818      This is a shared helper used by all execution environments to provide
 819      consistent sudo handling across local, SSH, and container environments.
 820  
 821      Returns:
 822          (transformed_command, sudo_stdin) where:
 823          - transformed_command has every bare ``sudo`` replaced with
 824            ``sudo -S -p ''`` so sudo reads its password from stdin.
 825          - sudo_stdin is the password string with a trailing newline that the
 826            caller must prepend to the process's stdin stream.  sudo -S reads
 827            exactly one line (the password) and passes the rest of stdin to the
 828            child command, so prepending is safe even when the caller also has
 829            its own stdin_data to pipe.
 830          - If no password is available, sudo_stdin is None and the command is
 831            returned unchanged so it fails gracefully with
 832            "sudo: a password is required".
 833  
 834      Callers that drive a subprocess directly (local, ssh, docker, singularity)
 835      should prepend sudo_stdin to their stdin_data and pass the merged bytes to
 836      Popen's stdin pipe.
 837  
 838      Callers that cannot pipe subprocess stdin (modal, daytona,
 839      vercel_sandbox) must embed the password in the command string
 840      themselves; see their execute() methods for how they handle the
 841      non-None sudo_stdin case.
 842  
 843      If SUDO_PASSWORD is not set and in interactive mode (HERMES_INTERACTIVE=1):
 844        Prompts user for password with 45s timeout, caches for session.
 845  
 846      If SUDO_PASSWORD is not set and NOT interactive:
 847        Command runs as-is (fails gracefully with "sudo: a password is required").
 848      """
 849      if command is None:
 850          return None, None
 851      transformed, has_real_sudo = _rewrite_real_sudo_invocations(command)
 852      if not has_real_sudo:
 853          return command, None
 854  
 855      has_configured_password = "SUDO_PASSWORD" in os.environ
 856      sudo_password = (
 857          os.environ.get("SUDO_PASSWORD", "")
 858          if has_configured_password
 859          else _get_cached_sudo_password()
 860      )
 861  
 862      # Local hosts with sudoers NOPASSWD should not be forced through the
 863      # interactive Hermes password prompt or the sudo -S password-pipe path.
 864      # Scoped to the local terminal backend so Docker/SSH/Modal/etc. can't
 865      # inherit host sudo state. Re-probes every call (no process-lifetime
 866      # cache) so an expired sudo timestamp doesn't make a later command block
 867      # silently without Hermes prompting.
 868      if not has_configured_password and not sudo_password and _sudo_nopasswd_works():
 869          return command, None
 870  
 871      if not has_configured_password and not sudo_password and os.getenv("HERMES_INTERACTIVE"):
 872          sudo_password = _prompt_for_sudo_password(timeout_seconds=45)
 873          if sudo_password:
 874              _set_cached_sudo_password(sudo_password)
 875  
 876      if has_configured_password or sudo_password:
 877          # Trailing newline is required: sudo -S reads one line for the password.
 878          return transformed, sudo_password + "\n"
 879  
 880      return command, None
 881  
 882  
 883  # Environment classes now live in tools/environments/
 884  from tools.environments.local import LocalEnvironment as _LocalEnvironment
 885  from tools.environments.singularity import SingularityEnvironment as _SingularityEnvironment
 886  from tools.environments.ssh import SSHEnvironment as _SSHEnvironment
 887  from tools.environments.docker import DockerEnvironment as _DockerEnvironment
 888  from tools.environments.modal import ModalEnvironment as _ModalEnvironment
 889  from tools.environments.managed_modal import ManagedModalEnvironment as _ManagedModalEnvironment
 890  from tools.managed_tool_gateway import is_managed_tool_gateway_ready
 891  
 892  
 893  # Tool description for LLM
 894  TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem usually persists between calls.
 895  
 896  Do NOT use cat/head/tail to read files — use read_file instead.
 897  Do NOT use grep/rg/find to search — use search_files instead.
 898  Do NOT use ls to list directories — use search_files(target='files') instead.
 899  Do NOT use sed/awk to edit files — use patch instead.
 900  Do NOT use echo/cat heredoc to create files — use write_file instead.
 901  Reserve terminal for: builds, installs, git, processes, scripts, network, package managers, and anything that needs a shell.
 902  
 903  Foreground (default): Commands return INSTANTLY when done, even if the timeout is high. Set timeout=300 for long builds/scripts — you'll still get the result in seconds if it's fast. Prefer foreground for short commands.
 904  Background: Set background=true to get a session_id. Two patterns:
 905    (1) Long-lived processes that never exit (servers, watchers).
 906    (2) Long-running tasks with notify_on_complete=true — you can keep working on other things and the system auto-notifies you when the task finishes. Great for test suites, builds, deployments, or anything that takes more than a minute.
 907  For servers/watchers, do NOT use shell-level background wrappers (nohup/disown/setsid/trailing '&') in foreground mode. Use background=true so Hermes can track lifecycle and output.
 908  After starting a server, verify readiness with a health check or log signal, then run tests in a separate terminal() call. Avoid blind sleep loops.
 909  Use process(action="poll") for progress checks, process(action="wait") to block until done.
 910  Working directory: Use 'workdir' for per-command cwd.
 911  PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL).
 912  
 913  Do NOT use vim/nano/interactive tools without pty=true — they hang without a pseudo-terminal. Pipe git output to cat if it might page.
 914  """
 915  
 916  # Global state for environment lifecycle management
 917  _active_environments: Dict[str, Any] = {}
 918  _last_activity: Dict[str, float] = {}
 919  _env_lock = threading.Lock()
 920  _creation_locks: Dict[str, threading.Lock] = {}  # Per-task locks for sandbox creation
 921  _creation_locks_lock = threading.Lock()  # Protects _creation_locks dict itself
 922  _cleanup_thread = None
 923  _cleanup_running = False
 924  
 925  # Per-task environment overrides registry.
 926  # Allows environments (e.g., TerminalBench2Env) to specify a custom Docker/Modal
 927  # image for a specific task_id BEFORE the agent loop starts. When the terminal or
 928  # file tools create a new sandbox for that task_id, they check this registry first
 929  # and fall back to the TERMINAL_MODAL_IMAGE (etc.) env var if no override is set.
 930  #
 931  # This is never exposed to the model -- only infrastructure code calls it.
 932  # Thread-safe because each task_id is unique per rollout.
 933  _task_env_overrides: Dict[str, Dict[str, Any]] = {}
 934  
 935  
 936  def register_task_env_overrides(task_id: str, overrides: Dict[str, Any]):
 937      """
 938      Register environment overrides for a specific task/rollout.
 939  
 940      Called by Atropos environments before the agent loop to configure
 941      per-task sandbox settings (e.g., a custom Dockerfile for the Modal image).
 942  
 943      Supported override keys:
 944          - modal_image: str -- Path to Dockerfile or Docker Hub image name
 945          - docker_image: str -- Docker image name
 946          - cwd: str -- Working directory inside the sandbox
 947  
 948      Args:
 949          task_id: The rollout's unique task identifier
 950          overrides: Dict of config keys to override
 951      """
 952      _task_env_overrides[task_id] = overrides
 953  
 954  
 955  def clear_task_env_overrides(task_id: str):
 956      """
 957      Clear environment overrides for a task after rollout completes.
 958  
 959      Called during cleanup to avoid stale entries accumulating.
 960      """
 961      _task_env_overrides.pop(task_id, None)
 962  
 963  
 964  def _resolve_container_task_id(task_id: Optional[str]) -> str:
 965      """
 966      Map a tool-call ``task_id`` to the container/sandbox key used by
 967      ``_active_environments``.
 968  
 969      The top-level agent passes ``task_id=None`` and lands on ``"default"``.
 970      ``delegate_task`` children pass their own subagent ID so that
 971      file-state tracking, the active-subagents registry, and TUI events stay
 972      distinct per child -- but we deliberately collapse that ID back to
 973      ``"default"`` here so subagents share the parent's long-lived container
 974      (one bash, one /workspace, one set of installed packages).
 975  
 976      Exception: RL / benchmark environments (TerminalBench2, HermesSweEnv, ...)
 977      call ``register_task_env_overrides(task_id, {...})`` to request a
 978      per-task Docker/Modal image. When an override is registered for a
 979      task_id, we honour it by returning the task_id unchanged -- those
 980      rollouts need their own isolated sandbox, which is the whole point of
 981      the override.
 982      """
 983      if task_id and task_id in _task_env_overrides:
 984          return task_id
 985      return "default"
 986  
 987  
 988  # Configuration from environment variables
 989  
 990  def _parse_env_var(name: str, default: str, converter=int, type_label: str = "integer"):
 991      """Parse an environment variable with *converter*, raising a clear error on bad values.
 992  
 993      Without this wrapper, a single malformed env var (e.g. TERMINAL_TIMEOUT=5m)
 994      causes an unhandled ValueError that kills every terminal command.
 995      """
 996      raw = os.getenv(name, default)
 997      try:
 998          return converter(raw)
 999      except (ValueError, json.JSONDecodeError):
1000          raise ValueError(
1001              f"Invalid value for {name}: {raw!r} (expected {type_label}). "
1002              f"Check ~/.hermes/.env or environment variables."
1003          )
1004  
1005  
1006  def _get_env_config() -> Dict[str, Any]:
1007      """Get terminal environment configuration from environment variables."""
1008      # Default image with Python and Node.js for maximum compatibility
1009      default_image = "nikolaik/python-nodejs:python3.11-nodejs20"
1010      env_type = os.getenv("TERMINAL_ENV", "local")
1011      
1012      mount_docker_cwd = os.getenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "false").lower() in ("true", "1", "yes")
1013  
1014      # Default cwd: local uses the host's current directory, ssh uses the
1015      # remote home, Vercel uses its documented workspace root, and everything
1016      # else starts in the backend's default root-like cwd.
1017      if env_type == "local":
1018          default_cwd = os.getcwd()
1019      elif env_type == "ssh":
1020          default_cwd = "~"
1021      elif env_type == "vercel_sandbox":
1022          default_cwd = _VERCEL_SANDBOX_DEFAULT_CWD
1023      else:
1024          default_cwd = "/root"
1025  
1026      # Read TERMINAL_CWD but sanity-check it for container backends.
1027      # If Docker cwd passthrough is explicitly enabled, remap the host path to
1028      # /workspace and track the original host path separately. Otherwise keep the
1029      # normal sandbox behavior and discard host paths.
1030      cwd = os.getenv("TERMINAL_CWD", default_cwd)
1031      if cwd:
1032          cwd = os.path.expanduser(cwd)
1033      host_cwd = None
1034      host_prefixes = ("/Users/", "/home/", "C:\\", "C:/")
1035      if env_type == "docker" and mount_docker_cwd:
1036          docker_cwd_source = os.getenv("TERMINAL_CWD") or os.getcwd()
1037          candidate = os.path.abspath(os.path.expanduser(docker_cwd_source))
1038          if (
1039              any(candidate.startswith(p) for p in host_prefixes)
1040              or (os.path.isabs(candidate) and os.path.isdir(candidate) and not candidate.startswith(("/workspace", "/root")))
1041          ):
1042              host_cwd = candidate
1043              cwd = "/workspace"
1044      elif env_type in ("modal", "docker", "singularity", "daytona", "vercel_sandbox") and cwd:
1045          # Host paths and relative paths that won't work inside containers
1046          is_host_path = any(cwd.startswith(p) for p in host_prefixes)
1047          is_relative = not os.path.isabs(cwd)  # e.g. "." or "src/"
1048          if (is_host_path or is_relative) and cwd != default_cwd:
1049              logger.info("Ignoring TERMINAL_CWD=%r for %s backend "
1050                          "(host/relative path won't work in sandbox). Using %r instead.",
1051                          cwd, env_type, default_cwd)
1052              cwd = default_cwd
1053  
1054      return {
1055          "env_type": env_type,
1056          "modal_mode": coerce_modal_mode(os.getenv("TERMINAL_MODAL_MODE", "auto")),
1057          "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image),
1058          "docker_forward_env": _parse_env_var("TERMINAL_DOCKER_FORWARD_ENV", "[]", json.loads, "valid JSON"),
1059          "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"),
1060          "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image),
1061          "daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image),
1062          "vercel_runtime": os.getenv("TERMINAL_VERCEL_RUNTIME", "").strip(),
1063          "cwd": cwd,
1064          "host_cwd": host_cwd,
1065          "docker_mount_cwd_to_workspace": mount_docker_cwd,
1066          "timeout": _parse_env_var("TERMINAL_TIMEOUT", "180"),
1067          "lifetime_seconds": _parse_env_var("TERMINAL_LIFETIME_SECONDS", "300"),
1068          # SSH-specific config
1069          "ssh_host": os.getenv("TERMINAL_SSH_HOST", ""),
1070          "ssh_user": os.getenv("TERMINAL_SSH_USER", ""),
1071          "ssh_port": _parse_env_var("TERMINAL_SSH_PORT", "22"),
1072          "ssh_key": os.getenv("TERMINAL_SSH_KEY", ""),
1073          # Persistent shell: SSH defaults to the config-level persistent_shell
1074          # setting (true by default for non-local backends); local is always opt-in.
1075          # Per-backend env vars override if explicitly set.
1076          "ssh_persistent": os.getenv(
1077              "TERMINAL_SSH_PERSISTENT",
1078              os.getenv("TERMINAL_PERSISTENT_SHELL", "true"),
1079          ).lower() in ("true", "1", "yes"),
1080          "local_persistent": os.getenv("TERMINAL_LOCAL_PERSISTENT", "false").lower() in ("true", "1", "yes"),
1081          # Container resource config (applies to docker, singularity, modal,
1082          # daytona, and vercel_sandbox -- ignored for local/ssh)
1083          "container_cpu": _parse_env_var("TERMINAL_CONTAINER_CPU", "1", float, "number"),
1084          "container_memory": _parse_env_var("TERMINAL_CONTAINER_MEMORY", "5120"),     # MB (default 5GB)
1085          "container_disk": _parse_env_var("TERMINAL_CONTAINER_DISK", "51200"),        # MB (default 50GB)
1086          "container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"),
1087          "docker_volumes": _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON"),
1088          "docker_run_as_host_user": os.getenv("TERMINAL_DOCKER_RUN_AS_HOST_USER", "false").lower() in ("true", "1", "yes"),
1089      }
1090  
1091  
1092  def _get_modal_backend_state(modal_mode: object | None) -> Dict[str, Any]:
1093      """Resolve direct vs managed Modal backend selection."""
1094      return resolve_modal_backend_state(
1095          modal_mode,
1096          has_direct=has_direct_modal_credentials(),
1097          managed_ready=is_managed_tool_gateway_ready("modal"),
1098      )
1099  
1100  
1101  def _create_environment(env_type: str, image: str, cwd: str, timeout: int,
1102                          ssh_config: dict = None, container_config: dict = None,
1103                          local_config: dict = None,
1104                          task_id: str = "default",
1105                          host_cwd: str = None):
1106      """
1107      Create an execution environment for sandboxed command execution.
1108      
1109      Args:
1110          env_type: One of "local", "docker", "singularity", "modal",
1111              "daytona", "vercel_sandbox", "ssh"
1112          image: Docker/Singularity/Modal image name (ignored for local/ssh/vercel)
1113          cwd: Working directory
1114          timeout: Default command timeout
1115          ssh_config: SSH connection config (for env_type="ssh")
1116          container_config: Resource config for container backends (cpu, memory, disk, persistent)
1117          task_id: Task identifier for environment reuse and snapshot keying
1118          host_cwd: Optional host working directory to bind into Docker when explicitly enabled
1119          
1120      Returns:
1121          Environment instance with execute() method
1122      """
1123      cc = container_config or {}
1124      cpu = cc.get("container_cpu", 1)
1125      memory = cc.get("container_memory", 5120)
1126      disk = cc.get("container_disk", 51200)
1127      persistent = cc.get("container_persistent", True)
1128      volumes = cc.get("docker_volumes", [])
1129      docker_forward_env = cc.get("docker_forward_env", [])
1130      docker_env = cc.get("docker_env", {})
1131  
1132      if env_type == "local":
1133          return _LocalEnvironment(cwd=cwd, timeout=timeout)
1134      
1135      elif env_type == "docker":
1136          return _DockerEnvironment(
1137              image=image, cwd=cwd, timeout=timeout,
1138              cpu=cpu, memory=memory, disk=disk,
1139              persistent_filesystem=persistent, task_id=task_id,
1140              volumes=volumes,
1141              host_cwd=host_cwd,
1142              auto_mount_cwd=cc.get("docker_mount_cwd_to_workspace", False),
1143              forward_env=docker_forward_env,
1144              env=docker_env,
1145              run_as_host_user=cc.get("docker_run_as_host_user", False),
1146          )
1147      
1148      elif env_type == "singularity":
1149          return _SingularityEnvironment(
1150              image=image, cwd=cwd, timeout=timeout,
1151              cpu=cpu, memory=memory, disk=disk,
1152              persistent_filesystem=persistent, task_id=task_id,
1153          )
1154      
1155      elif env_type == "modal":
1156          sandbox_kwargs = {}
1157          if cpu > 0:
1158              sandbox_kwargs["cpu"] = cpu
1159          if memory > 0:
1160              sandbox_kwargs["memory"] = memory
1161          if disk > 0:
1162              try:
1163                  import inspect, modal
1164                  if "ephemeral_disk" in inspect.signature(modal.Sandbox.create).parameters:
1165                      sandbox_kwargs["ephemeral_disk"] = disk
1166              except Exception:
1167                  pass
1168  
1169          modal_state = _get_modal_backend_state(cc.get("modal_mode"))
1170  
1171          if modal_state["selected_backend"] == "managed":
1172              return _ManagedModalEnvironment(
1173                  image=image, cwd=cwd, timeout=timeout,
1174                  modal_sandbox_kwargs=sandbox_kwargs,
1175                  persistent_filesystem=persistent, task_id=task_id,
1176              )
1177  
1178          if modal_state["selected_backend"] != "direct":
1179              if modal_state["managed_mode_blocked"]:
1180                  raise ValueError(
1181                      "Modal backend is configured for managed mode, but "
1182                      "a paid Nous subscription is required for the Tool Gateway and no direct "
1183                      "Modal credentials/config were found. Log in with `hermes model` or "
1184                      "choose TERMINAL_MODAL_MODE=direct/auto."
1185                  )
1186              if modal_state["mode"] == "managed":
1187                  raise ValueError(
1188                      "Modal backend is configured for managed mode, but the managed tool gateway is unavailable."
1189                  )
1190              if modal_state["mode"] == "direct":
1191                  raise ValueError(
1192                      "Modal backend is configured for direct mode, but no direct Modal credentials/config were found."
1193                  )
1194              message = "Modal backend selected but no direct Modal credentials/config was found."
1195              if managed_nous_tools_enabled():
1196                  message = (
1197                      "Modal backend selected but no direct Modal credentials/config or managed tool gateway was found."
1198                  )
1199              raise ValueError(message)
1200  
1201          return _ModalEnvironment(
1202              image=image, cwd=cwd, timeout=timeout,
1203              modal_sandbox_kwargs=sandbox_kwargs,
1204              persistent_filesystem=persistent, task_id=task_id,
1205          )
1206      
1207      elif env_type == "daytona":
1208          # Lazy import so daytona SDK is only required when backend is selected.
1209          from tools.environments.daytona import DaytonaEnvironment as _DaytonaEnvironment
1210          return _DaytonaEnvironment(
1211              image=image, cwd=cwd, timeout=timeout,
1212              cpu=int(cpu), memory=memory, disk=disk,
1213              persistent_filesystem=persistent, task_id=task_id,
1214          )
1215  
1216      elif env_type == "vercel_sandbox":
1217          from tools.environments.vercel_sandbox import (
1218              VercelSandboxEnvironment as _VercelSandboxEnvironment,
1219          )
1220          return _VercelSandboxEnvironment(
1221              runtime=cc.get("vercel_runtime") or None,
1222              cwd=cwd,
1223              timeout=timeout,
1224              cpu=cpu,
1225              memory=memory,
1226              disk=disk,
1227              persistent_filesystem=persistent,
1228              task_id=task_id,
1229          )
1230  
1231      elif env_type == "ssh":
1232          if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"):
1233              raise ValueError("SSH environment requires ssh_host and ssh_user to be configured")
1234          return _SSHEnvironment(
1235              host=ssh_config["host"],
1236              user=ssh_config["user"],
1237              port=ssh_config.get("port", 22),
1238              key_path=ssh_config.get("key", ""),
1239              cwd=cwd,
1240              timeout=timeout,
1241          )
1242  
1243      else:
1244          raise ValueError(
1245              f"Unknown environment type: {env_type}. Use 'local', 'docker', "
1246              f"'singularity', 'modal', 'daytona', 'vercel_sandbox', or 'ssh'"
1247          )
1248  
1249  
1250  def _cleanup_inactive_envs(lifetime_seconds: int = 300):
1251      """Clean up environments that have been inactive for longer than lifetime_seconds."""
1252      current_time = time.time()
1253  
1254      # Check the process registry -- skip cleanup for sandboxes with active
1255      # background processes (their _last_activity gets refreshed to keep them alive).
1256      try:
1257          from tools.process_registry import process_registry
1258          for task_id in list(_last_activity.keys()):
1259              if process_registry.has_active_processes(task_id):
1260                  _last_activity[task_id] = current_time  # Keep sandbox alive
1261      except ImportError:
1262          pass
1263  
1264      # Phase 1: collect stale entries and remove them from tracking dicts while
1265      # holding the lock.  Do NOT call env.cleanup() inside the lock -- Modal and
1266      # Docker teardown can block for 10-15s, which would stall every concurrent
1267      # terminal/file tool call waiting on _env_lock.
1268      envs_to_stop = []  # list of (task_id, env) pairs
1269  
1270      with _env_lock:
1271          for task_id, last_time in list(_last_activity.items()):
1272              if current_time - last_time > lifetime_seconds:
1273                  env = _active_environments.pop(task_id, None)
1274                  _last_activity.pop(task_id, None)
1275                  if env is not None:
1276                      envs_to_stop.append((task_id, env))
1277  
1278          # Also purge per-task creation locks for cleaned-up tasks
1279          with _creation_locks_lock:
1280              for task_id, _ in envs_to_stop:
1281                  _creation_locks.pop(task_id, None)
1282  
1283      # Phase 2: stop the actual sandboxes OUTSIDE the lock so other tool calls
1284      # are not blocked while Modal/Docker sandboxes shut down.
1285      for task_id, env in envs_to_stop:
1286          # Invalidate stale file_ops cache entry (Bug fix: prevents
1287          # ShellFileOperations from referencing a dead sandbox)
1288          try:
1289              from tools.file_tools import clear_file_ops_cache
1290              clear_file_ops_cache(task_id)
1291          except ImportError:
1292              pass
1293  
1294          try:
1295              if hasattr(env, 'cleanup'):
1296                  env.cleanup()
1297              elif hasattr(env, 'stop'):
1298                  env.stop()
1299              elif hasattr(env, 'terminate'):
1300                  env.terminate()
1301  
1302              logger.info("Cleaned up inactive environment for task: %s", task_id)
1303  
1304          except Exception as e:
1305              error_str = str(e)
1306              if "404" in error_str or "not found" in error_str.lower():
1307                  logger.info("Environment for task %s already cleaned up", task_id)
1308              else:
1309                  logger.warning("Error cleaning up environment for task %s: %s", task_id, e)
1310  
1311  
1312  def _cleanup_thread_worker():
1313      """Background thread worker that periodically cleans up inactive environments."""
1314      while _cleanup_running:
1315          try:
1316              config = _get_env_config()
1317              _cleanup_inactive_envs(config["lifetime_seconds"])
1318          except Exception as e:
1319              logger.warning("Error in cleanup thread: %s", e, exc_info=True)
1320  
1321          for _ in range(60):
1322              if not _cleanup_running:
1323                  break
1324              time.sleep(1)
1325  
1326  
1327  def _start_cleanup_thread():
1328      """Start the background cleanup thread if not already running."""
1329      global _cleanup_thread, _cleanup_running
1330  
1331      with _env_lock:
1332          if _cleanup_thread is None or not _cleanup_thread.is_alive():
1333              _cleanup_running = True
1334              _cleanup_thread = threading.Thread(target=_cleanup_thread_worker, daemon=True)
1335              _cleanup_thread.start()
1336  
1337  
1338  def _stop_cleanup_thread():
1339      """Stop the background cleanup thread."""
1340      global _cleanup_running
1341      _cleanup_running = False
1342      if _cleanup_thread is not None:
1343          try:
1344              _cleanup_thread.join(timeout=5)
1345          except (SystemExit, KeyboardInterrupt):
1346              pass
1347  
1348  
1349  def get_active_env(task_id: str):
1350      """Return the active BaseEnvironment for *task_id*, or None."""
1351      lookup = _resolve_container_task_id(task_id)
1352      with _env_lock:
1353          return _active_environments.get(lookup) or _active_environments.get(task_id)
1354  
1355  
1356  def is_persistent_env(task_id: str) -> bool:
1357      """Return True if the active environment for task_id is configured for
1358      cross-turn persistence (``persistent_filesystem=True``).
1359  
1360      Used by the agent loop to skip per-turn teardown for backends whose whole
1361      point is to survive between turns (docker with ``container_persistent``,
1362      daytona, modal, etc.). Non-persistent backends (e.g. Morph) still get torn
1363      down at end-of-turn to prevent leakage. The idle reaper
1364      (``_cleanup_inactive_envs``) handles persistent envs once they exceed
1365      ``terminal.lifetime_seconds``.
1366      """
1367      env = get_active_env(task_id)
1368      if env is None:
1369          return False
1370      return bool(getattr(env, "_persistent", False))
1371  
1372  
1373  
1374  
1375  def cleanup_all_environments():
1376      """Clean up ALL active environments. Use with caution."""
1377      task_ids = list(_active_environments.keys())
1378      cleaned = 0
1379      
1380      for task_id in task_ids:
1381          try:
1382              cleanup_vm(task_id)
1383              cleaned += 1
1384          except Exception as e:
1385              logger.error("Error cleaning %s: %s", task_id, e, exc_info=True)
1386      
1387      # Also clean any orphaned directories
1388      scratch_dir = _get_scratch_dir()
1389      import glob
1390      for path in glob.glob(str(scratch_dir / "hermes-*")):
1391          try:
1392              shutil.rmtree(path, ignore_errors=True)
1393              logger.info("Removed orphaned: %s", path)
1394          except OSError as e:
1395              logger.debug("Failed to remove orphaned path %s: %s", path, e)
1396      
1397      if cleaned > 0:
1398          logger.info("Cleaned %d environments", cleaned)
1399      return cleaned
1400  
1401  
1402  def cleanup_vm(task_id: str):
1403      """Manually clean up a specific environment by task_id."""
1404      # Remove from tracking dicts while holding the lock, but defer the
1405      # actual (potentially slow) env.cleanup() call to outside the lock
1406      # so other tool calls aren't blocked.
1407      env = None
1408      with _env_lock:
1409          env = _active_environments.pop(task_id, None)
1410          _last_activity.pop(task_id, None)
1411  
1412      # Clean up per-task creation lock
1413      with _creation_locks_lock:
1414          _creation_locks.pop(task_id, None)
1415  
1416      # Invalidate stale file_ops cache entry
1417      try:
1418          from tools.file_tools import clear_file_ops_cache
1419          clear_file_ops_cache(task_id)
1420      except ImportError:
1421          pass
1422  
1423      if env is None:
1424          return
1425  
1426      try:
1427          if hasattr(env, 'cleanup'):
1428              env.cleanup()
1429          elif hasattr(env, 'stop'):
1430              env.stop()
1431          elif hasattr(env, 'terminate'):
1432              env.terminate()
1433  
1434          logger.info("Manually cleaned up environment for task: %s", task_id)
1435  
1436      except Exception as e:
1437          error_str = str(e)
1438          if "404" in error_str or "not found" in error_str.lower():
1439              logger.info("Environment for task %s already cleaned up", task_id)
1440          else:
1441              logger.warning("Error cleaning up environment for task %s: %s", task_id, e)
1442  
1443  
1444  def _atexit_cleanup():
1445      """Stop cleanup thread and shut down all remaining sandboxes on exit."""
1446      _stop_cleanup_thread()
1447      if _active_environments:
1448          count = len(_active_environments)
1449          logger.info("Shutting down %d remaining sandbox(es)...", count)
1450          cleanup_all_environments()
1451  
1452  atexit.register(_atexit_cleanup)
1453  
1454  
1455  # =============================================================================
1456  # Exit Code Context for Common CLI Tools
1457  # =============================================================================
1458  # Many Unix commands use non-zero exit codes for informational purposes, not
1459  # to indicate failure.  The model sees a raw exit_code=1 from `grep` and
1460  # wastes a turn investigating something that just means "no matches".
1461  # This lookup adds a human-readable note so the agent can move on.
1462  
1463  def _interpret_exit_code(command: str, exit_code: int) -> str | None:
1464      """Return a human-readable note when a non-zero exit code is non-erroneous.
1465  
1466      Returns None when the exit code is 0 or genuinely signals an error.
1467      The note is appended to the tool result so the model doesn't waste
1468      turns investigating expected exit codes.
1469      """
1470      if exit_code == 0:
1471          return None
1472  
1473      # Extract the last command in a pipeline/chain — that determines the
1474      # exit code.  Handles  `cmd1 && cmd2`, `cmd1 | cmd2`, `cmd1; cmd2`.
1475      # Deliberately simple: split on shell operators and take the last piece.
1476      segments = re.split(r'\s*(?:\|\||&&|[|;])\s*', command)
1477      last_segment = (segments[-1] if segments else command).strip()
1478  
1479      # Get base command name (first word), stripping env var assignments
1480      # like  VAR=val cmd ...
1481      words = last_segment.split()
1482      base_cmd = ""
1483      for w in words:
1484          if "=" in w and not w.startswith("-"):
1485              continue  # skip VAR=val
1486          base_cmd = w.split("/")[-1]  # handle /usr/bin/grep -> grep
1487          break
1488  
1489      if not base_cmd:
1490          return None
1491  
1492      # Command-specific semantics
1493      semantics: dict[str, dict[int, str]] = {
1494          # grep/rg/ag/ack: 1=no matches found (normal), 2+=real error
1495          "grep":  {1: "No matches found (not an error)"},
1496          "egrep": {1: "No matches found (not an error)"},
1497          "fgrep": {1: "No matches found (not an error)"},
1498          "rg":    {1: "No matches found (not an error)"},
1499          "ag":    {1: "No matches found (not an error)"},
1500          "ack":   {1: "No matches found (not an error)"},
1501          # diff: 1=files differ (expected), 2+=real error
1502          "diff":  {1: "Files differ (expected, not an error)"},
1503          "colordiff": {1: "Files differ (expected, not an error)"},
1504          # find: 1=some dirs inaccessible but results may still be valid
1505          "find":  {1: "Some directories were inaccessible (partial results may still be valid)"},
1506          # test/[: 1=condition is false (expected)
1507          "test":  {1: "Condition evaluated to false (expected, not an error)"},
1508          "[":     {1: "Condition evaluated to false (expected, not an error)"},
1509          # curl: common non-error codes
1510          "curl":  {
1511              6: "Could not resolve host",
1512              7: "Failed to connect to host",
1513              22: "HTTP response code indicated error (e.g. 404, 500)",
1514              28: "Operation timed out",
1515          },
1516          # git: 1 is context-dependent but often normal (e.g. git diff with changes)
1517          "git":   {1: "Non-zero exit (often normal — e.g. 'git diff' returns 1 when files differ)"},
1518      }
1519  
1520      cmd_semantics = semantics.get(base_cmd)
1521      if cmd_semantics and exit_code in cmd_semantics:
1522          return cmd_semantics[exit_code]
1523  
1524      return None
1525  
1526  
1527  def _command_requires_pipe_stdin(command: str) -> bool:
1528      """Return True when PTY mode would break stdin-driven commands.
1529  
1530      Some CLIs change behavior when stdin is a TTY. In particular,
1531      `gh auth login --with-token` expects the token to arrive via piped stdin and
1532      waits for EOF; when we launch it under a PTY, `process.submit()` only sends a
1533      newline, so the command appears to hang forever with no visible progress.
1534      """
1535      normalized = " ".join(command.lower().split())
1536      return (
1537          normalized.startswith("gh auth login")
1538          and "--with-token" in normalized
1539      )
1540  
1541  
1542  _SHELL_LEVEL_BACKGROUND_RE = re.compile(r"\b(?:nohup|disown|setsid)\b", re.IGNORECASE)
1543  _INLINE_BACKGROUND_AMP_RE = re.compile(r"\s&\s")
1544  _TRAILING_BACKGROUND_AMP_RE = re.compile(r"\s&\s*(?:#.*)?$")
1545  _LONG_LIVED_FOREGROUND_PATTERNS = (
1546      re.compile(r"\b(?:npm|pnpm|yarn|bun)\s+(?:run\s+)?(?:dev|start|serve|watch)\b", re.IGNORECASE),
1547      re.compile(r"\bdocker\s+compose\s+up\b", re.IGNORECASE),
1548      re.compile(r"\bnext\s+dev\b", re.IGNORECASE),
1549      re.compile(r"\bvite(?:\s|$)", re.IGNORECASE),
1550      re.compile(r"\bnodemon\b", re.IGNORECASE),
1551      re.compile(r"\buvicorn\b", re.IGNORECASE),
1552      re.compile(r"\bgunicorn\b", re.IGNORECASE),
1553      re.compile(r"\bpython(?:3)?\s+-m\s+http\.server\b", re.IGNORECASE),
1554  )
1555  
1556  
1557  def _looks_like_help_or_version_command(command: str) -> bool:
1558      """Return True for informational invocations that should never be blocked."""
1559      normalized = " ".join(command.lower().split())
1560      return (
1561          " --help" in normalized
1562          or normalized.endswith(" -h")
1563          or " --version" in normalized
1564          or normalized.endswith(" -v")
1565      )
1566  
1567  
1568  def _foreground_background_guidance(command: str) -> str | None:
1569      """Suggest background mode when a foreground command looks long-lived.
1570  
1571      Prevents workflows that start a server/watch process and then stall before
1572      follow-up checks or test commands run.
1573      """
1574      if _looks_like_help_or_version_command(command):
1575          return None
1576  
1577      if _SHELL_LEVEL_BACKGROUND_RE.search(command):
1578          return (
1579              "Foreground command uses shell-level background wrappers (nohup/disown/setsid). "
1580              "Use terminal(background=true) so Hermes can track the process, then run "
1581              "readiness checks and tests in separate commands."
1582          )
1583  
1584      if _INLINE_BACKGROUND_AMP_RE.search(command) or _TRAILING_BACKGROUND_AMP_RE.search(command):
1585          return (
1586              "Foreground command uses '&' backgrounding. Use terminal(background=true) for long-lived "
1587              "processes, then run health checks and tests in follow-up terminal calls."
1588          )
1589  
1590      for pattern in _LONG_LIVED_FOREGROUND_PATTERNS:
1591          if pattern.search(command):
1592              return (
1593                  "This foreground command appears to start a long-lived server/watch process. "
1594                  "Run it with background=true, verify readiness (health endpoint/log signal), "
1595                  "then execute tests in a separate command."
1596              )
1597  
1598      return None
1599  
1600  
1601  def _resolve_notification_flag_conflict(
1602      *,
1603      notify_on_complete: bool,
1604      watch_patterns,
1605      background: bool,
1606  ) -> tuple:
1607      """Decide what to do when both notify_on_complete and watch_patterns are set.
1608  
1609      These flags produce duplicate, delayed notifications when combined — one
1610      notification per watch-pattern match AND one on process exit, with async
1611      delivery that can spam the user long after the process ends. When both are
1612      set, we drop watch_patterns in favor of notify_on_complete (the more useful
1613      "let me know when it's done" signal) and return a human-readable note.
1614  
1615      Returns:
1616          (watch_patterns_to_use, conflict_note). conflict_note is "" when there
1617          is no conflict.
1618      """
1619      if background and notify_on_complete and watch_patterns:
1620          note = (
1621              "watch_patterns ignored because notify_on_complete=True; "
1622              "these two flags produce duplicate notifications when combined"
1623          )
1624          return None, note
1625      return watch_patterns, ""
1626  
1627  
1628  def terminal_tool(
1629      command: str,
1630      background: bool = False,
1631      timeout: Optional[int] = None,
1632      task_id: Optional[str] = None,
1633      force: bool = False,
1634      workdir: Optional[str] = None,
1635      pty: bool = False,
1636      notify_on_complete: bool = False,
1637      watch_patterns: Optional[List[str]] = None,
1638  ) -> str:
1639      """
1640      Execute a command in the configured terminal environment.
1641  
1642      Args:
1643          command: The command to execute
1644          background: Whether to run in background (default: False)
1645          timeout: Command timeout in seconds (default: from config)
1646          task_id: Unique identifier for environment isolation (optional)
1647          force: If True, skip dangerous command check (use after user confirms)
1648          workdir: Working directory for this command (optional, uses session cwd if not set)
1649          pty: If True, use pseudo-terminal for interactive CLI tools (local backend only)
1650          notify_on_complete: If True and background=True, you'll be notified exactly once when the process exits. The right choice for almost every long task. MUTUALLY EXCLUSIVE with watch_patterns.
1651          watch_patterns: List of strings to watch for in background output. HARD rate limit: 1 notification per 15s per process. After 3 strike windows in a row, watch_patterns is disabled and the session is auto-promoted to notify_on_complete. Use ONLY for rare, one-shot mid-process signals on long-lived processes (server readiness, migration-done markers). NEVER use in loops/batch jobs — error patterns there will hit the strike limit and get disabled. MUTUALLY EXCLUSIVE with notify_on_complete — set one, not both.
1652  
1653      Returns:
1654          str: JSON string with output, exit_code, and error fields
1655  
1656      Examples:
1657          # Execute a simple command
1658          >>> result = terminal_tool(command="ls -la /tmp")
1659  
1660          # Run a background task
1661          >>> result = terminal_tool(command="python server.py", background=True)
1662  
1663          # With custom timeout
1664          >>> result = terminal_tool(command="long_task.sh", timeout=300)
1665          
1666          # Force run after user confirmation
1667          # Note: force parameter is internal only, not exposed to model API
1668      """
1669      try:
1670          if not isinstance(command, str):
1671              logger.warning(
1672                  "Rejected invalid terminal command value: %s",
1673                  type(command).__name__,
1674              )
1675              return json.dumps({
1676                  "output": "",
1677                  "exit_code": -1,
1678                  "error": f"Invalid command: expected string, got {type(command).__name__}",
1679                  "status": "error",
1680              }, ensure_ascii=False)
1681  
1682          # Get configuration
1683          config = _get_env_config()
1684          env_type = config["env_type"]
1685  
1686          # Use task_id for environment isolation. By default all subagent
1687          # task_ids collapse back to "default" so the top-level agent and
1688          # every delegate_task child share one container; only task_ids with
1689          # a registered env override (RL benchmarks) get isolated sandboxes.
1690          effective_task_id = _resolve_container_task_id(task_id)
1691  
1692          # Check per-task overrides (set by environments like TerminalBench2Env)
1693          # before falling back to global env var config
1694          overrides = _task_env_overrides.get(effective_task_id, {})
1695          
1696          # Select image based on env type, with per-task override support
1697          if env_type == "docker":
1698              image = overrides.get("docker_image") or config["docker_image"]
1699          elif env_type == "singularity":
1700              image = overrides.get("singularity_image") or config["singularity_image"]
1701          elif env_type == "modal":
1702              image = overrides.get("modal_image") or config["modal_image"]
1703          elif env_type == "daytona":
1704              image = overrides.get("daytona_image") or config["daytona_image"]
1705          else:
1706              image = ""
1707  
1708          cwd = overrides.get("cwd") or config["cwd"]
1709          default_timeout = config["timeout"]
1710          effective_timeout = timeout or default_timeout
1711  
1712          # Reject foreground commands where the model explicitly requests
1713          # a timeout above FOREGROUND_MAX_TIMEOUT — nudge it toward background.
1714          if not background and timeout and timeout > FOREGROUND_MAX_TIMEOUT:
1715              return json.dumps({
1716                  "error": (
1717                      f"Foreground timeout {timeout}s exceeds the maximum of "
1718                      f"{FOREGROUND_MAX_TIMEOUT}s. Use background=true with "
1719                      f"notify_on_complete=true for long-running commands."
1720                  ),
1721              }, ensure_ascii=False)
1722  
1723          # Guardrail: long-lived server/watch commands should run as managed
1724          # background sessions, not foreground shell hacks.
1725          if not background:
1726              guidance = _foreground_background_guidance(command)
1727              if guidance:
1728                  return json.dumps({
1729                      "output": "",
1730                      "exit_code": -1,
1731                      "error": guidance,
1732                      "status": "error",
1733                  }, ensure_ascii=False)
1734  
1735          # Start cleanup thread
1736          _start_cleanup_thread()
1737  
1738          # Get or create environment.
1739          # Use a per-task creation lock so concurrent tool calls for the same
1740          # task_id wait for the first one to finish creating the sandbox,
1741          # instead of each creating their own (wasting Modal resources).
1742          with _env_lock:
1743              if effective_task_id in _active_environments:
1744                  _last_activity[effective_task_id] = time.time()
1745                  env = _active_environments[effective_task_id]
1746                  needs_creation = False
1747              else:
1748                  needs_creation = True
1749  
1750          if needs_creation:
1751              # Per-task lock: only one thread creates the sandbox, others wait
1752              with _creation_locks_lock:
1753                  if effective_task_id not in _creation_locks:
1754                      _creation_locks[effective_task_id] = threading.Lock()
1755                  task_lock = _creation_locks[effective_task_id]
1756  
1757              with task_lock:
1758                  # Double-check after acquiring the per-task lock
1759                  with _env_lock:
1760                      if effective_task_id in _active_environments:
1761                          _last_activity[effective_task_id] = time.time()
1762                          env = _active_environments[effective_task_id]
1763                          needs_creation = False
1764  
1765                  if needs_creation:
1766                      if env_type == "singularity":
1767                          _check_disk_usage_warning()
1768                      logger.info("Creating new %s environment for task %s...", env_type, effective_task_id[:8])
1769                      try:
1770                          ssh_config = None
1771                          if env_type == "ssh":
1772                              ssh_config = {
1773                                  "host": config.get("ssh_host", ""),
1774                                  "user": config.get("ssh_user", ""),
1775                                  "port": config.get("ssh_port", 22),
1776                                  "key": config.get("ssh_key", ""),
1777                                  "persistent": config.get("ssh_persistent", False),
1778                              }
1779  
1780                          container_config = None
1781                          if env_type in ("docker", "singularity", "modal", "daytona", "vercel_sandbox"):
1782                              container_config = {
1783                                  "container_cpu": config.get("container_cpu", 1),
1784                                  "container_memory": config.get("container_memory", 5120),
1785                                  "container_disk": config.get("container_disk", 51200),
1786                                  "container_persistent": config.get("container_persistent", True),
1787                                  "modal_mode": config.get("modal_mode", "auto"),
1788                                  "vercel_runtime": config.get("vercel_runtime", ""),
1789                                  "docker_volumes": config.get("docker_volumes", []),
1790                                  "docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False),
1791                                  "docker_forward_env": config.get("docker_forward_env", []),
1792                                  "docker_env": config.get("docker_env", {}),
1793                                  "docker_run_as_host_user": config.get("docker_run_as_host_user", False),
1794                              }
1795  
1796                          local_config = None
1797                          if env_type == "local":
1798                              local_config = {
1799                                  "persistent": config.get("local_persistent", False),
1800                              }
1801  
1802                          new_env = _create_environment(
1803                              env_type=env_type,
1804                              image=image,
1805                              cwd=cwd,
1806                              timeout=effective_timeout,
1807                              ssh_config=ssh_config,
1808                              container_config=container_config,
1809                              local_config=local_config,
1810                              task_id=effective_task_id,
1811                              host_cwd=config.get("host_cwd"),
1812                          )
1813                      except ImportError as e:
1814                          return json.dumps({
1815                              "output": "",
1816                              "exit_code": -1,
1817                              "error": f"Terminal tool disabled: environment creation failed ({e})",
1818                              "status": "disabled"
1819                          }, ensure_ascii=False)
1820  
1821                      with _env_lock:
1822                          _active_environments[effective_task_id] = new_env
1823                          _last_activity[effective_task_id] = time.time()
1824                          env = new_env
1825                      logger.info("%s environment ready for task %s", env_type, effective_task_id[:8])
1826  
1827          # Pre-exec security checks (tirith + dangerous command detection)
1828          # Skip check if force=True (user has confirmed they want to run it)
1829          approval_note = None
1830          if not force:
1831              approval = _check_all_guards(command, env_type)
1832              if not approval["approved"]:
1833                  # Check if this is an approval_required (gateway ask mode)
1834                  if approval.get("status") == "approval_required":
1835                      return json.dumps({
1836                          "output": "",
1837                          "exit_code": -1,
1838                          "error": approval.get("message", "Waiting for user approval"),
1839                          "status": "approval_required",
1840                          "command": approval.get("command", command),
1841                          "description": approval.get("description", "command flagged"),
1842                          "pattern_key": approval.get("pattern_key", ""),
1843                      }, ensure_ascii=False)
1844                  # Command was blocked
1845                  desc = approval.get("description", "command flagged")
1846                  fallback_msg = (
1847                      f"Command denied: {desc}. "
1848                      "Use the approval prompt to allow it, or rephrase the command."
1849                  )
1850                  return json.dumps({
1851                      "output": "",
1852                      "exit_code": -1,
1853                      "error": approval.get("message", fallback_msg),
1854                      "status": "blocked"
1855                  }, ensure_ascii=False)
1856              # Track whether approval was explicitly granted by the user
1857              if approval.get("user_approved"):
1858                  desc = approval.get("description", "flagged as dangerous")
1859                  approval_note = f"Command required approval ({desc}) and was approved by the user."
1860              elif approval.get("smart_approved"):
1861                  desc = approval.get("description", "flagged as dangerous")
1862                  approval_note = f"Command was flagged ({desc}) and auto-approved by smart approval."
1863  
1864          # Validate workdir against shell injection
1865          if workdir:
1866              workdir_error = _validate_workdir(workdir)
1867              if workdir_error:
1868                  logger.warning("Blocked dangerous workdir: %s (command: %s)",
1869                                 workdir[:200], _safe_command_preview(command))
1870                  return json.dumps({
1871                      "output": "",
1872                      "exit_code": -1,
1873                      "error": workdir_error,
1874                      "status": "blocked"
1875                  }, ensure_ascii=False)
1876  
1877          # Prepare command for execution
1878          pty_disabled_reason = None
1879          effective_pty = pty
1880          if pty and _command_requires_pipe_stdin(command):
1881              effective_pty = False
1882              pty_disabled_reason = (
1883                  "PTY disabled for this command because it expects piped stdin/EOF "
1884                  "(for example gh auth login --with-token). For local background "
1885                  "processes, call process(action='close') after writing so it receives "
1886                  "EOF."
1887              )
1888  
1889          if background:
1890              # Spawn a tracked background process via the process registry.
1891              # For local backends: uses subprocess.Popen with output buffering.
1892              # For non-local backends: runs inside the sandbox via env.execute().
1893              from tools.approval import get_current_session_key
1894              from tools.process_registry import process_registry
1895  
1896              session_key = get_current_session_key(default="")
1897              effective_cwd = workdir or cwd
1898              try:
1899                  if env_type == "local":
1900                      proc_session = process_registry.spawn_local(
1901                          command=command,
1902                          cwd=effective_cwd,
1903                          task_id=effective_task_id,
1904                          session_key=session_key,
1905                          env_vars=env.env if hasattr(env, 'env') else None,
1906                          use_pty=effective_pty,
1907                      )
1908                  else:
1909                      proc_session = process_registry.spawn_via_env(
1910                          env=env,
1911                          command=command,
1912                          cwd=effective_cwd,
1913                          task_id=effective_task_id,
1914                          session_key=session_key,
1915                      )
1916  
1917                  result_data = {
1918                      "output": "Background process started",
1919                      "session_id": proc_session.id,
1920                      "pid": proc_session.pid,
1921                      "exit_code": 0,
1922                      "error": None,
1923                  }
1924                  if approval_note:
1925                      result_data["approval"] = approval_note
1926                  if pty_disabled_reason:
1927                      result_data["pty_note"] = pty_disabled_reason
1928  
1929                  # Populate routing metadata on the session so that
1930                  # watch-pattern and completion notifications can be
1931                  # routed back to the correct chat/thread.
1932                  if background and (notify_on_complete or watch_patterns):
1933                      from gateway.session_context import get_session_env as _gse
1934                      _gw_platform = _gse("HERMES_SESSION_PLATFORM", "")
1935                      if _gw_platform:
1936                          _gw_chat_id = _gse("HERMES_SESSION_CHAT_ID", "")
1937                          _gw_thread_id = _gse("HERMES_SESSION_THREAD_ID", "")
1938                          _gw_user_id = _gse("HERMES_SESSION_USER_ID", "")
1939                          _gw_user_name = _gse("HERMES_SESSION_USER_NAME", "")
1940                          proc_session.watcher_platform = _gw_platform
1941                          proc_session.watcher_chat_id = _gw_chat_id
1942                          proc_session.watcher_user_id = _gw_user_id
1943                          proc_session.watcher_user_name = _gw_user_name
1944                          proc_session.watcher_thread_id = _gw_thread_id
1945  
1946                  # Mutual exclusion: if both notify_on_complete and watch_patterns
1947                  # are set, drop watch_patterns. The combination produces duplicate
1948                  # notifications (one per match + one on exit) that deliver
1949                  # asynchronously and can spam the user long after the process ends.
1950                  # notify_on_complete is the more useful signal for "let me know
1951                  # when the task finishes"; watch_patterns should be reserved for
1952                  # standalone mid-process signals on long-lived processes.
1953                  watch_patterns, conflict_note = _resolve_notification_flag_conflict(
1954                      notify_on_complete=bool(notify_on_complete),
1955                      watch_patterns=watch_patterns,
1956                      background=bool(background),
1957                  )
1958                  if conflict_note:
1959                      logger.warning("background proc %s: %s", proc_session.id, conflict_note)
1960                      result_data["watch_patterns_ignored"] = conflict_note
1961  
1962                  # Mark for agent notification on completion
1963                  if notify_on_complete and background:
1964                      proc_session.notify_on_complete = True
1965                      result_data["notify_on_complete"] = True
1966  
1967                      # In gateway mode, auto-register a fast watcher so the
1968                      # gateway can detect completion and trigger a new agent
1969                      # turn.  CLI mode uses the completion_queue directly.
1970                      if proc_session.watcher_platform:
1971                          proc_session.watcher_interval = 5
1972                          process_registry.pending_watchers.append({
1973                              "session_id": proc_session.id,
1974                              "check_interval": 5,
1975                              "session_key": session_key,
1976                              "platform": proc_session.watcher_platform,
1977                              "chat_id": proc_session.watcher_chat_id,
1978                              "user_id": proc_session.watcher_user_id,
1979                              "user_name": proc_session.watcher_user_name,
1980                              "thread_id": proc_session.watcher_thread_id,
1981                              "notify_on_complete": True,
1982                          })
1983  
1984                  # Set watch patterns for output monitoring
1985                  if watch_patterns and background:
1986                      proc_session.watch_patterns = list(watch_patterns)
1987                      result_data["watch_patterns"] = proc_session.watch_patterns
1988  
1989                  return json.dumps(result_data, ensure_ascii=False)
1990              except Exception as e:
1991                  return json.dumps({
1992                      "output": "",
1993                      "exit_code": -1,
1994                      "error": f"Failed to start background process: {str(e)}"
1995                  }, ensure_ascii=False)
1996          else:
1997              # Run foreground command with retry logic
1998              max_retries = 3
1999              retry_count = 0
2000              result = None
2001              
2002              while retry_count <= max_retries:
2003                  try:
2004                      execute_kwargs = {"timeout": effective_timeout}
2005                      if workdir:
2006                          execute_kwargs["cwd"] = workdir
2007                      result = env.execute(command, **execute_kwargs)
2008                  except Exception as e:
2009                      error_str = str(e).lower()
2010                      if "timeout" in error_str:
2011                          return json.dumps({
2012                              "output": "",
2013                              "exit_code": 124,
2014                              "error": f"Command timed out after {effective_timeout} seconds"
2015                          }, ensure_ascii=False)
2016                      
2017                      # Retry on transient errors
2018                      if retry_count < max_retries:
2019                          retry_count += 1
2020                          wait_time = 2 ** retry_count
2021                          logger.warning("Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
2022                                         wait_time, retry_count, max_retries, _safe_command_preview(command), type(e).__name__, e, effective_task_id, env_type)
2023                          time.sleep(wait_time)
2024                          continue
2025                      
2026                      logger.error("Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s",
2027                                   max_retries, _safe_command_preview(command), type(e).__name__, e, effective_task_id, env_type)
2028                      return json.dumps({
2029                          "output": "",
2030                          "exit_code": -1,
2031                          "error": f"Command execution failed: {type(e).__name__}: {str(e)}"
2032                      }, ensure_ascii=False)
2033                  
2034                  # Got a result
2035                  break
2036              
2037              # Extract output
2038              output = result.get("output", "")
2039              returncode = result.get("returncode", 0)
2040  
2041              # Add helpful message for sudo failures in messaging context
2042              output = _handle_sudo_failure(output, env_type)
2043  
2044              # Foreground terminal output canonicalization seam: plugins receive
2045              # the full output string before default truncation and may only
2046              # replace it by returning a string from transform_terminal_output.
2047              # The hook is fail-open, and the first valid string return wins.
2048              try:
2049                  from hermes_cli.plugins import invoke_hook
2050                  hook_results = invoke_hook(
2051                      "transform_terminal_output",
2052                      command=command,
2053                      output=output,
2054                      returncode=returncode,
2055                      task_id=effective_task_id or "",
2056                      env_type=env_type,
2057                  )
2058                  for hook_result in hook_results:
2059                      if isinstance(hook_result, str):
2060                          output = hook_result
2061                          break
2062              except Exception:
2063                  pass
2064              
2065              # Truncate output if too long, keeping both head and tail
2066              from tools.tool_output_limits import get_max_bytes
2067              MAX_OUTPUT_CHARS = get_max_bytes()
2068              if len(output) > MAX_OUTPUT_CHARS:
2069                  head_chars = int(MAX_OUTPUT_CHARS * 0.4)  # 40% head (error messages often appear early)
2070                  tail_chars = MAX_OUTPUT_CHARS - head_chars  # 60% tail (most recent/relevant output)
2071                  omitted = len(output) - head_chars - tail_chars
2072                  truncated_notice = (
2073                      f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted "
2074                      f"out of {len(output)} total] ...\n\n"
2075                  )
2076                  output = output[:head_chars] + truncated_notice + output[-tail_chars:]
2077  
2078              # Strip ANSI escape sequences so the model never sees terminal
2079              # formatting — prevents it from copying escapes into file writes.
2080              from tools.ansi_strip import strip_ansi
2081              output = strip_ansi(output)
2082  
2083              # Redact secrets from command output (catches env/printenv leaking keys)
2084              from agent.redact import redact_sensitive_text
2085              output = redact_sensitive_text(output.strip()) if output else ""
2086  
2087              # Interpret non-zero exit codes that aren't real errors
2088              # (e.g. grep=1 means "no matches", diff=1 means "files differ")
2089              exit_note = _interpret_exit_code(command, returncode)
2090  
2091              result_dict = {
2092                  "output": output,
2093                  "exit_code": returncode,
2094                  "error": None,
2095              }
2096              if approval_note:
2097                  result_dict["approval"] = approval_note
2098              if exit_note:
2099                  result_dict["exit_code_meaning"] = exit_note
2100  
2101              return json.dumps(result_dict, ensure_ascii=False)
2102  
2103      except Exception as e:
2104          import traceback
2105          tb_str = traceback.format_exc()
2106          logger.error("terminal_tool exception:\n%s", tb_str)
2107          return json.dumps({
2108              "output": "",
2109              "exit_code": -1,
2110              "error": f"Failed to execute command: {str(e)}",
2111              "traceback": tb_str,
2112              "status": "error"
2113          }, ensure_ascii=False)
2114  
2115  
2116  def check_terminal_requirements() -> bool:
2117      """Check if all requirements for the terminal tool are met."""
2118      try:
2119          config = _get_env_config()
2120          env_type = config["env_type"]
2121  
2122          if env_type == "local":
2123              return True
2124  
2125          elif env_type == "docker":
2126              from tools.environments.docker import find_docker
2127              docker = find_docker()
2128              if not docker:
2129                  logger.error("Docker executable not found in PATH or common install locations")
2130                  return False
2131              result = subprocess.run([docker, "version"], capture_output=True, timeout=5)
2132              return result.returncode == 0
2133  
2134          elif env_type == "singularity":
2135              executable = shutil.which("apptainer") or shutil.which("singularity")
2136              if executable:
2137                  result = subprocess.run([executable, "--version"], capture_output=True, timeout=5)
2138                  return result.returncode == 0
2139              return False
2140  
2141          elif env_type == "ssh":
2142              if not config.get("ssh_host") or not config.get("ssh_user"):
2143                  logger.error(
2144                      "SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER "
2145                      "are not both set. Configure both or switch TERMINAL_ENV to 'local'."
2146                  )
2147                  return False
2148              return True
2149  
2150          elif env_type == "modal":
2151              modal_state = _get_modal_backend_state(config.get("modal_mode"))
2152              if modal_state["selected_backend"] == "managed":
2153                  return True
2154  
2155              if modal_state["selected_backend"] != "direct":
2156                  if modal_state["managed_mode_blocked"]:
2157                      logger.error(
2158                          "Modal backend selected with TERMINAL_MODAL_MODE=managed, but "
2159                          "a paid Nous subscription is required for the Tool Gateway and no direct "
2160                          "Modal credentials/config were found. Log in with `hermes model` "
2161                          "or choose TERMINAL_MODAL_MODE=direct/auto."
2162                      )
2163                      return False
2164                  if modal_state["mode"] == "managed":
2165                      logger.error(
2166                          "Modal backend selected with TERMINAL_MODAL_MODE=managed, but the managed "
2167                          "tool gateway is unavailable. Configure the managed gateway or choose "
2168                          "TERMINAL_MODAL_MODE=direct/auto."
2169                      )
2170                      return False
2171                  elif modal_state["mode"] == "direct":
2172                      if managed_nous_tools_enabled():
2173                          logger.error(
2174                              "Modal backend selected with TERMINAL_MODAL_MODE=direct, but no direct "
2175                              "Modal credentials/config were found. Configure Modal or choose "
2176                              "TERMINAL_MODAL_MODE=managed/auto."
2177                          )
2178                      else:
2179                          logger.error(
2180                              "Modal backend selected with TERMINAL_MODAL_MODE=direct, but no direct "
2181                              "Modal credentials/config were found. Configure Modal or choose "
2182                              "TERMINAL_MODAL_MODE=auto."
2183                          )
2184                      return False
2185                  else:
2186                      if managed_nous_tools_enabled():
2187                          logger.error(
2188                              "Modal backend selected but no direct Modal credentials/config or managed "
2189                              "tool gateway was found. Configure Modal, set up the managed gateway, "
2190                              "or choose a different TERMINAL_ENV."
2191                          )
2192                      else:
2193                          logger.error(
2194                              "Modal backend selected but no direct Modal credentials/config was found. "
2195                              "Configure Modal or choose a different TERMINAL_ENV."
2196                          )
2197                      return False
2198  
2199              if importlib.util.find_spec("modal") is None:
2200                  logger.error("modal is required for direct modal terminal backend: pip install modal")
2201                  return False
2202  
2203              return True
2204  
2205          elif env_type == "vercel_sandbox":
2206              return _check_vercel_sandbox_requirements(config)
2207  
2208          elif env_type == "daytona":
2209              from daytona import Daytona  # noqa: F401 — SDK presence check
2210              return os.getenv("DAYTONA_API_KEY") is not None
2211  
2212          else:
2213              logger.error(
2214                  "Unknown TERMINAL_ENV '%s'. Use one of: local, docker, singularity, "
2215                  "modal, daytona, vercel_sandbox, ssh.",
2216                  env_type,
2217              )
2218              return False
2219      except Exception as e:
2220          logger.error("Terminal requirements check failed: %s", e, exc_info=True)
2221          return False
2222  
2223  
2224  if __name__ == "__main__":
2225      # Simple test when run directly
2226      print("Terminal Tool Module")
2227      print("=" * 50)
2228      
2229      config = _get_env_config()
2230      print("\nCurrent Configuration:")
2231      print(f"  Environment type: {config['env_type']}")
2232      print(f"  Docker image: {config['docker_image']}")
2233      print(f"  Modal image: {config['modal_image']}")
2234      print(f"  Working directory: {config['cwd']}")
2235      print(f"  Default timeout: {config['timeout']}s")
2236      print(f"  Lifetime: {config['lifetime_seconds']}s")
2237  
2238      if not check_terminal_requirements():
2239          print("\n❌ Requirements not met. Please check the messages above.")
2240          exit(1)
2241  
2242      print("\n✅ All requirements met!")
2243      print("\nAvailable Tool:")
2244      print("  - terminal_tool: Execute commands in sandboxed environments")
2245  
2246      print("\nUsage Examples:")
2247      print("  # Execute a command")
2248      print("  result = terminal_tool(command='ls -la')")
2249      print("  ")
2250      print("  # Run a background task")
2251      print("  result = terminal_tool(command='python server.py', background=True)")
2252  
2253      print("\nEnvironment Variables:")
2254      default_img = "nikolaik/python-nodejs:python3.11-nodejs20"
2255      print(
2256          "  TERMINAL_ENV: "
2257          f"{os.getenv('TERMINAL_ENV', 'local')} "
2258          "(local/docker/singularity/modal/daytona/vercel_sandbox/ssh)"
2259      )
2260      print(f"  TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', default_img)}")
2261      print(f"  TERMINAL_SINGULARITY_IMAGE: {os.getenv('TERMINAL_SINGULARITY_IMAGE', f'docker://{default_img}')}")
2262      print(f"  TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}")
2263      print(f"  TERMINAL_DAYTONA_IMAGE: {os.getenv('TERMINAL_DAYTONA_IMAGE', default_img)}")
2264      print(f"  TERMINAL_CWD: {os.getenv('TERMINAL_CWD', os.getcwd())}")
2265      from hermes_constants import display_hermes_home as _dhh
2266      print(f"  TERMINAL_SANDBOX_DIR: {os.getenv('TERMINAL_SANDBOX_DIR', f'{_dhh()}/sandboxes')}")
2267      print(f"  TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}")
2268      print(f"  TERMINAL_LIFETIME_SECONDS: {os.getenv('TERMINAL_LIFETIME_SECONDS', '300')}")
2269  
2270  
2271  # ---------------------------------------------------------------------------
2272  # Registry
2273  # ---------------------------------------------------------------------------
2274  from tools.registry import registry
2275  
2276  TERMINAL_SCHEMA = {
2277      "name": "terminal",
2278      "description": TERMINAL_TOOL_DESCRIPTION,
2279      "parameters": {
2280          "type": "object",
2281          "properties": {
2282              "command": {
2283                  "type": "string",
2284                  "description": "The command to execute on the VM"
2285              },
2286              "background": {
2287                  "type": "boolean",
2288                  "description": "Run the command in the background. Two patterns: (1) Long-lived processes that never exit (servers, watchers). (2) Long-running tasks paired with notify_on_complete=true — you can keep working and get notified when the task finishes. For short commands, prefer foreground with a generous timeout instead.",
2289                  "default": False
2290              },
2291              "timeout": {
2292                  "type": "integer",
2293                  "description": f"Max seconds to wait (default: 180, foreground max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily. Foreground timeout above {FOREGROUND_MAX_TIMEOUT}s is rejected; use background=true for longer commands.",
2294                  "minimum": 1
2295              },
2296              "workdir": {
2297                  "type": "string",
2298                  "description": "Working directory for this command (absolute path). Defaults to the session working directory."
2299              },
2300              "pty": {
2301                  "type": "boolean",
2302                  "description": "Run in pseudo-terminal (PTY) mode for interactive CLI tools like Codex, Claude Code, or Python REPL. Only works with local and SSH backends. Default: false.",
2303                  "default": False
2304              },
2305              "notify_on_complete": {
2306                  "type": "boolean",
2307                  "description": "When true (and background=true), you'll be automatically notified exactly once when the process finishes. **This is the right choice for almost every long-running task** — tests, builds, deployments, multi-item batch jobs, anything that takes over a minute and has a defined end. Use this and keep working on other things; the system notifies you on exit. MUTUALLY EXCLUSIVE with watch_patterns — when both are set, watch_patterns is dropped.",
2308                  "default": False
2309              },
2310              "watch_patterns": {
2311                  "type": "array",
2312                  "items": {"type": "string"},
2313                  "description": "Strings to watch for in background process output. HARD RATE LIMIT: at most 1 notification per 15 seconds per process — matches arriving inside the cooldown are dropped. After 3 consecutive 15-second windows with dropped matches, watch_patterns is automatically disabled for that process and promoted to notify_on_complete behavior (one notification on exit, no more mid-process spam). USE ONLY for truly rare, one-shot mid-process signals on LONG-LIVED processes that will never exit on their own — e.g. ['Application startup complete'] on a server so you know when to hit its endpoint, or ['migration done'] on a daemon. DO NOT use for: (1) end-of-run markers like 'DONE'/'PASS' — use notify_on_complete instead; (2) error patterns like 'ERROR'/'Traceback' in loops or multi-item batch jobs — they fire on every iteration and you'll hit the strike limit fast; (3) anything you'd ever combine with notify_on_complete. When in doubt, choose notify_on_complete. MUTUALLY EXCLUSIVE with notify_on_complete — set one, not both."
2314              }
2315          },
2316          "required": ["command"]
2317      }
2318  }
2319  
2320  
2321  def _handle_terminal(args, **kw):
2322      return terminal_tool(
2323          command=args.get("command"),
2324          background=args.get("background", False),
2325          timeout=args.get("timeout"),
2326          task_id=kw.get("task_id"),
2327          workdir=args.get("workdir"),
2328          pty=args.get("pty", False),
2329          notify_on_complete=args.get("notify_on_complete", False),
2330          watch_patterns=args.get("watch_patterns"),
2331      )
2332  
2333  
2334  registry.register(
2335      name="terminal",
2336      toolset="terminal",
2337      schema=TERMINAL_SCHEMA,
2338      handler=_handle_terminal,
2339      check_fn=check_terminal_requirements,
2340      emoji="💻",
2341      max_result_size_chars=100_000,
2342  )