terminal_tool.py
1 #!/usr/bin/env python3 2 """ 3 Terminal Tool Module 4 5 A terminal tool that executes commands in local, Docker, Modal, SSH, 6 Singularity, Daytona, and Vercel Sandbox environments. Supports local 7 execution, containerized backends, and cloud sandboxes, including managed 8 Modal mode. 9 10 Environment Selection (via TERMINAL_ENV environment variable): 11 - "local": Execute directly on the host machine (default, fastest) 12 - "docker": Execute in Docker containers (isolated, requires Docker) 13 - "modal": Execute in Modal cloud sandboxes (direct Modal or managed gateway) 14 - "vercel_sandbox": Execute in Vercel Sandbox cloud sandboxes 15 16 Features: 17 - Multiple execution backends (local, docker, modal, vercel_sandbox) 18 - Background task support 19 - VM/container lifecycle management 20 - Automatic cleanup after inactivity 21 22 Cloud sandbox note: 23 - Persistent filesystems preserve working state across sandbox recreation 24 - Persistent filesystems do NOT guarantee the same live sandbox or long-running processes survive cleanup, idle reaping, or Hermes exit 25 26 Usage: 27 from terminal_tool import terminal_tool 28 29 # Execute a simple command 30 result = terminal_tool("ls -la") 31 32 # Execute in background 33 result = terminal_tool("python server.py", background=True) 34 """ 35 36 import importlib.util 37 import json 38 import logging 39 import os 40 import platform 41 import re 42 import time 43 import threading 44 import atexit 45 import shutil 46 import subprocess 47 from pathlib import Path 48 from typing import Optional, Dict, Any, List 49 50 logger = logging.getLogger(__name__) 51 52 53 # --------------------------------------------------------------------------- 54 # Global interrupt event: set by the agent when a user interrupt arrives. 55 # The terminal tool polls this during command execution so it can kill 56 # long-running subprocesses immediately instead of blocking until timeout. 57 # --------------------------------------------------------------------------- 58 from tools.interrupt import is_interrupted, _interrupt_event # noqa: F401 — re-exported 59 # display_hermes_home imported lazily at call site (stale-module safety during hermes update) 60 61 62 63 64 # ============================================================================= 65 # Custom Singularity Environment with more space 66 # ============================================================================= 67 68 # Singularity helpers (scratch dir, SIF cache) now live in tools/environments/singularity.py 69 from tools.environments.singularity import _get_scratch_dir 70 from tools.tool_backend_helpers import ( 71 coerce_modal_mode, 72 has_direct_modal_credentials, 73 managed_nous_tools_enabled, 74 resolve_modal_backend_state, 75 ) 76 77 78 def _safe_parse_import_env( 79 name: str, 80 default: Any, 81 converter, 82 type_label: str, 83 ): 84 """Parse module-level numeric env vars without breaking import. 85 86 Terminal tool is imported by CLI, ACP, tests, and tool discovery. A single 87 malformed env var must not make the whole module unloadable at import time. 88 """ 89 raw = os.getenv(name) 90 if raw is None or raw == "": 91 return default 92 try: 93 return converter(raw) 94 except (TypeError, ValueError): 95 logger.warning( 96 "Invalid value for %s: %r (expected %s). Falling back to %r.", 97 name, 98 raw, 99 type_label, 100 default, 101 ) 102 return default 103 104 105 # Hard cap on foreground timeout; override via TERMINAL_MAX_FOREGROUND_TIMEOUT env var. 106 FOREGROUND_MAX_TIMEOUT = _safe_parse_import_env( 107 "TERMINAL_MAX_FOREGROUND_TIMEOUT", 108 600, 109 int, 110 "integer", 111 ) 112 113 # Disk usage warning threshold (in GB) 114 DISK_USAGE_WARNING_THRESHOLD_GB = _safe_parse_import_env( 115 "TERMINAL_DISK_WARNING_GB", 116 500.0, 117 float, 118 "number", 119 ) 120 _VERCEL_SANDBOX_DEFAULT_CWD = "/vercel/sandbox" 121 _SUPPORTED_VERCEL_RUNTIMES = ("node24", "node22", "python3.13") 122 123 124 def _is_supported_vercel_runtime(runtime: str) -> bool: 125 return not runtime or runtime in _SUPPORTED_VERCEL_RUNTIMES 126 127 128 def _check_vercel_sandbox_requirements(config: dict[str, Any]) -> bool: 129 """Validate Vercel Sandbox terminal backend requirements.""" 130 runtime = (config.get("vercel_runtime") or "").strip() 131 if not _is_supported_vercel_runtime(runtime): 132 supported = ", ".join(_SUPPORTED_VERCEL_RUNTIMES) 133 logger.error( 134 "Vercel Sandbox runtime %r is not supported. " 135 "Set TERMINAL_VERCEL_RUNTIME to one of: %s.", 136 runtime, 137 supported, 138 ) 139 return False 140 141 disk = config.get("container_disk", 51200) 142 if disk not in (0, 51200): 143 logger.error( 144 "Vercel Sandbox does not support custom TERMINAL_CONTAINER_DISK=%s. " 145 "Use the default shared setting (51200 MB).", 146 disk, 147 ) 148 return False 149 150 if importlib.util.find_spec("vercel") is None: 151 logger.error( 152 "vercel is required for the Vercel Sandbox terminal backend: pip install vercel" 153 ) 154 return False 155 156 has_oidc = bool(os.getenv("VERCEL_OIDC_TOKEN")) 157 has_token = bool(os.getenv("VERCEL_TOKEN")) 158 has_project = bool(os.getenv("VERCEL_PROJECT_ID")) 159 has_team = bool(os.getenv("VERCEL_TEAM_ID")) 160 161 if has_oidc: 162 return True 163 164 if has_token or has_project or has_team: 165 if has_token and has_project and has_team: 166 return True 167 logger.error( 168 "Vercel Sandbox backend selected with token auth, but " 169 "VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID must all " 170 "be set together. VERCEL_OIDC_TOKEN is supported for one-off " 171 "local development only." 172 ) 173 return False 174 175 logger.error( 176 "Vercel Sandbox backend selected but no supported auth configuration " 177 "was found. Set VERCEL_TOKEN, VERCEL_PROJECT_ID, and VERCEL_TEAM_ID " 178 "for normal use. VERCEL_OIDC_TOKEN is supported for one-off local " 179 "development only." 180 ) 181 return False 182 183 184 def _check_disk_usage_warning(): 185 """Check if total disk usage exceeds warning threshold.""" 186 try: 187 scratch_dir = _get_scratch_dir() 188 189 # Get total size of hermes directories 190 total_bytes = 0 191 import glob 192 for path in glob.glob(str(scratch_dir / "hermes-*")): 193 for f in Path(path).rglob('*'): 194 if f.is_file(): 195 try: 196 total_bytes += f.stat().st_size 197 except OSError as e: 198 logger.debug("Could not stat file %s: %s", f, e) 199 200 total_gb = total_bytes / (1024 ** 3) 201 202 if total_gb > DISK_USAGE_WARNING_THRESHOLD_GB: 203 logger.warning("Disk usage (%.1fGB) exceeds threshold (%.0fGB). Consider running cleanup_all_environments().", 204 total_gb, DISK_USAGE_WARNING_THRESHOLD_GB) 205 return True 206 207 return False 208 except Exception as e: 209 logger.debug("Disk usage warning check failed: %s", e, exc_info=True) 210 return False 211 212 213 # Interactive sudo password cache. 214 # 215 # Scope the cache to the active session when a session key is available, then 216 # fall back to callback identity (ACP / CLI interactive callbacks), then the 217 # current thread. This prevents one interactive session from reusing another 218 # session's cached sudo password inside the same long-lived process. 219 _sudo_password_cache: dict[str, str] = {} 220 _sudo_password_cache_lock = threading.Lock() 221 222 # Optional UI callbacks for interactive prompts. When set, these are called 223 # instead of the default /dev/tty or input() readers. The CLI registers these 224 # so prompts route through prompt_toolkit's event loop. 225 # Callback slots used by the approval prompt and sudo password prompt 226 # routines. Stored in thread-local state so overlapping ACP sessions — 227 # each running in its own ThreadPoolExecutor thread — don't stomp on 228 # each other's callbacks. See GHSA-qg5c-hvr5-hjgr. 229 # 230 # CLI mode is single-threaded, so each thread (the only one) holds its 231 # own callback exactly like before. Gateway mode resolves approvals via 232 # the per-session queue in tools.approval, not through these callbacks, 233 # so it's unaffected. 234 import threading 235 _callback_tls = threading.local() 236 237 238 def _get_sudo_password_callback(): 239 return getattr(_callback_tls, "sudo_password", None) 240 241 242 def _get_approval_callback(): 243 return getattr(_callback_tls, "approval", None) 244 245 246 def set_sudo_password_callback(cb): 247 """Register a callback for sudo password prompts (used by CLI). 248 249 Per-thread scope — ACP sessions that run concurrently in a 250 ThreadPoolExecutor each have their own callback slot. 251 """ 252 _callback_tls.sudo_password = cb 253 254 255 def set_approval_callback(cb): 256 """Register a callback for dangerous command approval prompts. 257 258 Per-thread scope — ACP sessions that run concurrently in a 259 ThreadPoolExecutor each have their own callback slot. See 260 GHSA-qg5c-hvr5-hjgr. 261 """ 262 _callback_tls.approval = cb 263 264 265 def _get_sudo_password_cache_scope() -> str: 266 """Return the cache scope for interactive sudo passwords.""" 267 try: 268 from gateway.session_context import get_session_env 269 270 session_key = get_session_env("HERMES_SESSION_KEY", "") 271 except Exception: 272 session_key = os.getenv("HERMES_SESSION_KEY", "") 273 if session_key: 274 return f"session:{session_key}" 275 276 callback = _get_sudo_password_callback() 277 if callback is not None: 278 owner = getattr(callback, "__self__", None) 279 func = getattr(callback, "__func__", None) 280 if owner is not None and func is not None: 281 return f"callback-owner:{id(owner)}:{id(func)}" 282 return f"callback:{id(callback)}" 283 284 return f"thread:{threading.get_ident()}" 285 286 287 def _get_cached_sudo_password() -> str: 288 """Return the cached sudo password for the current scope.""" 289 scope = _get_sudo_password_cache_scope() 290 with _sudo_password_cache_lock: 291 return _sudo_password_cache.get(scope, "") 292 293 294 def _set_cached_sudo_password(password: str) -> None: 295 """Persist a sudo password for the current scope.""" 296 scope = _get_sudo_password_cache_scope() 297 with _sudo_password_cache_lock: 298 if password: 299 _sudo_password_cache[scope] = password 300 else: 301 _sudo_password_cache.pop(scope, None) 302 303 304 def _reset_cached_sudo_passwords() -> None: 305 """Clear all cached sudo passwords. 306 307 Internal helper for tests and process teardown paths. 308 """ 309 with _sudo_password_cache_lock: 310 _sudo_password_cache.clear() 311 312 # ============================================================================= 313 # Dangerous Command Approval System 314 # ============================================================================= 315 316 # Dangerous command detection + approval now consolidated in tools/approval.py 317 from tools.approval import ( 318 check_all_command_guards as _check_all_guards_impl, 319 ) 320 321 322 def _check_all_guards(command: str, env_type: str) -> dict: 323 """Delegate to consolidated guard (tirith + dangerous cmd) with CLI callback.""" 324 return _check_all_guards_impl(command, env_type, 325 approval_callback=_get_approval_callback()) 326 327 328 # Allowlist: characters that can legitimately appear in directory paths. 329 # Covers alphanumeric, path separators, Windows drive/UNC separators, tilde, 330 # dot, hyphen, underscore, space, plus, at, equals, and comma. Everything 331 # else is rejected. 332 _WORKDIR_SAFE_RE = re.compile(r'^[A-Za-z0-9/\\:_\-.~ +@=,]+$') 333 334 335 def _validate_workdir(workdir: str) -> str | None: 336 """Reject workdir values that don't look like a filesystem path. 337 338 Uses an allowlist of safe characters rather than a deny-list, so novel 339 shell metacharacters can't slip through. 340 341 Returns None if safe, or an error message string if dangerous. 342 """ 343 if not workdir: 344 return None 345 if not _WORKDIR_SAFE_RE.match(workdir): 346 # Find the first offending character for a helpful message. 347 for ch in workdir: 348 if not _WORKDIR_SAFE_RE.match(ch): 349 return ( 350 f"Blocked: workdir contains disallowed character {repr(ch)}. " 351 "Use a simple filesystem path without shell metacharacters." 352 ) 353 return "Blocked: workdir contains disallowed characters." 354 return None 355 356 357 def _handle_sudo_failure(output: str, env_type: str) -> str: 358 """ 359 Check for sudo failure and add helpful message for messaging contexts. 360 361 Returns enhanced output if sudo failed in messaging context, else original. 362 """ 363 is_gateway = os.getenv("HERMES_GATEWAY_SESSION") 364 365 if not is_gateway: 366 return output 367 368 # Check for sudo failure indicators 369 sudo_failures = [ 370 "sudo: a password is required", 371 "sudo: no tty present", 372 "sudo: a terminal is required", 373 ] 374 375 for failure in sudo_failures: 376 if failure in output: 377 from hermes_constants import display_hermes_home as _dhh 378 return output + f"\n\n💡 Tip: To enable sudo over messaging, add SUDO_PASSWORD to {_dhh()}/.env on the agent machine." 379 380 return output 381 382 383 def _prompt_for_sudo_password(timeout_seconds: int = 45) -> str: 384 """ 385 Prompt user for sudo password with timeout. 386 387 Returns the password if entered, or empty string if: 388 - User presses Enter without input (skip) 389 - Timeout expires (45s default) 390 - Any error occurs 391 392 Only works in interactive mode (HERMES_INTERACTIVE=1). 393 If a _sudo_password_callback is registered (by the CLI), delegates to it 394 so the prompt integrates with prompt_toolkit's UI. Otherwise reads 395 directly from /dev/tty with echo disabled. 396 """ 397 import sys 398 399 # Use the registered callback when available (prompt_toolkit-compatible) 400 _sudo_cb = _get_sudo_password_callback() 401 if _sudo_cb is not None: 402 try: 403 return _sudo_cb() or "" 404 except Exception: 405 return "" 406 407 result = {"password": None, "done": False} 408 409 def read_password_thread(): 410 """Read password with echo disabled. Uses msvcrt on Windows, /dev/tty on Unix.""" 411 tty_fd = None 412 old_attrs = None 413 try: 414 if platform.system() == "Windows": 415 import msvcrt 416 chars = [] 417 while True: 418 c = msvcrt.getwch() 419 if c in ("\r", "\n"): 420 break 421 if c == "\x03": 422 raise KeyboardInterrupt 423 chars.append(c) 424 result["password"] = "".join(chars) 425 else: 426 import termios 427 tty_fd = os.open("/dev/tty", os.O_RDONLY) 428 old_attrs = termios.tcgetattr(tty_fd) 429 new_attrs = termios.tcgetattr(tty_fd) 430 new_attrs[3] = new_attrs[3] & ~termios.ECHO 431 termios.tcsetattr(tty_fd, termios.TCSAFLUSH, new_attrs) 432 chars = [] 433 while True: 434 b = os.read(tty_fd, 1) 435 if not b or b in (b"\n", b"\r"): 436 break 437 chars.append(b) 438 result["password"] = b"".join(chars).decode("utf-8", errors="replace") 439 except (EOFError, KeyboardInterrupt, OSError): 440 result["password"] = "" 441 except Exception: 442 result["password"] = "" 443 finally: 444 if tty_fd is not None and old_attrs is not None: 445 try: 446 import termios as _termios 447 _termios.tcsetattr(tty_fd, _termios.TCSAFLUSH, old_attrs) 448 except Exception as e: 449 logger.debug("Failed to restore terminal attributes: %s", e) 450 if tty_fd is not None: 451 try: 452 os.close(tty_fd) 453 except Exception as e: 454 logger.debug("Failed to close tty fd: %s", e) 455 result["done"] = True 456 457 try: 458 os.environ["HERMES_SPINNER_PAUSE"] = "1" 459 time.sleep(0.2) 460 461 print() 462 print("┌" + "─" * 58 + "┐") 463 print("│ 🔐 SUDO PASSWORD REQUIRED" + " " * 30 + "│") 464 print("├" + "─" * 58 + "┤") 465 print("│ Enter password below (input is hidden), or: │") 466 print("│ • Press Enter to skip (command fails gracefully) │") 467 print(f"│ • Wait {timeout_seconds}s to auto-skip" + " " * 27 + "│") 468 print("└" + "─" * 58 + "┘") 469 print() 470 print(" Password (hidden): ", end="", flush=True) 471 472 password_thread = threading.Thread(target=read_password_thread, daemon=True) 473 password_thread.start() 474 password_thread.join(timeout=timeout_seconds) 475 476 if result["done"]: 477 password = result["password"] or "" 478 print() # newline after hidden input 479 if password: 480 print(" ✓ Password received (cached for this session)") 481 else: 482 print(" ⏭ Skipped - continuing without sudo") 483 print() 484 sys.stdout.flush() 485 return password 486 else: 487 print("\n ⏱ Timeout - continuing without sudo") 488 print(" (Press Enter to dismiss)") 489 print() 490 sys.stdout.flush() 491 return "" 492 493 except (EOFError, KeyboardInterrupt): 494 print() 495 print(" ⏭ Cancelled - continuing without sudo") 496 print() 497 sys.stdout.flush() 498 return "" 499 except Exception as e: 500 print(f"\n [sudo prompt error: {e}] - continuing without sudo\n") 501 sys.stdout.flush() 502 return "" 503 finally: 504 if "HERMES_SPINNER_PAUSE" in os.environ: 505 del os.environ["HERMES_SPINNER_PAUSE"] 506 507 def _safe_command_preview(command: Any, limit: int = 200) -> str: 508 """Return a log-safe preview for possibly-invalid command values.""" 509 if command is None: 510 return "<None>" 511 if isinstance(command, str): 512 return command[:limit] 513 try: 514 return repr(command)[:limit] 515 except Exception: 516 return f"<{type(command).__name__}>" 517 518 def _looks_like_env_assignment(token: str) -> bool: 519 """Return True when *token* is a leading shell environment assignment.""" 520 if "=" not in token or token.startswith("="): 521 return False 522 name, _value = token.split("=", 1) 523 return bool(re.match(r"^[A-Za-z_][A-Za-z0-9_]*$", name)) 524 525 526 def _read_shell_token(command: str, start: int) -> tuple[str, int]: 527 """Read one shell token, preserving quotes/escapes, starting at *start*.""" 528 i = start 529 n = len(command) 530 531 while i < n: 532 ch = command[i] 533 if ch.isspace() or ch in ";|&()": 534 break 535 if ch == "'": 536 i += 1 537 while i < n and command[i] != "'": 538 i += 1 539 if i < n: 540 i += 1 541 continue 542 if ch == '"': 543 i += 1 544 while i < n: 545 inner = command[i] 546 if inner == "\\" and i + 1 < n: 547 i += 2 548 continue 549 if inner == '"': 550 i += 1 551 break 552 i += 1 553 continue 554 if ch == "\\" and i + 1 < n: 555 i += 2 556 continue 557 i += 1 558 559 return command[start:i], i 560 561 562 def _rewrite_real_sudo_invocations(command: str) -> tuple[str, bool]: 563 """Rewrite only real unquoted sudo command words, not plain text mentions.""" 564 out: list[str] = [] 565 i = 0 566 n = len(command) 567 command_start = True 568 found = False 569 570 while i < n: 571 ch = command[i] 572 573 if ch.isspace(): 574 out.append(ch) 575 if ch == "\n": 576 command_start = True 577 i += 1 578 continue 579 580 if ch == "#" and command_start: 581 comment_end = command.find("\n", i) 582 if comment_end == -1: 583 out.append(command[i:]) 584 break 585 out.append(command[i:comment_end]) 586 i = comment_end 587 continue 588 589 if command.startswith("&&", i) or command.startswith("||", i) or command.startswith(";;", i): 590 out.append(command[i:i + 2]) 591 i += 2 592 command_start = True 593 continue 594 595 if ch in ";|&(": 596 out.append(ch) 597 i += 1 598 command_start = True 599 continue 600 601 if ch == ")": 602 out.append(ch) 603 i += 1 604 command_start = False 605 continue 606 607 token, next_i = _read_shell_token(command, i) 608 if command_start and token == "sudo": 609 out.append("sudo -S -p ''") 610 found = True 611 else: 612 out.append(token) 613 614 if command_start and _looks_like_env_assignment(token): 615 command_start = True 616 else: 617 command_start = False 618 i = next_i 619 620 return "".join(out), found 621 622 623 def _sudo_nopasswd_works() -> bool: 624 """Return True when local sudo currently works without prompting. 625 626 Only probes for the `local` terminal backend; Docker/SSH/Modal/etc. must 627 not inherit the host's sudo state. Re-probes every call (no process-level 628 cache) so an expired sudo timestamp cannot make a later command silently 629 block waiting for a password. 630 """ 631 terminal_env = os.getenv("TERMINAL_ENV", "local").strip().lower() or "local" 632 if terminal_env != "local": 633 return False 634 635 try: 636 probe = subprocess.run( 637 ["sudo", "-n", "true"], 638 stdin=subprocess.DEVNULL, 639 stdout=subprocess.DEVNULL, 640 stderr=subprocess.DEVNULL, 641 timeout=3, 642 check=False, 643 ) 644 return probe.returncode == 0 645 except Exception: 646 return False 647 648 649 def _rewrite_compound_background(command: str) -> str: 650 """Wrap `A && B &` (or `A || B &`) to `A && { B & }` at depth 0. 651 652 Bash parses ``A && B &`` with `&&` tighter than `&`, so it forks a 653 subshell for the whole `A && B` compound and backgrounds it. Inside 654 the subshell, `B` runs foreground, so the subshell waits for `B` to 655 finish. When `B` is a long-running process (`python3 -m http.server`, 656 `yes > /dev/null`, anything that doesn't naturally exit), the subshell 657 never exits. It leaks as a process stuck in ``wait4`` forever — and 658 on the way, its open stdout pipe can prevent the terminal tool from 659 returning promptly. 660 661 Rewriting the tail to `A && { B & }` preserves `&&`'s error semantics 662 (skip B if A fails) while replacing the subshell with a brace group. 663 The brace group runs in the current shell (no fork), backgrounds B as 664 a simple command (bash doesn't wait for it in non-interactive mode), 665 and exits immediately. B runs as a normal backgrounded child, orphaned 666 when the parent shell exits. 667 668 Handles redirects (``&>``, ``2>&1``) and skips content inside quoted 669 strings and parenthesised subshells. Leaves simple ``cmd &`` alone — 670 that construct doesn't have the subshell-wait bug. 671 """ 672 n = len(command) 673 i = 0 674 paren_depth = 0 675 brace_depth = 0 676 # Position in *command* just after the most recent `&&` / `||` at depth 0 677 # in the current statement; -1 when no chain operator is active. 678 last_chain_op_end = -1 679 rewrites: list[tuple[int, int]] = [] # (chain_op_end, amp_pos) 680 681 while i < n: 682 ch = command[i] 683 684 # Newline terminates a statement at depth 0 — reset chain state. 685 # Checked before the whitespace skip so we don't miss it. 686 if ch == "\n" and paren_depth == 0 and brace_depth == 0: 687 last_chain_op_end = -1 688 i += 1 689 continue 690 691 if ch.isspace(): 692 i += 1 693 continue 694 695 # Comments (only at statement start — conservative: any `#` not inside 696 # a token ends the line). `_read_shell_token` handles quoted strings 697 # below so `#` inside quotes is safe. 698 if ch == "#": 699 nl = command.find("\n", i) 700 if nl == -1: 701 break 702 i = nl 703 continue 704 705 if ch == "\\" and i + 1 < n: 706 i += 2 707 continue 708 709 # Quoted tokens — consume whole string via the shared tokenizer. 710 if ch in ("'", '"'): 711 _, next_i = _read_shell_token(command, i) 712 i = max(next_i, i + 1) 713 continue 714 715 if ch == "(": 716 paren_depth += 1 717 i += 1 718 continue 719 720 if ch == ")": 721 paren_depth = max(0, paren_depth - 1) 722 i += 1 723 continue 724 725 # Brace groups: `{ ... }` is a group (no subshell fork), and bash 726 # requires whitespace after `{`. We track depth so already-rewritten 727 # output (`A && { B & }`) is idempotent — the inner `&` is part of 728 # the group, not a new compound to rewrite. Also skip content inside 729 # the group since `A && B &` there is separately well-formed. 730 if ch == "{" and i + 1 < n and (command[i + 1].isspace() or command[i + 1] == "\n"): 731 brace_depth += 1 732 i += 1 733 continue 734 if ch == "}" and brace_depth > 0: 735 brace_depth -= 1 736 # Closing a group completes a compound statement; reset chain. 737 last_chain_op_end = -1 738 i += 1 739 continue 740 741 # Inside parens or brace groups, skip operators — they parse in their 742 # own scope. `(...)` subshells have the same bug class but are not the 743 # common agent pattern; leave for a follow-up. 744 if paren_depth > 0 or brace_depth > 0: 745 i += 1 746 continue 747 748 # Chain operators at depth 0 749 if command.startswith("&&", i) or command.startswith("||", i): 750 last_chain_op_end = i + 2 751 i += 2 752 continue 753 754 # Statement terminators reset the chain state 755 if ch == ";": 756 last_chain_op_end = -1 757 i += 1 758 continue 759 760 # Single `|` (pipe) starts a new pipeline stage; don't rewrite 761 # across it. `||` handled above. 762 if ch == "|": 763 last_chain_op_end = -1 764 i += 1 765 continue 766 767 # `&` handling: distinguish `&&`, `&>`, fd redirect (`>&`, `<&`), 768 # and a true backgrounding `&`. 769 if ch == "&": 770 # `&&` handled above; won't reach here 771 if i + 1 < n and command[i + 1] == ">": 772 # `&>` redirect — consume 773 i += 2 774 continue 775 # `>&` / `<&` fd target — look back past whitespace 776 j = i - 1 777 while j >= 0 and command[j].isspace(): 778 j -= 1 779 if j >= 0 and command[j] in "<>": 780 i += 1 781 continue 782 # Real background operator 783 if last_chain_op_end >= 0: 784 rewrites.append((last_chain_op_end, i)) 785 last_chain_op_end = -1 786 i += 1 787 continue 788 789 # Regular unquoted token — advance past it via the shared tokenizer 790 _, next_i = _read_shell_token(command, i) 791 i = max(next_i, i + 1) 792 793 if not rewrites: 794 return command 795 796 # Apply rewrites back-to-front so earlier indices remain valid. 797 result = command 798 for chain_end, amp_pos in reversed(rewrites): 799 # Skip whitespace right after the `&&`/`||` so the brace group 800 # opens flush against the inner command. 801 insert_pos = chain_end 802 while insert_pos < amp_pos and result[insert_pos].isspace(): 803 insert_pos += 1 804 prefix = result[:insert_pos] 805 middle = result[insert_pos:amp_pos] # inner command + trailing space 806 suffix = result[amp_pos + 1 :] 807 # `{` needs a trailing space in bash; the closing `}` needs to be 808 # preceded by `;` or `&` — we're providing `&` from the backgrounding. 809 result = prefix + "{ " + middle + "& }" + suffix 810 811 return result 812 813 814 def _transform_sudo_command(command: str | None) -> tuple[str | None, str | None]: 815 """ 816 Transform sudo commands to use -S flag if SUDO_PASSWORD is available. 817 818 This is a shared helper used by all execution environments to provide 819 consistent sudo handling across local, SSH, and container environments. 820 821 Returns: 822 (transformed_command, sudo_stdin) where: 823 - transformed_command has every bare ``sudo`` replaced with 824 ``sudo -S -p ''`` so sudo reads its password from stdin. 825 - sudo_stdin is the password string with a trailing newline that the 826 caller must prepend to the process's stdin stream. sudo -S reads 827 exactly one line (the password) and passes the rest of stdin to the 828 child command, so prepending is safe even when the caller also has 829 its own stdin_data to pipe. 830 - If no password is available, sudo_stdin is None and the command is 831 returned unchanged so it fails gracefully with 832 "sudo: a password is required". 833 834 Callers that drive a subprocess directly (local, ssh, docker, singularity) 835 should prepend sudo_stdin to their stdin_data and pass the merged bytes to 836 Popen's stdin pipe. 837 838 Callers that cannot pipe subprocess stdin (modal, daytona, 839 vercel_sandbox) must embed the password in the command string 840 themselves; see their execute() methods for how they handle the 841 non-None sudo_stdin case. 842 843 If SUDO_PASSWORD is not set and in interactive mode (HERMES_INTERACTIVE=1): 844 Prompts user for password with 45s timeout, caches for session. 845 846 If SUDO_PASSWORD is not set and NOT interactive: 847 Command runs as-is (fails gracefully with "sudo: a password is required"). 848 """ 849 if command is None: 850 return None, None 851 transformed, has_real_sudo = _rewrite_real_sudo_invocations(command) 852 if not has_real_sudo: 853 return command, None 854 855 has_configured_password = "SUDO_PASSWORD" in os.environ 856 sudo_password = ( 857 os.environ.get("SUDO_PASSWORD", "") 858 if has_configured_password 859 else _get_cached_sudo_password() 860 ) 861 862 # Local hosts with sudoers NOPASSWD should not be forced through the 863 # interactive Hermes password prompt or the sudo -S password-pipe path. 864 # Scoped to the local terminal backend so Docker/SSH/Modal/etc. can't 865 # inherit host sudo state. Re-probes every call (no process-lifetime 866 # cache) so an expired sudo timestamp doesn't make a later command block 867 # silently without Hermes prompting. 868 if not has_configured_password and not sudo_password and _sudo_nopasswd_works(): 869 return command, None 870 871 if not has_configured_password and not sudo_password and os.getenv("HERMES_INTERACTIVE"): 872 sudo_password = _prompt_for_sudo_password(timeout_seconds=45) 873 if sudo_password: 874 _set_cached_sudo_password(sudo_password) 875 876 if has_configured_password or sudo_password: 877 # Trailing newline is required: sudo -S reads one line for the password. 878 return transformed, sudo_password + "\n" 879 880 return command, None 881 882 883 # Environment classes now live in tools/environments/ 884 from tools.environments.local import LocalEnvironment as _LocalEnvironment 885 from tools.environments.singularity import SingularityEnvironment as _SingularityEnvironment 886 from tools.environments.ssh import SSHEnvironment as _SSHEnvironment 887 from tools.environments.docker import DockerEnvironment as _DockerEnvironment 888 from tools.environments.modal import ModalEnvironment as _ModalEnvironment 889 from tools.environments.managed_modal import ManagedModalEnvironment as _ManagedModalEnvironment 890 from tools.managed_tool_gateway import is_managed_tool_gateway_ready 891 892 893 # Tool description for LLM 894 TERMINAL_TOOL_DESCRIPTION = """Execute shell commands on a Linux environment. Filesystem usually persists between calls. 895 896 Do NOT use cat/head/tail to read files — use read_file instead. 897 Do NOT use grep/rg/find to search — use search_files instead. 898 Do NOT use ls to list directories — use search_files(target='files') instead. 899 Do NOT use sed/awk to edit files — use patch instead. 900 Do NOT use echo/cat heredoc to create files — use write_file instead. 901 Reserve terminal for: builds, installs, git, processes, scripts, network, package managers, and anything that needs a shell. 902 903 Foreground (default): Commands return INSTANTLY when done, even if the timeout is high. Set timeout=300 for long builds/scripts — you'll still get the result in seconds if it's fast. Prefer foreground for short commands. 904 Background: Set background=true to get a session_id. Two patterns: 905 (1) Long-lived processes that never exit (servers, watchers). 906 (2) Long-running tasks with notify_on_complete=true — you can keep working on other things and the system auto-notifies you when the task finishes. Great for test suites, builds, deployments, or anything that takes more than a minute. 907 For servers/watchers, do NOT use shell-level background wrappers (nohup/disown/setsid/trailing '&') in foreground mode. Use background=true so Hermes can track lifecycle and output. 908 After starting a server, verify readiness with a health check or log signal, then run tests in a separate terminal() call. Avoid blind sleep loops. 909 Use process(action="poll") for progress checks, process(action="wait") to block until done. 910 Working directory: Use 'workdir' for per-command cwd. 911 PTY mode: Set pty=true for interactive CLI tools (Codex, Claude Code, Python REPL). 912 913 Do NOT use vim/nano/interactive tools without pty=true — they hang without a pseudo-terminal. Pipe git output to cat if it might page. 914 """ 915 916 # Global state for environment lifecycle management 917 _active_environments: Dict[str, Any] = {} 918 _last_activity: Dict[str, float] = {} 919 _env_lock = threading.Lock() 920 _creation_locks: Dict[str, threading.Lock] = {} # Per-task locks for sandbox creation 921 _creation_locks_lock = threading.Lock() # Protects _creation_locks dict itself 922 _cleanup_thread = None 923 _cleanup_running = False 924 925 # Per-task environment overrides registry. 926 # Allows environments (e.g., TerminalBench2Env) to specify a custom Docker/Modal 927 # image for a specific task_id BEFORE the agent loop starts. When the terminal or 928 # file tools create a new sandbox for that task_id, they check this registry first 929 # and fall back to the TERMINAL_MODAL_IMAGE (etc.) env var if no override is set. 930 # 931 # This is never exposed to the model -- only infrastructure code calls it. 932 # Thread-safe because each task_id is unique per rollout. 933 _task_env_overrides: Dict[str, Dict[str, Any]] = {} 934 935 936 def register_task_env_overrides(task_id: str, overrides: Dict[str, Any]): 937 """ 938 Register environment overrides for a specific task/rollout. 939 940 Called by Atropos environments before the agent loop to configure 941 per-task sandbox settings (e.g., a custom Dockerfile for the Modal image). 942 943 Supported override keys: 944 - modal_image: str -- Path to Dockerfile or Docker Hub image name 945 - docker_image: str -- Docker image name 946 - cwd: str -- Working directory inside the sandbox 947 948 Args: 949 task_id: The rollout's unique task identifier 950 overrides: Dict of config keys to override 951 """ 952 _task_env_overrides[task_id] = overrides 953 954 955 def clear_task_env_overrides(task_id: str): 956 """ 957 Clear environment overrides for a task after rollout completes. 958 959 Called during cleanup to avoid stale entries accumulating. 960 """ 961 _task_env_overrides.pop(task_id, None) 962 963 964 def _resolve_container_task_id(task_id: Optional[str]) -> str: 965 """ 966 Map a tool-call ``task_id`` to the container/sandbox key used by 967 ``_active_environments``. 968 969 The top-level agent passes ``task_id=None`` and lands on ``"default"``. 970 ``delegate_task`` children pass their own subagent ID so that 971 file-state tracking, the active-subagents registry, and TUI events stay 972 distinct per child -- but we deliberately collapse that ID back to 973 ``"default"`` here so subagents share the parent's long-lived container 974 (one bash, one /workspace, one set of installed packages). 975 976 Exception: RL / benchmark environments (TerminalBench2, HermesSweEnv, ...) 977 call ``register_task_env_overrides(task_id, {...})`` to request a 978 per-task Docker/Modal image. When an override is registered for a 979 task_id, we honour it by returning the task_id unchanged -- those 980 rollouts need their own isolated sandbox, which is the whole point of 981 the override. 982 """ 983 if task_id and task_id in _task_env_overrides: 984 return task_id 985 return "default" 986 987 988 # Configuration from environment variables 989 990 def _parse_env_var(name: str, default: str, converter=int, type_label: str = "integer"): 991 """Parse an environment variable with *converter*, raising a clear error on bad values. 992 993 Without this wrapper, a single malformed env var (e.g. TERMINAL_TIMEOUT=5m) 994 causes an unhandled ValueError that kills every terminal command. 995 """ 996 raw = os.getenv(name, default) 997 try: 998 return converter(raw) 999 except (ValueError, json.JSONDecodeError): 1000 raise ValueError( 1001 f"Invalid value for {name}: {raw!r} (expected {type_label}). " 1002 f"Check ~/.hermes/.env or environment variables." 1003 ) 1004 1005 1006 def _get_env_config() -> Dict[str, Any]: 1007 """Get terminal environment configuration from environment variables.""" 1008 # Default image with Python and Node.js for maximum compatibility 1009 default_image = "nikolaik/python-nodejs:python3.11-nodejs20" 1010 env_type = os.getenv("TERMINAL_ENV", "local") 1011 1012 mount_docker_cwd = os.getenv("TERMINAL_DOCKER_MOUNT_CWD_TO_WORKSPACE", "false").lower() in ("true", "1", "yes") 1013 1014 # Default cwd: local uses the host's current directory, ssh uses the 1015 # remote home, Vercel uses its documented workspace root, and everything 1016 # else starts in the backend's default root-like cwd. 1017 if env_type == "local": 1018 default_cwd = os.getcwd() 1019 elif env_type == "ssh": 1020 default_cwd = "~" 1021 elif env_type == "vercel_sandbox": 1022 default_cwd = _VERCEL_SANDBOX_DEFAULT_CWD 1023 else: 1024 default_cwd = "/root" 1025 1026 # Read TERMINAL_CWD but sanity-check it for container backends. 1027 # If Docker cwd passthrough is explicitly enabled, remap the host path to 1028 # /workspace and track the original host path separately. Otherwise keep the 1029 # normal sandbox behavior and discard host paths. 1030 cwd = os.getenv("TERMINAL_CWD", default_cwd) 1031 if cwd: 1032 cwd = os.path.expanduser(cwd) 1033 host_cwd = None 1034 host_prefixes = ("/Users/", "/home/", "C:\\", "C:/") 1035 if env_type == "docker" and mount_docker_cwd: 1036 docker_cwd_source = os.getenv("TERMINAL_CWD") or os.getcwd() 1037 candidate = os.path.abspath(os.path.expanduser(docker_cwd_source)) 1038 if ( 1039 any(candidate.startswith(p) for p in host_prefixes) 1040 or (os.path.isabs(candidate) and os.path.isdir(candidate) and not candidate.startswith(("/workspace", "/root"))) 1041 ): 1042 host_cwd = candidate 1043 cwd = "/workspace" 1044 elif env_type in ("modal", "docker", "singularity", "daytona", "vercel_sandbox") and cwd: 1045 # Host paths and relative paths that won't work inside containers 1046 is_host_path = any(cwd.startswith(p) for p in host_prefixes) 1047 is_relative = not os.path.isabs(cwd) # e.g. "." or "src/" 1048 if (is_host_path or is_relative) and cwd != default_cwd: 1049 logger.info("Ignoring TERMINAL_CWD=%r for %s backend " 1050 "(host/relative path won't work in sandbox). Using %r instead.", 1051 cwd, env_type, default_cwd) 1052 cwd = default_cwd 1053 1054 return { 1055 "env_type": env_type, 1056 "modal_mode": coerce_modal_mode(os.getenv("TERMINAL_MODAL_MODE", "auto")), 1057 "docker_image": os.getenv("TERMINAL_DOCKER_IMAGE", default_image), 1058 "docker_forward_env": _parse_env_var("TERMINAL_DOCKER_FORWARD_ENV", "[]", json.loads, "valid JSON"), 1059 "singularity_image": os.getenv("TERMINAL_SINGULARITY_IMAGE", f"docker://{default_image}"), 1060 "modal_image": os.getenv("TERMINAL_MODAL_IMAGE", default_image), 1061 "daytona_image": os.getenv("TERMINAL_DAYTONA_IMAGE", default_image), 1062 "vercel_runtime": os.getenv("TERMINAL_VERCEL_RUNTIME", "").strip(), 1063 "cwd": cwd, 1064 "host_cwd": host_cwd, 1065 "docker_mount_cwd_to_workspace": mount_docker_cwd, 1066 "timeout": _parse_env_var("TERMINAL_TIMEOUT", "180"), 1067 "lifetime_seconds": _parse_env_var("TERMINAL_LIFETIME_SECONDS", "300"), 1068 # SSH-specific config 1069 "ssh_host": os.getenv("TERMINAL_SSH_HOST", ""), 1070 "ssh_user": os.getenv("TERMINAL_SSH_USER", ""), 1071 "ssh_port": _parse_env_var("TERMINAL_SSH_PORT", "22"), 1072 "ssh_key": os.getenv("TERMINAL_SSH_KEY", ""), 1073 # Persistent shell: SSH defaults to the config-level persistent_shell 1074 # setting (true by default for non-local backends); local is always opt-in. 1075 # Per-backend env vars override if explicitly set. 1076 "ssh_persistent": os.getenv( 1077 "TERMINAL_SSH_PERSISTENT", 1078 os.getenv("TERMINAL_PERSISTENT_SHELL", "true"), 1079 ).lower() in ("true", "1", "yes"), 1080 "local_persistent": os.getenv("TERMINAL_LOCAL_PERSISTENT", "false").lower() in ("true", "1", "yes"), 1081 # Container resource config (applies to docker, singularity, modal, 1082 # daytona, and vercel_sandbox -- ignored for local/ssh) 1083 "container_cpu": _parse_env_var("TERMINAL_CONTAINER_CPU", "1", float, "number"), 1084 "container_memory": _parse_env_var("TERMINAL_CONTAINER_MEMORY", "5120"), # MB (default 5GB) 1085 "container_disk": _parse_env_var("TERMINAL_CONTAINER_DISK", "51200"), # MB (default 50GB) 1086 "container_persistent": os.getenv("TERMINAL_CONTAINER_PERSISTENT", "true").lower() in ("true", "1", "yes"), 1087 "docker_volumes": _parse_env_var("TERMINAL_DOCKER_VOLUMES", "[]", json.loads, "valid JSON"), 1088 "docker_run_as_host_user": os.getenv("TERMINAL_DOCKER_RUN_AS_HOST_USER", "false").lower() in ("true", "1", "yes"), 1089 } 1090 1091 1092 def _get_modal_backend_state(modal_mode: object | None) -> Dict[str, Any]: 1093 """Resolve direct vs managed Modal backend selection.""" 1094 return resolve_modal_backend_state( 1095 modal_mode, 1096 has_direct=has_direct_modal_credentials(), 1097 managed_ready=is_managed_tool_gateway_ready("modal"), 1098 ) 1099 1100 1101 def _create_environment(env_type: str, image: str, cwd: str, timeout: int, 1102 ssh_config: dict = None, container_config: dict = None, 1103 local_config: dict = None, 1104 task_id: str = "default", 1105 host_cwd: str = None): 1106 """ 1107 Create an execution environment for sandboxed command execution. 1108 1109 Args: 1110 env_type: One of "local", "docker", "singularity", "modal", 1111 "daytona", "vercel_sandbox", "ssh" 1112 image: Docker/Singularity/Modal image name (ignored for local/ssh/vercel) 1113 cwd: Working directory 1114 timeout: Default command timeout 1115 ssh_config: SSH connection config (for env_type="ssh") 1116 container_config: Resource config for container backends (cpu, memory, disk, persistent) 1117 task_id: Task identifier for environment reuse and snapshot keying 1118 host_cwd: Optional host working directory to bind into Docker when explicitly enabled 1119 1120 Returns: 1121 Environment instance with execute() method 1122 """ 1123 cc = container_config or {} 1124 cpu = cc.get("container_cpu", 1) 1125 memory = cc.get("container_memory", 5120) 1126 disk = cc.get("container_disk", 51200) 1127 persistent = cc.get("container_persistent", True) 1128 volumes = cc.get("docker_volumes", []) 1129 docker_forward_env = cc.get("docker_forward_env", []) 1130 docker_env = cc.get("docker_env", {}) 1131 1132 if env_type == "local": 1133 return _LocalEnvironment(cwd=cwd, timeout=timeout) 1134 1135 elif env_type == "docker": 1136 return _DockerEnvironment( 1137 image=image, cwd=cwd, timeout=timeout, 1138 cpu=cpu, memory=memory, disk=disk, 1139 persistent_filesystem=persistent, task_id=task_id, 1140 volumes=volumes, 1141 host_cwd=host_cwd, 1142 auto_mount_cwd=cc.get("docker_mount_cwd_to_workspace", False), 1143 forward_env=docker_forward_env, 1144 env=docker_env, 1145 run_as_host_user=cc.get("docker_run_as_host_user", False), 1146 ) 1147 1148 elif env_type == "singularity": 1149 return _SingularityEnvironment( 1150 image=image, cwd=cwd, timeout=timeout, 1151 cpu=cpu, memory=memory, disk=disk, 1152 persistent_filesystem=persistent, task_id=task_id, 1153 ) 1154 1155 elif env_type == "modal": 1156 sandbox_kwargs = {} 1157 if cpu > 0: 1158 sandbox_kwargs["cpu"] = cpu 1159 if memory > 0: 1160 sandbox_kwargs["memory"] = memory 1161 if disk > 0: 1162 try: 1163 import inspect, modal 1164 if "ephemeral_disk" in inspect.signature(modal.Sandbox.create).parameters: 1165 sandbox_kwargs["ephemeral_disk"] = disk 1166 except Exception: 1167 pass 1168 1169 modal_state = _get_modal_backend_state(cc.get("modal_mode")) 1170 1171 if modal_state["selected_backend"] == "managed": 1172 return _ManagedModalEnvironment( 1173 image=image, cwd=cwd, timeout=timeout, 1174 modal_sandbox_kwargs=sandbox_kwargs, 1175 persistent_filesystem=persistent, task_id=task_id, 1176 ) 1177 1178 if modal_state["selected_backend"] != "direct": 1179 if modal_state["managed_mode_blocked"]: 1180 raise ValueError( 1181 "Modal backend is configured for managed mode, but " 1182 "a paid Nous subscription is required for the Tool Gateway and no direct " 1183 "Modal credentials/config were found. Log in with `hermes model` or " 1184 "choose TERMINAL_MODAL_MODE=direct/auto." 1185 ) 1186 if modal_state["mode"] == "managed": 1187 raise ValueError( 1188 "Modal backend is configured for managed mode, but the managed tool gateway is unavailable." 1189 ) 1190 if modal_state["mode"] == "direct": 1191 raise ValueError( 1192 "Modal backend is configured for direct mode, but no direct Modal credentials/config were found." 1193 ) 1194 message = "Modal backend selected but no direct Modal credentials/config was found." 1195 if managed_nous_tools_enabled(): 1196 message = ( 1197 "Modal backend selected but no direct Modal credentials/config or managed tool gateway was found." 1198 ) 1199 raise ValueError(message) 1200 1201 return _ModalEnvironment( 1202 image=image, cwd=cwd, timeout=timeout, 1203 modal_sandbox_kwargs=sandbox_kwargs, 1204 persistent_filesystem=persistent, task_id=task_id, 1205 ) 1206 1207 elif env_type == "daytona": 1208 # Lazy import so daytona SDK is only required when backend is selected. 1209 from tools.environments.daytona import DaytonaEnvironment as _DaytonaEnvironment 1210 return _DaytonaEnvironment( 1211 image=image, cwd=cwd, timeout=timeout, 1212 cpu=int(cpu), memory=memory, disk=disk, 1213 persistent_filesystem=persistent, task_id=task_id, 1214 ) 1215 1216 elif env_type == "vercel_sandbox": 1217 from tools.environments.vercel_sandbox import ( 1218 VercelSandboxEnvironment as _VercelSandboxEnvironment, 1219 ) 1220 return _VercelSandboxEnvironment( 1221 runtime=cc.get("vercel_runtime") or None, 1222 cwd=cwd, 1223 timeout=timeout, 1224 cpu=cpu, 1225 memory=memory, 1226 disk=disk, 1227 persistent_filesystem=persistent, 1228 task_id=task_id, 1229 ) 1230 1231 elif env_type == "ssh": 1232 if not ssh_config or not ssh_config.get("host") or not ssh_config.get("user"): 1233 raise ValueError("SSH environment requires ssh_host and ssh_user to be configured") 1234 return _SSHEnvironment( 1235 host=ssh_config["host"], 1236 user=ssh_config["user"], 1237 port=ssh_config.get("port", 22), 1238 key_path=ssh_config.get("key", ""), 1239 cwd=cwd, 1240 timeout=timeout, 1241 ) 1242 1243 else: 1244 raise ValueError( 1245 f"Unknown environment type: {env_type}. Use 'local', 'docker', " 1246 f"'singularity', 'modal', 'daytona', 'vercel_sandbox', or 'ssh'" 1247 ) 1248 1249 1250 def _cleanup_inactive_envs(lifetime_seconds: int = 300): 1251 """Clean up environments that have been inactive for longer than lifetime_seconds.""" 1252 current_time = time.time() 1253 1254 # Check the process registry -- skip cleanup for sandboxes with active 1255 # background processes (their _last_activity gets refreshed to keep them alive). 1256 try: 1257 from tools.process_registry import process_registry 1258 for task_id in list(_last_activity.keys()): 1259 if process_registry.has_active_processes(task_id): 1260 _last_activity[task_id] = current_time # Keep sandbox alive 1261 except ImportError: 1262 pass 1263 1264 # Phase 1: collect stale entries and remove them from tracking dicts while 1265 # holding the lock. Do NOT call env.cleanup() inside the lock -- Modal and 1266 # Docker teardown can block for 10-15s, which would stall every concurrent 1267 # terminal/file tool call waiting on _env_lock. 1268 envs_to_stop = [] # list of (task_id, env) pairs 1269 1270 with _env_lock: 1271 for task_id, last_time in list(_last_activity.items()): 1272 if current_time - last_time > lifetime_seconds: 1273 env = _active_environments.pop(task_id, None) 1274 _last_activity.pop(task_id, None) 1275 if env is not None: 1276 envs_to_stop.append((task_id, env)) 1277 1278 # Also purge per-task creation locks for cleaned-up tasks 1279 with _creation_locks_lock: 1280 for task_id, _ in envs_to_stop: 1281 _creation_locks.pop(task_id, None) 1282 1283 # Phase 2: stop the actual sandboxes OUTSIDE the lock so other tool calls 1284 # are not blocked while Modal/Docker sandboxes shut down. 1285 for task_id, env in envs_to_stop: 1286 # Invalidate stale file_ops cache entry (Bug fix: prevents 1287 # ShellFileOperations from referencing a dead sandbox) 1288 try: 1289 from tools.file_tools import clear_file_ops_cache 1290 clear_file_ops_cache(task_id) 1291 except ImportError: 1292 pass 1293 1294 try: 1295 if hasattr(env, 'cleanup'): 1296 env.cleanup() 1297 elif hasattr(env, 'stop'): 1298 env.stop() 1299 elif hasattr(env, 'terminate'): 1300 env.terminate() 1301 1302 logger.info("Cleaned up inactive environment for task: %s", task_id) 1303 1304 except Exception as e: 1305 error_str = str(e) 1306 if "404" in error_str or "not found" in error_str.lower(): 1307 logger.info("Environment for task %s already cleaned up", task_id) 1308 else: 1309 logger.warning("Error cleaning up environment for task %s: %s", task_id, e) 1310 1311 1312 def _cleanup_thread_worker(): 1313 """Background thread worker that periodically cleans up inactive environments.""" 1314 while _cleanup_running: 1315 try: 1316 config = _get_env_config() 1317 _cleanup_inactive_envs(config["lifetime_seconds"]) 1318 except Exception as e: 1319 logger.warning("Error in cleanup thread: %s", e, exc_info=True) 1320 1321 for _ in range(60): 1322 if not _cleanup_running: 1323 break 1324 time.sleep(1) 1325 1326 1327 def _start_cleanup_thread(): 1328 """Start the background cleanup thread if not already running.""" 1329 global _cleanup_thread, _cleanup_running 1330 1331 with _env_lock: 1332 if _cleanup_thread is None or not _cleanup_thread.is_alive(): 1333 _cleanup_running = True 1334 _cleanup_thread = threading.Thread(target=_cleanup_thread_worker, daemon=True) 1335 _cleanup_thread.start() 1336 1337 1338 def _stop_cleanup_thread(): 1339 """Stop the background cleanup thread.""" 1340 global _cleanup_running 1341 _cleanup_running = False 1342 if _cleanup_thread is not None: 1343 try: 1344 _cleanup_thread.join(timeout=5) 1345 except (SystemExit, KeyboardInterrupt): 1346 pass 1347 1348 1349 def get_active_env(task_id: str): 1350 """Return the active BaseEnvironment for *task_id*, or None.""" 1351 lookup = _resolve_container_task_id(task_id) 1352 with _env_lock: 1353 return _active_environments.get(lookup) or _active_environments.get(task_id) 1354 1355 1356 def is_persistent_env(task_id: str) -> bool: 1357 """Return True if the active environment for task_id is configured for 1358 cross-turn persistence (``persistent_filesystem=True``). 1359 1360 Used by the agent loop to skip per-turn teardown for backends whose whole 1361 point is to survive between turns (docker with ``container_persistent``, 1362 daytona, modal, etc.). Non-persistent backends (e.g. Morph) still get torn 1363 down at end-of-turn to prevent leakage. The idle reaper 1364 (``_cleanup_inactive_envs``) handles persistent envs once they exceed 1365 ``terminal.lifetime_seconds``. 1366 """ 1367 env = get_active_env(task_id) 1368 if env is None: 1369 return False 1370 return bool(getattr(env, "_persistent", False)) 1371 1372 1373 1374 1375 def cleanup_all_environments(): 1376 """Clean up ALL active environments. Use with caution.""" 1377 task_ids = list(_active_environments.keys()) 1378 cleaned = 0 1379 1380 for task_id in task_ids: 1381 try: 1382 cleanup_vm(task_id) 1383 cleaned += 1 1384 except Exception as e: 1385 logger.error("Error cleaning %s: %s", task_id, e, exc_info=True) 1386 1387 # Also clean any orphaned directories 1388 scratch_dir = _get_scratch_dir() 1389 import glob 1390 for path in glob.glob(str(scratch_dir / "hermes-*")): 1391 try: 1392 shutil.rmtree(path, ignore_errors=True) 1393 logger.info("Removed orphaned: %s", path) 1394 except OSError as e: 1395 logger.debug("Failed to remove orphaned path %s: %s", path, e) 1396 1397 if cleaned > 0: 1398 logger.info("Cleaned %d environments", cleaned) 1399 return cleaned 1400 1401 1402 def cleanup_vm(task_id: str): 1403 """Manually clean up a specific environment by task_id.""" 1404 # Remove from tracking dicts while holding the lock, but defer the 1405 # actual (potentially slow) env.cleanup() call to outside the lock 1406 # so other tool calls aren't blocked. 1407 env = None 1408 with _env_lock: 1409 env = _active_environments.pop(task_id, None) 1410 _last_activity.pop(task_id, None) 1411 1412 # Clean up per-task creation lock 1413 with _creation_locks_lock: 1414 _creation_locks.pop(task_id, None) 1415 1416 # Invalidate stale file_ops cache entry 1417 try: 1418 from tools.file_tools import clear_file_ops_cache 1419 clear_file_ops_cache(task_id) 1420 except ImportError: 1421 pass 1422 1423 if env is None: 1424 return 1425 1426 try: 1427 if hasattr(env, 'cleanup'): 1428 env.cleanup() 1429 elif hasattr(env, 'stop'): 1430 env.stop() 1431 elif hasattr(env, 'terminate'): 1432 env.terminate() 1433 1434 logger.info("Manually cleaned up environment for task: %s", task_id) 1435 1436 except Exception as e: 1437 error_str = str(e) 1438 if "404" in error_str or "not found" in error_str.lower(): 1439 logger.info("Environment for task %s already cleaned up", task_id) 1440 else: 1441 logger.warning("Error cleaning up environment for task %s: %s", task_id, e) 1442 1443 1444 def _atexit_cleanup(): 1445 """Stop cleanup thread and shut down all remaining sandboxes on exit.""" 1446 _stop_cleanup_thread() 1447 if _active_environments: 1448 count = len(_active_environments) 1449 logger.info("Shutting down %d remaining sandbox(es)...", count) 1450 cleanup_all_environments() 1451 1452 atexit.register(_atexit_cleanup) 1453 1454 1455 # ============================================================================= 1456 # Exit Code Context for Common CLI Tools 1457 # ============================================================================= 1458 # Many Unix commands use non-zero exit codes for informational purposes, not 1459 # to indicate failure. The model sees a raw exit_code=1 from `grep` and 1460 # wastes a turn investigating something that just means "no matches". 1461 # This lookup adds a human-readable note so the agent can move on. 1462 1463 def _interpret_exit_code(command: str, exit_code: int) -> str | None: 1464 """Return a human-readable note when a non-zero exit code is non-erroneous. 1465 1466 Returns None when the exit code is 0 or genuinely signals an error. 1467 The note is appended to the tool result so the model doesn't waste 1468 turns investigating expected exit codes. 1469 """ 1470 if exit_code == 0: 1471 return None 1472 1473 # Extract the last command in a pipeline/chain — that determines the 1474 # exit code. Handles `cmd1 && cmd2`, `cmd1 | cmd2`, `cmd1; cmd2`. 1475 # Deliberately simple: split on shell operators and take the last piece. 1476 segments = re.split(r'\s*(?:\|\||&&|[|;])\s*', command) 1477 last_segment = (segments[-1] if segments else command).strip() 1478 1479 # Get base command name (first word), stripping env var assignments 1480 # like VAR=val cmd ... 1481 words = last_segment.split() 1482 base_cmd = "" 1483 for w in words: 1484 if "=" in w and not w.startswith("-"): 1485 continue # skip VAR=val 1486 base_cmd = w.split("/")[-1] # handle /usr/bin/grep -> grep 1487 break 1488 1489 if not base_cmd: 1490 return None 1491 1492 # Command-specific semantics 1493 semantics: dict[str, dict[int, str]] = { 1494 # grep/rg/ag/ack: 1=no matches found (normal), 2+=real error 1495 "grep": {1: "No matches found (not an error)"}, 1496 "egrep": {1: "No matches found (not an error)"}, 1497 "fgrep": {1: "No matches found (not an error)"}, 1498 "rg": {1: "No matches found (not an error)"}, 1499 "ag": {1: "No matches found (not an error)"}, 1500 "ack": {1: "No matches found (not an error)"}, 1501 # diff: 1=files differ (expected), 2+=real error 1502 "diff": {1: "Files differ (expected, not an error)"}, 1503 "colordiff": {1: "Files differ (expected, not an error)"}, 1504 # find: 1=some dirs inaccessible but results may still be valid 1505 "find": {1: "Some directories were inaccessible (partial results may still be valid)"}, 1506 # test/[: 1=condition is false (expected) 1507 "test": {1: "Condition evaluated to false (expected, not an error)"}, 1508 "[": {1: "Condition evaluated to false (expected, not an error)"}, 1509 # curl: common non-error codes 1510 "curl": { 1511 6: "Could not resolve host", 1512 7: "Failed to connect to host", 1513 22: "HTTP response code indicated error (e.g. 404, 500)", 1514 28: "Operation timed out", 1515 }, 1516 # git: 1 is context-dependent but often normal (e.g. git diff with changes) 1517 "git": {1: "Non-zero exit (often normal — e.g. 'git diff' returns 1 when files differ)"}, 1518 } 1519 1520 cmd_semantics = semantics.get(base_cmd) 1521 if cmd_semantics and exit_code in cmd_semantics: 1522 return cmd_semantics[exit_code] 1523 1524 return None 1525 1526 1527 def _command_requires_pipe_stdin(command: str) -> bool: 1528 """Return True when PTY mode would break stdin-driven commands. 1529 1530 Some CLIs change behavior when stdin is a TTY. In particular, 1531 `gh auth login --with-token` expects the token to arrive via piped stdin and 1532 waits for EOF; when we launch it under a PTY, `process.submit()` only sends a 1533 newline, so the command appears to hang forever with no visible progress. 1534 """ 1535 normalized = " ".join(command.lower().split()) 1536 return ( 1537 normalized.startswith("gh auth login") 1538 and "--with-token" in normalized 1539 ) 1540 1541 1542 _SHELL_LEVEL_BACKGROUND_RE = re.compile(r"\b(?:nohup|disown|setsid)\b", re.IGNORECASE) 1543 _INLINE_BACKGROUND_AMP_RE = re.compile(r"\s&\s") 1544 _TRAILING_BACKGROUND_AMP_RE = re.compile(r"\s&\s*(?:#.*)?$") 1545 _LONG_LIVED_FOREGROUND_PATTERNS = ( 1546 re.compile(r"\b(?:npm|pnpm|yarn|bun)\s+(?:run\s+)?(?:dev|start|serve|watch)\b", re.IGNORECASE), 1547 re.compile(r"\bdocker\s+compose\s+up\b", re.IGNORECASE), 1548 re.compile(r"\bnext\s+dev\b", re.IGNORECASE), 1549 re.compile(r"\bvite(?:\s|$)", re.IGNORECASE), 1550 re.compile(r"\bnodemon\b", re.IGNORECASE), 1551 re.compile(r"\buvicorn\b", re.IGNORECASE), 1552 re.compile(r"\bgunicorn\b", re.IGNORECASE), 1553 re.compile(r"\bpython(?:3)?\s+-m\s+http\.server\b", re.IGNORECASE), 1554 ) 1555 1556 1557 def _looks_like_help_or_version_command(command: str) -> bool: 1558 """Return True for informational invocations that should never be blocked.""" 1559 normalized = " ".join(command.lower().split()) 1560 return ( 1561 " --help" in normalized 1562 or normalized.endswith(" -h") 1563 or " --version" in normalized 1564 or normalized.endswith(" -v") 1565 ) 1566 1567 1568 def _foreground_background_guidance(command: str) -> str | None: 1569 """Suggest background mode when a foreground command looks long-lived. 1570 1571 Prevents workflows that start a server/watch process and then stall before 1572 follow-up checks or test commands run. 1573 """ 1574 if _looks_like_help_or_version_command(command): 1575 return None 1576 1577 if _SHELL_LEVEL_BACKGROUND_RE.search(command): 1578 return ( 1579 "Foreground command uses shell-level background wrappers (nohup/disown/setsid). " 1580 "Use terminal(background=true) so Hermes can track the process, then run " 1581 "readiness checks and tests in separate commands." 1582 ) 1583 1584 if _INLINE_BACKGROUND_AMP_RE.search(command) or _TRAILING_BACKGROUND_AMP_RE.search(command): 1585 return ( 1586 "Foreground command uses '&' backgrounding. Use terminal(background=true) for long-lived " 1587 "processes, then run health checks and tests in follow-up terminal calls." 1588 ) 1589 1590 for pattern in _LONG_LIVED_FOREGROUND_PATTERNS: 1591 if pattern.search(command): 1592 return ( 1593 "This foreground command appears to start a long-lived server/watch process. " 1594 "Run it with background=true, verify readiness (health endpoint/log signal), " 1595 "then execute tests in a separate command." 1596 ) 1597 1598 return None 1599 1600 1601 def _resolve_notification_flag_conflict( 1602 *, 1603 notify_on_complete: bool, 1604 watch_patterns, 1605 background: bool, 1606 ) -> tuple: 1607 """Decide what to do when both notify_on_complete and watch_patterns are set. 1608 1609 These flags produce duplicate, delayed notifications when combined — one 1610 notification per watch-pattern match AND one on process exit, with async 1611 delivery that can spam the user long after the process ends. When both are 1612 set, we drop watch_patterns in favor of notify_on_complete (the more useful 1613 "let me know when it's done" signal) and return a human-readable note. 1614 1615 Returns: 1616 (watch_patterns_to_use, conflict_note). conflict_note is "" when there 1617 is no conflict. 1618 """ 1619 if background and notify_on_complete and watch_patterns: 1620 note = ( 1621 "watch_patterns ignored because notify_on_complete=True; " 1622 "these two flags produce duplicate notifications when combined" 1623 ) 1624 return None, note 1625 return watch_patterns, "" 1626 1627 1628 def terminal_tool( 1629 command: str, 1630 background: bool = False, 1631 timeout: Optional[int] = None, 1632 task_id: Optional[str] = None, 1633 force: bool = False, 1634 workdir: Optional[str] = None, 1635 pty: bool = False, 1636 notify_on_complete: bool = False, 1637 watch_patterns: Optional[List[str]] = None, 1638 ) -> str: 1639 """ 1640 Execute a command in the configured terminal environment. 1641 1642 Args: 1643 command: The command to execute 1644 background: Whether to run in background (default: False) 1645 timeout: Command timeout in seconds (default: from config) 1646 task_id: Unique identifier for environment isolation (optional) 1647 force: If True, skip dangerous command check (use after user confirms) 1648 workdir: Working directory for this command (optional, uses session cwd if not set) 1649 pty: If True, use pseudo-terminal for interactive CLI tools (local backend only) 1650 notify_on_complete: If True and background=True, you'll be notified exactly once when the process exits. The right choice for almost every long task. MUTUALLY EXCLUSIVE with watch_patterns. 1651 watch_patterns: List of strings to watch for in background output. HARD rate limit: 1 notification per 15s per process. After 3 strike windows in a row, watch_patterns is disabled and the session is auto-promoted to notify_on_complete. Use ONLY for rare, one-shot mid-process signals on long-lived processes (server readiness, migration-done markers). NEVER use in loops/batch jobs — error patterns there will hit the strike limit and get disabled. MUTUALLY EXCLUSIVE with notify_on_complete — set one, not both. 1652 1653 Returns: 1654 str: JSON string with output, exit_code, and error fields 1655 1656 Examples: 1657 # Execute a simple command 1658 >>> result = terminal_tool(command="ls -la /tmp") 1659 1660 # Run a background task 1661 >>> result = terminal_tool(command="python server.py", background=True) 1662 1663 # With custom timeout 1664 >>> result = terminal_tool(command="long_task.sh", timeout=300) 1665 1666 # Force run after user confirmation 1667 # Note: force parameter is internal only, not exposed to model API 1668 """ 1669 try: 1670 if not isinstance(command, str): 1671 logger.warning( 1672 "Rejected invalid terminal command value: %s", 1673 type(command).__name__, 1674 ) 1675 return json.dumps({ 1676 "output": "", 1677 "exit_code": -1, 1678 "error": f"Invalid command: expected string, got {type(command).__name__}", 1679 "status": "error", 1680 }, ensure_ascii=False) 1681 1682 # Get configuration 1683 config = _get_env_config() 1684 env_type = config["env_type"] 1685 1686 # Use task_id for environment isolation. By default all subagent 1687 # task_ids collapse back to "default" so the top-level agent and 1688 # every delegate_task child share one container; only task_ids with 1689 # a registered env override (RL benchmarks) get isolated sandboxes. 1690 effective_task_id = _resolve_container_task_id(task_id) 1691 1692 # Check per-task overrides (set by environments like TerminalBench2Env) 1693 # before falling back to global env var config 1694 overrides = _task_env_overrides.get(effective_task_id, {}) 1695 1696 # Select image based on env type, with per-task override support 1697 if env_type == "docker": 1698 image = overrides.get("docker_image") or config["docker_image"] 1699 elif env_type == "singularity": 1700 image = overrides.get("singularity_image") or config["singularity_image"] 1701 elif env_type == "modal": 1702 image = overrides.get("modal_image") or config["modal_image"] 1703 elif env_type == "daytona": 1704 image = overrides.get("daytona_image") or config["daytona_image"] 1705 else: 1706 image = "" 1707 1708 cwd = overrides.get("cwd") or config["cwd"] 1709 default_timeout = config["timeout"] 1710 effective_timeout = timeout or default_timeout 1711 1712 # Reject foreground commands where the model explicitly requests 1713 # a timeout above FOREGROUND_MAX_TIMEOUT — nudge it toward background. 1714 if not background and timeout and timeout > FOREGROUND_MAX_TIMEOUT: 1715 return json.dumps({ 1716 "error": ( 1717 f"Foreground timeout {timeout}s exceeds the maximum of " 1718 f"{FOREGROUND_MAX_TIMEOUT}s. Use background=true with " 1719 f"notify_on_complete=true for long-running commands." 1720 ), 1721 }, ensure_ascii=False) 1722 1723 # Guardrail: long-lived server/watch commands should run as managed 1724 # background sessions, not foreground shell hacks. 1725 if not background: 1726 guidance = _foreground_background_guidance(command) 1727 if guidance: 1728 return json.dumps({ 1729 "output": "", 1730 "exit_code": -1, 1731 "error": guidance, 1732 "status": "error", 1733 }, ensure_ascii=False) 1734 1735 # Start cleanup thread 1736 _start_cleanup_thread() 1737 1738 # Get or create environment. 1739 # Use a per-task creation lock so concurrent tool calls for the same 1740 # task_id wait for the first one to finish creating the sandbox, 1741 # instead of each creating their own (wasting Modal resources). 1742 with _env_lock: 1743 if effective_task_id in _active_environments: 1744 _last_activity[effective_task_id] = time.time() 1745 env = _active_environments[effective_task_id] 1746 needs_creation = False 1747 else: 1748 needs_creation = True 1749 1750 if needs_creation: 1751 # Per-task lock: only one thread creates the sandbox, others wait 1752 with _creation_locks_lock: 1753 if effective_task_id not in _creation_locks: 1754 _creation_locks[effective_task_id] = threading.Lock() 1755 task_lock = _creation_locks[effective_task_id] 1756 1757 with task_lock: 1758 # Double-check after acquiring the per-task lock 1759 with _env_lock: 1760 if effective_task_id in _active_environments: 1761 _last_activity[effective_task_id] = time.time() 1762 env = _active_environments[effective_task_id] 1763 needs_creation = False 1764 1765 if needs_creation: 1766 if env_type == "singularity": 1767 _check_disk_usage_warning() 1768 logger.info("Creating new %s environment for task %s...", env_type, effective_task_id[:8]) 1769 try: 1770 ssh_config = None 1771 if env_type == "ssh": 1772 ssh_config = { 1773 "host": config.get("ssh_host", ""), 1774 "user": config.get("ssh_user", ""), 1775 "port": config.get("ssh_port", 22), 1776 "key": config.get("ssh_key", ""), 1777 "persistent": config.get("ssh_persistent", False), 1778 } 1779 1780 container_config = None 1781 if env_type in ("docker", "singularity", "modal", "daytona", "vercel_sandbox"): 1782 container_config = { 1783 "container_cpu": config.get("container_cpu", 1), 1784 "container_memory": config.get("container_memory", 5120), 1785 "container_disk": config.get("container_disk", 51200), 1786 "container_persistent": config.get("container_persistent", True), 1787 "modal_mode": config.get("modal_mode", "auto"), 1788 "vercel_runtime": config.get("vercel_runtime", ""), 1789 "docker_volumes": config.get("docker_volumes", []), 1790 "docker_mount_cwd_to_workspace": config.get("docker_mount_cwd_to_workspace", False), 1791 "docker_forward_env": config.get("docker_forward_env", []), 1792 "docker_env": config.get("docker_env", {}), 1793 "docker_run_as_host_user": config.get("docker_run_as_host_user", False), 1794 } 1795 1796 local_config = None 1797 if env_type == "local": 1798 local_config = { 1799 "persistent": config.get("local_persistent", False), 1800 } 1801 1802 new_env = _create_environment( 1803 env_type=env_type, 1804 image=image, 1805 cwd=cwd, 1806 timeout=effective_timeout, 1807 ssh_config=ssh_config, 1808 container_config=container_config, 1809 local_config=local_config, 1810 task_id=effective_task_id, 1811 host_cwd=config.get("host_cwd"), 1812 ) 1813 except ImportError as e: 1814 return json.dumps({ 1815 "output": "", 1816 "exit_code": -1, 1817 "error": f"Terminal tool disabled: environment creation failed ({e})", 1818 "status": "disabled" 1819 }, ensure_ascii=False) 1820 1821 with _env_lock: 1822 _active_environments[effective_task_id] = new_env 1823 _last_activity[effective_task_id] = time.time() 1824 env = new_env 1825 logger.info("%s environment ready for task %s", env_type, effective_task_id[:8]) 1826 1827 # Pre-exec security checks (tirith + dangerous command detection) 1828 # Skip check if force=True (user has confirmed they want to run it) 1829 approval_note = None 1830 if not force: 1831 approval = _check_all_guards(command, env_type) 1832 if not approval["approved"]: 1833 # Check if this is an approval_required (gateway ask mode) 1834 if approval.get("status") == "approval_required": 1835 return json.dumps({ 1836 "output": "", 1837 "exit_code": -1, 1838 "error": approval.get("message", "Waiting for user approval"), 1839 "status": "approval_required", 1840 "command": approval.get("command", command), 1841 "description": approval.get("description", "command flagged"), 1842 "pattern_key": approval.get("pattern_key", ""), 1843 }, ensure_ascii=False) 1844 # Command was blocked 1845 desc = approval.get("description", "command flagged") 1846 fallback_msg = ( 1847 f"Command denied: {desc}. " 1848 "Use the approval prompt to allow it, or rephrase the command." 1849 ) 1850 return json.dumps({ 1851 "output": "", 1852 "exit_code": -1, 1853 "error": approval.get("message", fallback_msg), 1854 "status": "blocked" 1855 }, ensure_ascii=False) 1856 # Track whether approval was explicitly granted by the user 1857 if approval.get("user_approved"): 1858 desc = approval.get("description", "flagged as dangerous") 1859 approval_note = f"Command required approval ({desc}) and was approved by the user." 1860 elif approval.get("smart_approved"): 1861 desc = approval.get("description", "flagged as dangerous") 1862 approval_note = f"Command was flagged ({desc}) and auto-approved by smart approval." 1863 1864 # Validate workdir against shell injection 1865 if workdir: 1866 workdir_error = _validate_workdir(workdir) 1867 if workdir_error: 1868 logger.warning("Blocked dangerous workdir: %s (command: %s)", 1869 workdir[:200], _safe_command_preview(command)) 1870 return json.dumps({ 1871 "output": "", 1872 "exit_code": -1, 1873 "error": workdir_error, 1874 "status": "blocked" 1875 }, ensure_ascii=False) 1876 1877 # Prepare command for execution 1878 pty_disabled_reason = None 1879 effective_pty = pty 1880 if pty and _command_requires_pipe_stdin(command): 1881 effective_pty = False 1882 pty_disabled_reason = ( 1883 "PTY disabled for this command because it expects piped stdin/EOF " 1884 "(for example gh auth login --with-token). For local background " 1885 "processes, call process(action='close') after writing so it receives " 1886 "EOF." 1887 ) 1888 1889 if background: 1890 # Spawn a tracked background process via the process registry. 1891 # For local backends: uses subprocess.Popen with output buffering. 1892 # For non-local backends: runs inside the sandbox via env.execute(). 1893 from tools.approval import get_current_session_key 1894 from tools.process_registry import process_registry 1895 1896 session_key = get_current_session_key(default="") 1897 effective_cwd = workdir or cwd 1898 try: 1899 if env_type == "local": 1900 proc_session = process_registry.spawn_local( 1901 command=command, 1902 cwd=effective_cwd, 1903 task_id=effective_task_id, 1904 session_key=session_key, 1905 env_vars=env.env if hasattr(env, 'env') else None, 1906 use_pty=effective_pty, 1907 ) 1908 else: 1909 proc_session = process_registry.spawn_via_env( 1910 env=env, 1911 command=command, 1912 cwd=effective_cwd, 1913 task_id=effective_task_id, 1914 session_key=session_key, 1915 ) 1916 1917 result_data = { 1918 "output": "Background process started", 1919 "session_id": proc_session.id, 1920 "pid": proc_session.pid, 1921 "exit_code": 0, 1922 "error": None, 1923 } 1924 if approval_note: 1925 result_data["approval"] = approval_note 1926 if pty_disabled_reason: 1927 result_data["pty_note"] = pty_disabled_reason 1928 1929 # Populate routing metadata on the session so that 1930 # watch-pattern and completion notifications can be 1931 # routed back to the correct chat/thread. 1932 if background and (notify_on_complete or watch_patterns): 1933 from gateway.session_context import get_session_env as _gse 1934 _gw_platform = _gse("HERMES_SESSION_PLATFORM", "") 1935 if _gw_platform: 1936 _gw_chat_id = _gse("HERMES_SESSION_CHAT_ID", "") 1937 _gw_thread_id = _gse("HERMES_SESSION_THREAD_ID", "") 1938 _gw_user_id = _gse("HERMES_SESSION_USER_ID", "") 1939 _gw_user_name = _gse("HERMES_SESSION_USER_NAME", "") 1940 proc_session.watcher_platform = _gw_platform 1941 proc_session.watcher_chat_id = _gw_chat_id 1942 proc_session.watcher_user_id = _gw_user_id 1943 proc_session.watcher_user_name = _gw_user_name 1944 proc_session.watcher_thread_id = _gw_thread_id 1945 1946 # Mutual exclusion: if both notify_on_complete and watch_patterns 1947 # are set, drop watch_patterns. The combination produces duplicate 1948 # notifications (one per match + one on exit) that deliver 1949 # asynchronously and can spam the user long after the process ends. 1950 # notify_on_complete is the more useful signal for "let me know 1951 # when the task finishes"; watch_patterns should be reserved for 1952 # standalone mid-process signals on long-lived processes. 1953 watch_patterns, conflict_note = _resolve_notification_flag_conflict( 1954 notify_on_complete=bool(notify_on_complete), 1955 watch_patterns=watch_patterns, 1956 background=bool(background), 1957 ) 1958 if conflict_note: 1959 logger.warning("background proc %s: %s", proc_session.id, conflict_note) 1960 result_data["watch_patterns_ignored"] = conflict_note 1961 1962 # Mark for agent notification on completion 1963 if notify_on_complete and background: 1964 proc_session.notify_on_complete = True 1965 result_data["notify_on_complete"] = True 1966 1967 # In gateway mode, auto-register a fast watcher so the 1968 # gateway can detect completion and trigger a new agent 1969 # turn. CLI mode uses the completion_queue directly. 1970 if proc_session.watcher_platform: 1971 proc_session.watcher_interval = 5 1972 process_registry.pending_watchers.append({ 1973 "session_id": proc_session.id, 1974 "check_interval": 5, 1975 "session_key": session_key, 1976 "platform": proc_session.watcher_platform, 1977 "chat_id": proc_session.watcher_chat_id, 1978 "user_id": proc_session.watcher_user_id, 1979 "user_name": proc_session.watcher_user_name, 1980 "thread_id": proc_session.watcher_thread_id, 1981 "notify_on_complete": True, 1982 }) 1983 1984 # Set watch patterns for output monitoring 1985 if watch_patterns and background: 1986 proc_session.watch_patterns = list(watch_patterns) 1987 result_data["watch_patterns"] = proc_session.watch_patterns 1988 1989 return json.dumps(result_data, ensure_ascii=False) 1990 except Exception as e: 1991 return json.dumps({ 1992 "output": "", 1993 "exit_code": -1, 1994 "error": f"Failed to start background process: {str(e)}" 1995 }, ensure_ascii=False) 1996 else: 1997 # Run foreground command with retry logic 1998 max_retries = 3 1999 retry_count = 0 2000 result = None 2001 2002 while retry_count <= max_retries: 2003 try: 2004 execute_kwargs = {"timeout": effective_timeout} 2005 if workdir: 2006 execute_kwargs["cwd"] = workdir 2007 result = env.execute(command, **execute_kwargs) 2008 except Exception as e: 2009 error_str = str(e).lower() 2010 if "timeout" in error_str: 2011 return json.dumps({ 2012 "output": "", 2013 "exit_code": 124, 2014 "error": f"Command timed out after {effective_timeout} seconds" 2015 }, ensure_ascii=False) 2016 2017 # Retry on transient errors 2018 if retry_count < max_retries: 2019 retry_count += 1 2020 wait_time = 2 ** retry_count 2021 logger.warning("Execution error, retrying in %ds (attempt %d/%d) - Command: %s - Error: %s: %s - Task: %s, Backend: %s", 2022 wait_time, retry_count, max_retries, _safe_command_preview(command), type(e).__name__, e, effective_task_id, env_type) 2023 time.sleep(wait_time) 2024 continue 2025 2026 logger.error("Execution failed after %d retries - Command: %s - Error: %s: %s - Task: %s, Backend: %s", 2027 max_retries, _safe_command_preview(command), type(e).__name__, e, effective_task_id, env_type) 2028 return json.dumps({ 2029 "output": "", 2030 "exit_code": -1, 2031 "error": f"Command execution failed: {type(e).__name__}: {str(e)}" 2032 }, ensure_ascii=False) 2033 2034 # Got a result 2035 break 2036 2037 # Extract output 2038 output = result.get("output", "") 2039 returncode = result.get("returncode", 0) 2040 2041 # Add helpful message for sudo failures in messaging context 2042 output = _handle_sudo_failure(output, env_type) 2043 2044 # Foreground terminal output canonicalization seam: plugins receive 2045 # the full output string before default truncation and may only 2046 # replace it by returning a string from transform_terminal_output. 2047 # The hook is fail-open, and the first valid string return wins. 2048 try: 2049 from hermes_cli.plugins import invoke_hook 2050 hook_results = invoke_hook( 2051 "transform_terminal_output", 2052 command=command, 2053 output=output, 2054 returncode=returncode, 2055 task_id=effective_task_id or "", 2056 env_type=env_type, 2057 ) 2058 for hook_result in hook_results: 2059 if isinstance(hook_result, str): 2060 output = hook_result 2061 break 2062 except Exception: 2063 pass 2064 2065 # Truncate output if too long, keeping both head and tail 2066 from tools.tool_output_limits import get_max_bytes 2067 MAX_OUTPUT_CHARS = get_max_bytes() 2068 if len(output) > MAX_OUTPUT_CHARS: 2069 head_chars = int(MAX_OUTPUT_CHARS * 0.4) # 40% head (error messages often appear early) 2070 tail_chars = MAX_OUTPUT_CHARS - head_chars # 60% tail (most recent/relevant output) 2071 omitted = len(output) - head_chars - tail_chars 2072 truncated_notice = ( 2073 f"\n\n... [OUTPUT TRUNCATED - {omitted} chars omitted " 2074 f"out of {len(output)} total] ...\n\n" 2075 ) 2076 output = output[:head_chars] + truncated_notice + output[-tail_chars:] 2077 2078 # Strip ANSI escape sequences so the model never sees terminal 2079 # formatting — prevents it from copying escapes into file writes. 2080 from tools.ansi_strip import strip_ansi 2081 output = strip_ansi(output) 2082 2083 # Redact secrets from command output (catches env/printenv leaking keys) 2084 from agent.redact import redact_sensitive_text 2085 output = redact_sensitive_text(output.strip()) if output else "" 2086 2087 # Interpret non-zero exit codes that aren't real errors 2088 # (e.g. grep=1 means "no matches", diff=1 means "files differ") 2089 exit_note = _interpret_exit_code(command, returncode) 2090 2091 result_dict = { 2092 "output": output, 2093 "exit_code": returncode, 2094 "error": None, 2095 } 2096 if approval_note: 2097 result_dict["approval"] = approval_note 2098 if exit_note: 2099 result_dict["exit_code_meaning"] = exit_note 2100 2101 return json.dumps(result_dict, ensure_ascii=False) 2102 2103 except Exception as e: 2104 import traceback 2105 tb_str = traceback.format_exc() 2106 logger.error("terminal_tool exception:\n%s", tb_str) 2107 return json.dumps({ 2108 "output": "", 2109 "exit_code": -1, 2110 "error": f"Failed to execute command: {str(e)}", 2111 "traceback": tb_str, 2112 "status": "error" 2113 }, ensure_ascii=False) 2114 2115 2116 def check_terminal_requirements() -> bool: 2117 """Check if all requirements for the terminal tool are met.""" 2118 try: 2119 config = _get_env_config() 2120 env_type = config["env_type"] 2121 2122 if env_type == "local": 2123 return True 2124 2125 elif env_type == "docker": 2126 from tools.environments.docker import find_docker 2127 docker = find_docker() 2128 if not docker: 2129 logger.error("Docker executable not found in PATH or common install locations") 2130 return False 2131 result = subprocess.run([docker, "version"], capture_output=True, timeout=5) 2132 return result.returncode == 0 2133 2134 elif env_type == "singularity": 2135 executable = shutil.which("apptainer") or shutil.which("singularity") 2136 if executable: 2137 result = subprocess.run([executable, "--version"], capture_output=True, timeout=5) 2138 return result.returncode == 0 2139 return False 2140 2141 elif env_type == "ssh": 2142 if not config.get("ssh_host") or not config.get("ssh_user"): 2143 logger.error( 2144 "SSH backend selected but TERMINAL_SSH_HOST and TERMINAL_SSH_USER " 2145 "are not both set. Configure both or switch TERMINAL_ENV to 'local'." 2146 ) 2147 return False 2148 return True 2149 2150 elif env_type == "modal": 2151 modal_state = _get_modal_backend_state(config.get("modal_mode")) 2152 if modal_state["selected_backend"] == "managed": 2153 return True 2154 2155 if modal_state["selected_backend"] != "direct": 2156 if modal_state["managed_mode_blocked"]: 2157 logger.error( 2158 "Modal backend selected with TERMINAL_MODAL_MODE=managed, but " 2159 "a paid Nous subscription is required for the Tool Gateway and no direct " 2160 "Modal credentials/config were found. Log in with `hermes model` " 2161 "or choose TERMINAL_MODAL_MODE=direct/auto." 2162 ) 2163 return False 2164 if modal_state["mode"] == "managed": 2165 logger.error( 2166 "Modal backend selected with TERMINAL_MODAL_MODE=managed, but the managed " 2167 "tool gateway is unavailable. Configure the managed gateway or choose " 2168 "TERMINAL_MODAL_MODE=direct/auto." 2169 ) 2170 return False 2171 elif modal_state["mode"] == "direct": 2172 if managed_nous_tools_enabled(): 2173 logger.error( 2174 "Modal backend selected with TERMINAL_MODAL_MODE=direct, but no direct " 2175 "Modal credentials/config were found. Configure Modal or choose " 2176 "TERMINAL_MODAL_MODE=managed/auto." 2177 ) 2178 else: 2179 logger.error( 2180 "Modal backend selected with TERMINAL_MODAL_MODE=direct, but no direct " 2181 "Modal credentials/config were found. Configure Modal or choose " 2182 "TERMINAL_MODAL_MODE=auto." 2183 ) 2184 return False 2185 else: 2186 if managed_nous_tools_enabled(): 2187 logger.error( 2188 "Modal backend selected but no direct Modal credentials/config or managed " 2189 "tool gateway was found. Configure Modal, set up the managed gateway, " 2190 "or choose a different TERMINAL_ENV." 2191 ) 2192 else: 2193 logger.error( 2194 "Modal backend selected but no direct Modal credentials/config was found. " 2195 "Configure Modal or choose a different TERMINAL_ENV." 2196 ) 2197 return False 2198 2199 if importlib.util.find_spec("modal") is None: 2200 logger.error("modal is required for direct modal terminal backend: pip install modal") 2201 return False 2202 2203 return True 2204 2205 elif env_type == "vercel_sandbox": 2206 return _check_vercel_sandbox_requirements(config) 2207 2208 elif env_type == "daytona": 2209 from daytona import Daytona # noqa: F401 — SDK presence check 2210 return os.getenv("DAYTONA_API_KEY") is not None 2211 2212 else: 2213 logger.error( 2214 "Unknown TERMINAL_ENV '%s'. Use one of: local, docker, singularity, " 2215 "modal, daytona, vercel_sandbox, ssh.", 2216 env_type, 2217 ) 2218 return False 2219 except Exception as e: 2220 logger.error("Terminal requirements check failed: %s", e, exc_info=True) 2221 return False 2222 2223 2224 if __name__ == "__main__": 2225 # Simple test when run directly 2226 print("Terminal Tool Module") 2227 print("=" * 50) 2228 2229 config = _get_env_config() 2230 print("\nCurrent Configuration:") 2231 print(f" Environment type: {config['env_type']}") 2232 print(f" Docker image: {config['docker_image']}") 2233 print(f" Modal image: {config['modal_image']}") 2234 print(f" Working directory: {config['cwd']}") 2235 print(f" Default timeout: {config['timeout']}s") 2236 print(f" Lifetime: {config['lifetime_seconds']}s") 2237 2238 if not check_terminal_requirements(): 2239 print("\n❌ Requirements not met. Please check the messages above.") 2240 exit(1) 2241 2242 print("\n✅ All requirements met!") 2243 print("\nAvailable Tool:") 2244 print(" - terminal_tool: Execute commands in sandboxed environments") 2245 2246 print("\nUsage Examples:") 2247 print(" # Execute a command") 2248 print(" result = terminal_tool(command='ls -la')") 2249 print(" ") 2250 print(" # Run a background task") 2251 print(" result = terminal_tool(command='python server.py', background=True)") 2252 2253 print("\nEnvironment Variables:") 2254 default_img = "nikolaik/python-nodejs:python3.11-nodejs20" 2255 print( 2256 " TERMINAL_ENV: " 2257 f"{os.getenv('TERMINAL_ENV', 'local')} " 2258 "(local/docker/singularity/modal/daytona/vercel_sandbox/ssh)" 2259 ) 2260 print(f" TERMINAL_DOCKER_IMAGE: {os.getenv('TERMINAL_DOCKER_IMAGE', default_img)}") 2261 print(f" TERMINAL_SINGULARITY_IMAGE: {os.getenv('TERMINAL_SINGULARITY_IMAGE', f'docker://{default_img}')}") 2262 print(f" TERMINAL_MODAL_IMAGE: {os.getenv('TERMINAL_MODAL_IMAGE', default_img)}") 2263 print(f" TERMINAL_DAYTONA_IMAGE: {os.getenv('TERMINAL_DAYTONA_IMAGE', default_img)}") 2264 print(f" TERMINAL_CWD: {os.getenv('TERMINAL_CWD', os.getcwd())}") 2265 from hermes_constants import display_hermes_home as _dhh 2266 print(f" TERMINAL_SANDBOX_DIR: {os.getenv('TERMINAL_SANDBOX_DIR', f'{_dhh()}/sandboxes')}") 2267 print(f" TERMINAL_TIMEOUT: {os.getenv('TERMINAL_TIMEOUT', '60')}") 2268 print(f" TERMINAL_LIFETIME_SECONDS: {os.getenv('TERMINAL_LIFETIME_SECONDS', '300')}") 2269 2270 2271 # --------------------------------------------------------------------------- 2272 # Registry 2273 # --------------------------------------------------------------------------- 2274 from tools.registry import registry 2275 2276 TERMINAL_SCHEMA = { 2277 "name": "terminal", 2278 "description": TERMINAL_TOOL_DESCRIPTION, 2279 "parameters": { 2280 "type": "object", 2281 "properties": { 2282 "command": { 2283 "type": "string", 2284 "description": "The command to execute on the VM" 2285 }, 2286 "background": { 2287 "type": "boolean", 2288 "description": "Run the command in the background. Two patterns: (1) Long-lived processes that never exit (servers, watchers). (2) Long-running tasks paired with notify_on_complete=true — you can keep working and get notified when the task finishes. For short commands, prefer foreground with a generous timeout instead.", 2289 "default": False 2290 }, 2291 "timeout": { 2292 "type": "integer", 2293 "description": f"Max seconds to wait (default: 180, foreground max: {FOREGROUND_MAX_TIMEOUT}). Returns INSTANTLY when command finishes — set high for long tasks, you won't wait unnecessarily. Foreground timeout above {FOREGROUND_MAX_TIMEOUT}s is rejected; use background=true for longer commands.", 2294 "minimum": 1 2295 }, 2296 "workdir": { 2297 "type": "string", 2298 "description": "Working directory for this command (absolute path). Defaults to the session working directory." 2299 }, 2300 "pty": { 2301 "type": "boolean", 2302 "description": "Run in pseudo-terminal (PTY) mode for interactive CLI tools like Codex, Claude Code, or Python REPL. Only works with local and SSH backends. Default: false.", 2303 "default": False 2304 }, 2305 "notify_on_complete": { 2306 "type": "boolean", 2307 "description": "When true (and background=true), you'll be automatically notified exactly once when the process finishes. **This is the right choice for almost every long-running task** — tests, builds, deployments, multi-item batch jobs, anything that takes over a minute and has a defined end. Use this and keep working on other things; the system notifies you on exit. MUTUALLY EXCLUSIVE with watch_patterns — when both are set, watch_patterns is dropped.", 2308 "default": False 2309 }, 2310 "watch_patterns": { 2311 "type": "array", 2312 "items": {"type": "string"}, 2313 "description": "Strings to watch for in background process output. HARD RATE LIMIT: at most 1 notification per 15 seconds per process — matches arriving inside the cooldown are dropped. After 3 consecutive 15-second windows with dropped matches, watch_patterns is automatically disabled for that process and promoted to notify_on_complete behavior (one notification on exit, no more mid-process spam). USE ONLY for truly rare, one-shot mid-process signals on LONG-LIVED processes that will never exit on their own — e.g. ['Application startup complete'] on a server so you know when to hit its endpoint, or ['migration done'] on a daemon. DO NOT use for: (1) end-of-run markers like 'DONE'/'PASS' — use notify_on_complete instead; (2) error patterns like 'ERROR'/'Traceback' in loops or multi-item batch jobs — they fire on every iteration and you'll hit the strike limit fast; (3) anything you'd ever combine with notify_on_complete. When in doubt, choose notify_on_complete. MUTUALLY EXCLUSIVE with notify_on_complete — set one, not both." 2314 } 2315 }, 2316 "required": ["command"] 2317 } 2318 } 2319 2320 2321 def _handle_terminal(args, **kw): 2322 return terminal_tool( 2323 command=args.get("command"), 2324 background=args.get("background", False), 2325 timeout=args.get("timeout"), 2326 task_id=kw.get("task_id"), 2327 workdir=args.get("workdir"), 2328 pty=args.get("pty", False), 2329 notify_on_complete=args.get("notify_on_complete", False), 2330 watch_patterns=args.get("watch_patterns"), 2331 ) 2332 2333 2334 registry.register( 2335 name="terminal", 2336 toolset="terminal", 2337 schema=TERMINAL_SCHEMA, 2338 handler=_handle_terminal, 2339 check_fn=check_terminal_requirements, 2340 emoji="💻", 2341 max_result_size_chars=100_000, 2342 )