test_shared_container_task_id.py
"""
Regression tests for the shared-container task_id mapping.

The top-level agent and all delegate_task subagents share a single
terminal sandbox keyed by ``"default"``.  ``_resolve_container_task_id``
is the sole gatekeeper for which tool-call task_ids go to the shared
container vs. get their own isolated sandbox.  RL / benchmark
environments opt in to isolation by calling
``register_task_env_overrides(task_id, {...})`` before the agent loop;
every other task_id collapses back to ``"default"``.

If you change the collapse logic, update both the helper and these
tests -- see `hermes-agent-dev` skill, "Why do subagents get their own
containers?" section, and the Container lifecycle paragraph under
Docker Backend in ``website/docs/user-guide/configuration.md``.
"""

import pytest

from tools import terminal_tool


@pytest.fixture(autouse=True)
def _clean_overrides():
    """Run every test with an empty override table, then restore it.

    The pre-test contents of ``terminal_tool._task_env_overrides`` are
    snapshotted, the table is emptied for the duration of the test, and
    the snapshot is written back afterwards so nothing leaks in either
    direction.
    """
    before = dict(terminal_tool._task_env_overrides)
    terminal_tool._task_env_overrides.clear()
    yield
    terminal_tool._task_env_overrides.clear()
    terminal_tool._task_env_overrides.update(before)


def test_none_task_id_maps_to_default():
    """``None`` resolves to the shared ``"default"`` container."""
    assert terminal_tool._resolve_container_task_id(None) == "default"


def test_empty_task_id_maps_to_default():
    """An empty string resolves to the shared ``"default"`` container."""
    assert terminal_tool._resolve_container_task_id("") == "default"


def test_literal_default_stays_default():
    """The literal ``"default"`` id is returned unchanged."""
    assert terminal_tool._resolve_container_task_id("default") == "default"


def test_subagent_task_id_collapses_to_default():
    """Subagent-style ids without an override map to ``"default"``."""
    # delegate_task constructs IDs like "subagent-<N>-<uuid_hex>"; these
    # should share the parent's container, not spin up their own.
    assert terminal_tool._resolve_container_task_id("subagent-0-deadbeef") == "default"
    assert terminal_tool._resolve_container_task_id("subagent-42-cafef00d") == "default"


def test_arbitrary_session_id_collapses_to_default():
    """Any other id without an override also maps to ``"default"``."""
    # Session UUIDs or anything else without an override still collapse.
    assert terminal_tool._resolve_container_task_id("sess-123e4567-e89b-12d3") == "default"


def test_rl_task_with_override_keeps_its_own_id():
    """A task_id with a registered override is returned as-is."""
    # RL / benchmark pattern: register a per-task image, then the task_id
    # must survive ``_resolve_container_task_id`` so the rollout lands in
    # its own sandbox.
    terminal_tool.register_task_env_overrides(
        "tb2-task-fix-git", {"docker_image": "tb2:fix-git", "cwd": "/app"}
    )
    try:
        assert (
            terminal_tool._resolve_container_task_id("tb2-task-fix-git")
            == "tb2-task-fix-git"
        )
    finally:
        terminal_tool.clear_task_env_overrides("tb2-task-fix-git")


def test_cleared_override_collapses_again():
    """Clearing an override makes the task_id map to ``"default"`` again."""
    terminal_tool.register_task_env_overrides("tb2-x", {"docker_image": "x:y"})
    assert terminal_tool._resolve_container_task_id("tb2-x") == "tb2-x"
    terminal_tool.clear_task_env_overrides("tb2-x")
    assert terminal_tool._resolve_container_task_id("tb2-x") == "default"


def test_get_active_env_reads_shared_container_from_subagent_id(monkeypatch):
    """``get_active_env`` must see the shared ``"default"`` sandbox when
    called with a subagent's task_id, so the agent loop's turn-budget
    enforcement reads the real env (not None) during delegation."""
    sentinel = object()
    # monkeypatch.setitem puts back whatever was stored under "default"
    # before the test (or removes the key if it was absent), so this test
    # cannot clobber an environment that some other code registered.
    monkeypatch.setitem(terminal_tool._active_environments, "default", sentinel)

    assert terminal_tool.get_active_env("subagent-7-cafe") is sentinel
    assert terminal_tool.get_active_env(None) is sentinel
    assert terminal_tool.get_active_env("default") is sentinel


def test_get_active_env_honours_rl_override(monkeypatch):
    """With an override registered, ``get_active_env`` returns the env
    stored under the task's own id rather than the shared one."""
    rl_env = object()
    default_env = object()
    # Both entries are restored/removed automatically at teardown, even if
    # the override registration below raises.
    monkeypatch.setitem(terminal_tool._active_environments, "default", default_env)
    monkeypatch.setitem(terminal_tool._active_environments, "rl-42", rl_env)
    terminal_tool.register_task_env_overrides("rl-42", {"docker_image": "x"})
    try:
        # With an override registered, lookup returns the task's own env,
        # not the shared "default" one.
        assert terminal_tool.get_active_env("rl-42") is rl_env
    finally:
        terminal_tool.clear_task_env_overrides("rl-42")