/ tests / tools / test_shared_container_task_id.py
test_shared_container_task_id.py
  1  """
  2  Regression tests for the shared-container task_id mapping.
  3  
  4  The top-level agent and all delegate_task subagents share a single
  5  terminal sandbox keyed by ``"default"``.  ``_resolve_container_task_id``
  6  is the sole gatekeeper for which tool-call task_ids go to the shared
  7  container vs. get their own isolated sandbox.  RL / benchmark
  8  environments opt in to isolation by calling
  9  ``register_task_env_overrides(task_id, {...})`` before the agent loop;
 10  every other task_id collapses back to ``"default"``.
 11  
 12  If you change the collapse logic, update both the helper and these
 13  tests -- see `hermes-agent-dev` skill, "Why do subagents get their own
 14  containers?" section, and the Container lifecycle paragraph under
 15  Docker Backend in ``website/docs/user-guide/configuration.md``.
 16  """
 17  
 18  import pytest
 19  
 20  from tools import terminal_tool
 21  
 22  
 23  @pytest.fixture(autouse=True)
 24  def _clean_overrides():
 25      """Ensure no stray overrides from other tests leak in."""
 26      before = dict(terminal_tool._task_env_overrides)
 27      terminal_tool._task_env_overrides.clear()
 28      yield
 29      terminal_tool._task_env_overrides.clear()
 30      terminal_tool._task_env_overrides.update(before)
 31  
 32  
 33  def test_none_task_id_maps_to_default():
 34      assert terminal_tool._resolve_container_task_id(None) == "default"
 35  
 36  
 37  def test_empty_task_id_maps_to_default():
 38      assert terminal_tool._resolve_container_task_id("") == "default"
 39  
 40  
 41  def test_literal_default_stays_default():
 42      assert terminal_tool._resolve_container_task_id("default") == "default"
 43  
 44  
 45  def test_subagent_task_id_collapses_to_default():
 46      # delegate_task constructs IDs like "subagent-<N>-<uuid_hex>"; these
 47      # should share the parent's container, not spin up their own.
 48      assert terminal_tool._resolve_container_task_id("subagent-0-deadbeef") == "default"
 49      assert terminal_tool._resolve_container_task_id("subagent-42-cafef00d") == "default"
 50  
 51  
 52  def test_arbitrary_session_id_collapses_to_default():
 53      # Session UUIDs or anything else without an override still collapse.
 54      assert terminal_tool._resolve_container_task_id("sess-123e4567-e89b-12d3") == "default"
 55  
 56  
 57  def test_rl_task_with_override_keeps_its_own_id():
 58      # RL / benchmark pattern: register a per-task image, then the task_id
 59      # must survive ``_resolve_container_task_id`` so the rollout lands in
 60      # its own sandbox.
 61      terminal_tool.register_task_env_overrides(
 62          "tb2-task-fix-git", {"docker_image": "tb2:fix-git", "cwd": "/app"}
 63      )
 64      try:
 65          assert (
 66              terminal_tool._resolve_container_task_id("tb2-task-fix-git")
 67              == "tb2-task-fix-git"
 68          )
 69      finally:
 70          terminal_tool.clear_task_env_overrides("tb2-task-fix-git")
 71  
 72  
 73  def test_cleared_override_collapses_again():
 74      terminal_tool.register_task_env_overrides("tb2-x", {"docker_image": "x:y"})
 75      assert terminal_tool._resolve_container_task_id("tb2-x") == "tb2-x"
 76      terminal_tool.clear_task_env_overrides("tb2-x")
 77      assert terminal_tool._resolve_container_task_id("tb2-x") == "default"
 78  
 79  
 80  def test_get_active_env_reads_shared_container_from_subagent_id():
 81      """``get_active_env`` must see the shared ``"default"`` sandbox when
 82      called with a subagent's task_id, so the agent loop's turn-budget
 83      enforcement reads the real env (not None) during delegation."""
 84      sentinel = object()
 85      terminal_tool._active_environments["default"] = sentinel
 86      try:
 87          assert terminal_tool.get_active_env("subagent-7-cafe") is sentinel
 88          assert terminal_tool.get_active_env(None) is sentinel
 89          assert terminal_tool.get_active_env("default") is sentinel
 90      finally:
 91          terminal_tool._active_environments.pop("default", None)
 92  
 93  
 94  def test_get_active_env_honours_rl_override():
 95      rl_env = object()
 96      default_env = object()
 97      terminal_tool._active_environments["default"] = default_env
 98      terminal_tool._active_environments["rl-42"] = rl_env
 99      terminal_tool.register_task_env_overrides("rl-42", {"docker_image": "x"})
100      try:
101          # With an override registered, lookup returns the task's own env,
102          # not the shared "default" one.
103          assert terminal_tool.get_active_env("rl-42") is rl_env
104      finally:
105          terminal_tool.clear_task_env_overrides("rl-42")
106          terminal_tool._active_environments.pop("default", None)
107          terminal_tool._active_environments.pop("rl-42", None)