/ tests / tools / test_delegate.py
test_delegate.py
   1  #!/usr/bin/env python3
   2  """
   3  Tests for the subagent delegation tool.
   4  
   5  Uses mock AIAgent instances to test the delegation logic without
   6  requiring API keys or real LLM calls.
   7  
Run with:  python -m pytest tests/tools/test_delegate.py -v
   or:     python tests/tools/test_delegate.py
  10  """
  11  
  12  import json
  13  import os
  14  import sys
  15  import threading
  16  import time
  17  import unittest
  18  from unittest.mock import MagicMock, patch
  19  
  20  from tools.delegate_tool import (
  21      DELEGATE_BLOCKED_TOOLS,
  22      DELEGATE_TASK_SCHEMA,
  23      DelegateEvent,
  24      _get_max_concurrent_children,
  25      _LEGACY_EVENT_MAP,
  26      MAX_DEPTH,
  27      check_delegate_requirements,
  28      delegate_task,
  29      _build_child_agent,
  30      _build_child_progress_callback,
  31      _build_child_system_prompt,
  32      _strip_blocked_tools,
  33      _resolve_child_credential_pool,
  34      _resolve_delegation_credentials,
  35  )
  36  
  37  
  38  def _make_mock_parent(depth=0):
  39      """Create a mock parent agent with the fields delegate_task expects."""
  40      parent = MagicMock()
  41      parent.base_url = "https://openrouter.ai/api/v1"
  42      parent.api_key="***"
  43      parent.provider = "openrouter"
  44      parent.api_mode = "chat_completions"
  45      parent.model = "anthropic/claude-sonnet-4"
  46      parent.platform = "cli"
  47      parent.providers_allowed = None
  48      parent.providers_ignored = None
  49      parent.providers_order = None
  50      parent.provider_sort = None
  51      parent._session_db = None
  52      parent._delegate_depth = depth
  53      parent._active_children = []
  54      parent._active_children_lock = threading.Lock()
  55      parent._print_fn = None
  56      parent.tool_progress_callback = None
  57      parent.thinking_callback = None
  58      return parent
  59  
  60  
  61  class TestDelegateRequirements(unittest.TestCase):
  62      def test_always_available(self):
  63          self.assertTrue(check_delegate_requirements())
  64  
  65      def test_schema_valid(self):
  66          self.assertEqual(DELEGATE_TASK_SCHEMA["name"], "delegate_task")
  67          props = DELEGATE_TASK_SCHEMA["parameters"]["properties"]
  68          self.assertIn("goal", props)
  69          self.assertIn("tasks", props)
  70          self.assertIn("context", props)
  71          self.assertIn("toolsets", props)
  72          # max_iterations is intentionally NOT exposed to the model — it's
  73          # config-authoritative via delegation.max_iterations so users get
  74          # predictable budgets.
  75          self.assertNotIn("max_iterations", props)
  76          self.assertNotIn("maxItems", props["tasks"])  # removed — limit is now runtime-configurable
  77  
  78  
  79  class TestChildSystemPrompt(unittest.TestCase):
  80      def test_goal_only(self):
  81          prompt = _build_child_system_prompt("Fix the tests")
  82          self.assertIn("Fix the tests", prompt)
  83          self.assertIn("YOUR TASK", prompt)
  84          self.assertNotIn("CONTEXT", prompt)
  85  
  86      def test_goal_with_context(self):
  87          prompt = _build_child_system_prompt("Fix the tests", "Error: assertion failed in test_foo.py line 42")
  88          self.assertIn("Fix the tests", prompt)
  89          self.assertIn("CONTEXT", prompt)
  90          self.assertIn("assertion failed", prompt)
  91  
  92      def test_empty_context_ignored(self):
  93          prompt = _build_child_system_prompt("Do something", "  ")
  94          self.assertNotIn("CONTEXT", prompt)
  95  
  96  
  97  class TestStripBlockedTools(unittest.TestCase):
  98      def test_removes_blocked_toolsets(self):
  99          result = _strip_blocked_tools(["terminal", "file", "delegation", "clarify", "memory", "code_execution"])
 100          self.assertEqual(sorted(result), ["file", "terminal"])
 101  
 102      def test_preserves_allowed_toolsets(self):
 103          result = _strip_blocked_tools(["terminal", "file", "web", "browser"])
 104          self.assertEqual(sorted(result), ["browser", "file", "terminal", "web"])
 105  
 106      def test_empty_input(self):
 107          result = _strip_blocked_tools([])
 108          self.assertEqual(result, [])
 109  
 110  
 111  class TestDelegateTask(unittest.TestCase):
 112      def test_no_parent_agent(self):
 113          result = json.loads(delegate_task(goal="test"))
 114          self.assertIn("error", result)
 115          self.assertIn("parent agent", result["error"])
 116  
 117      def test_depth_limit(self):
 118          parent = _make_mock_parent(depth=2)
 119          result = json.loads(delegate_task(goal="test", parent_agent=parent))
 120          self.assertIn("error", result)
 121          self.assertIn("depth limit", result["error"].lower())
 122  
 123      def test_no_goal_or_tasks(self):
 124          parent = _make_mock_parent()
 125          result = json.loads(delegate_task(parent_agent=parent))
 126          self.assertIn("error", result)
 127  
 128      def test_empty_goal(self):
 129          parent = _make_mock_parent()
 130          result = json.loads(delegate_task(goal="  ", parent_agent=parent))
 131          self.assertIn("error", result)
 132  
 133      def test_task_missing_goal(self):
 134          parent = _make_mock_parent()
 135          result = json.loads(delegate_task(tasks=[{"context": "no goal here"}], parent_agent=parent))
 136          self.assertIn("error", result)
 137  
 138      @patch("tools.delegate_tool._run_single_child")
 139      def test_single_task_mode(self, mock_run):
 140          mock_run.return_value = {
 141              "task_index": 0, "status": "completed",
 142              "summary": "Done!", "api_calls": 3, "duration_seconds": 5.0
 143          }
 144          parent = _make_mock_parent()
 145          result = json.loads(delegate_task(goal="Fix tests", context="error log...", parent_agent=parent))
 146          self.assertIn("results", result)
 147          self.assertEqual(len(result["results"]), 1)
 148          self.assertEqual(result["results"][0]["status"], "completed")
 149          self.assertEqual(result["results"][0]["summary"], "Done!")
 150          mock_run.assert_called_once()
 151  
 152      @patch("tools.delegate_tool._run_single_child")
 153      def test_batch_mode(self, mock_run):
 154          mock_run.side_effect = [
 155              {"task_index": 0, "status": "completed", "summary": "Result A", "api_calls": 2, "duration_seconds": 3.0},
 156              {"task_index": 1, "status": "completed", "summary": "Result B", "api_calls": 4, "duration_seconds": 6.0},
 157          ]
 158          parent = _make_mock_parent()
 159          tasks = [
 160              {"goal": "Research topic A"},
 161              {"goal": "Research topic B"},
 162          ]
 163          result = json.loads(delegate_task(tasks=tasks, parent_agent=parent))
 164          self.assertIn("results", result)
 165          self.assertEqual(len(result["results"]), 2)
 166          self.assertEqual(result["results"][0]["summary"], "Result A")
 167          self.assertEqual(result["results"][1]["summary"], "Result B")
 168          self.assertIn("total_duration_seconds", result)
 169  
 170      @patch("tools.delegate_tool._run_single_child")
 171      def test_batch_capped_at_3(self, mock_run):
 172          mock_run.return_value = {
 173              "task_index": 0, "status": "completed",
 174              "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
 175          }
 176          parent = _make_mock_parent()
 177          limit = _get_max_concurrent_children()
 178          tasks = [{"goal": f"Task {i}"} for i in range(limit + 2)]
 179          result = json.loads(delegate_task(tasks=tasks, parent_agent=parent))
 180          # Should return an error instead of silently truncating
 181          self.assertIn("error", result)
 182          self.assertIn("Too many tasks", result["error"])
 183          mock_run.assert_not_called()
 184  
 185      @patch("tools.delegate_tool._run_single_child")
 186      def test_batch_ignores_toplevel_goal(self, mock_run):
 187          """When tasks array is provided, top-level goal/context/toolsets are ignored."""
 188          mock_run.return_value = {
 189              "task_index": 0, "status": "completed",
 190              "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
 191          }
 192          parent = _make_mock_parent()
 193          result = json.loads(delegate_task(
 194              goal="This should be ignored",
 195              tasks=[{"goal": "Actual task"}],
 196              parent_agent=parent,
 197          ))
 198          # The mock was called with the tasks array item, not the top-level goal
 199          call_args = mock_run.call_args
 200          self.assertEqual(call_args.kwargs.get("goal") or call_args[1].get("goal", call_args[0][1] if len(call_args[0]) > 1 else None), "Actual task")
 201  
 202      @patch("tools.delegate_tool._run_single_child")
 203      def test_failed_child_included_in_results(self, mock_run):
 204          mock_run.return_value = {
 205              "task_index": 0, "status": "error",
 206              "summary": None, "error": "Something broke",
 207              "api_calls": 0, "duration_seconds": 0.5
 208          }
 209          parent = _make_mock_parent()
 210          result = json.loads(delegate_task(goal="Break things", parent_agent=parent))
 211          self.assertEqual(result["results"][0]["status"], "error")
 212          self.assertIn("Something broke", result["results"][0]["error"])
 213  
 214      def test_depth_increments(self):
 215          """Verify child gets parent's depth + 1."""
 216          parent = _make_mock_parent(depth=0)
 217  
 218          with patch("run_agent.AIAgent") as MockAgent:
 219              mock_child = MagicMock()
 220              mock_child.run_conversation.return_value = {
 221                  "final_response": "done", "completed": True, "api_calls": 1
 222              }
 223              MockAgent.return_value = mock_child
 224  
 225              delegate_task(goal="Test depth", parent_agent=parent)
 226              self.assertEqual(mock_child._delegate_depth, 1)
 227  
 228      def test_active_children_tracking(self):
 229          """Verify children are registered/unregistered for interrupt propagation."""
 230          parent = _make_mock_parent(depth=0)
 231  
 232          with patch("run_agent.AIAgent") as MockAgent:
 233              mock_child = MagicMock()
 234              mock_child.run_conversation.return_value = {
 235                  "final_response": "done", "completed": True, "api_calls": 1
 236              }
 237              MockAgent.return_value = mock_child
 238  
 239              delegate_task(goal="Test tracking", parent_agent=parent)
 240              self.assertEqual(len(parent._active_children), 0)
 241  
 242      def test_child_inherits_runtime_credentials(self):
 243          parent = _make_mock_parent(depth=0)
 244          parent.base_url = "https://chatgpt.com/backend-api/codex"
 245          parent.api_key="***"
 246          parent.provider = "openai-codex"
 247          parent.api_mode = "codex_responses"
 248  
 249          with patch("run_agent.AIAgent") as MockAgent:
 250              mock_child = MagicMock()
 251              mock_child.run_conversation.return_value = {
 252                  "final_response": "ok",
 253                  "completed": True,
 254                  "api_calls": 1,
 255              }
 256              MockAgent.return_value = mock_child
 257  
 258              delegate_task(goal="Test runtime inheritance", parent_agent=parent)
 259  
 260              _, kwargs = MockAgent.call_args
 261              self.assertEqual(kwargs["base_url"], parent.base_url)
 262              self.assertEqual(kwargs["api_key"], parent.api_key)
 263              self.assertEqual(kwargs["provider"], parent.provider)
 264              self.assertEqual(kwargs["api_mode"], parent.api_mode)
 265  
 266      def test_child_inherits_parent_print_fn(self):
 267          parent = _make_mock_parent(depth=0)
 268          sink = MagicMock()
 269          parent._print_fn = sink
 270  
 271          with patch("run_agent.AIAgent") as MockAgent:
 272              mock_child = MagicMock()
 273              MockAgent.return_value = mock_child
 274  
 275              _build_child_agent(
 276                  task_index=0,
 277                  goal="Keep stdout clean",
 278                  context=None,
 279                  toolsets=None,
 280                  model=None,
 281                  max_iterations=10,
 282                  parent_agent=parent,
 283                  task_count=1,
 284              )
 285  
 286          self.assertIs(mock_child._print_fn, sink)
 287  
 288      def test_child_uses_thinking_callback_when_progress_callback_available(self):
 289          parent = _make_mock_parent(depth=0)
 290          parent.tool_progress_callback = MagicMock()
 291  
 292          with patch("run_agent.AIAgent") as MockAgent:
 293              mock_child = MagicMock()
 294              MockAgent.return_value = mock_child
 295  
 296              _build_child_agent(
 297                  task_index=0,
 298                  goal="Avoid raw child spinners",
 299                  context=None,
 300                  toolsets=None,
 301                  model=None,
 302                  max_iterations=10,
 303                  parent_agent=parent,
 304                  task_count=1,
 305              )
 306  
 307          self.assertTrue(callable(mock_child.thinking_callback))
 308          mock_child.thinking_callback("deliberating...")
 309          parent.tool_progress_callback.assert_not_called()
 310  
 311  
 312  class TestToolNamePreservation(unittest.TestCase):
 313      """Verify _last_resolved_tool_names is restored after subagent runs."""
 314  
 315      def test_global_tool_names_restored_after_delegation(self):
 316          """The process-global _last_resolved_tool_names must be restored
 317          after a subagent completes so the parent's execute_code sandbox
 318          generates correct imports."""
 319          import model_tools
 320  
 321          parent = _make_mock_parent(depth=0)
 322          original_tools = ["terminal", "read_file", "web_search", "execute_code", "delegate_task"]
 323          model_tools._last_resolved_tool_names = list(original_tools)
 324  
 325          with patch("run_agent.AIAgent") as MockAgent:
 326              mock_child = MagicMock()
 327              mock_child.run_conversation.return_value = {
 328                  "final_response": "done", "completed": True, "api_calls": 1,
 329              }
 330              MockAgent.return_value = mock_child
 331  
 332              delegate_task(goal="Test tool preservation", parent_agent=parent)
 333  
 334          self.assertEqual(model_tools._last_resolved_tool_names, original_tools)
 335  
 336      def test_global_tool_names_restored_after_child_failure(self):
 337          """Even when the child agent raises, the global must be restored."""
 338          import model_tools
 339  
 340          parent = _make_mock_parent(depth=0)
 341          original_tools = ["terminal", "read_file", "web_search"]
 342          model_tools._last_resolved_tool_names = list(original_tools)
 343  
 344          with patch("run_agent.AIAgent") as MockAgent:
 345              mock_child = MagicMock()
 346              mock_child.run_conversation.side_effect = RuntimeError("boom")
 347              MockAgent.return_value = mock_child
 348  
 349              result = json.loads(delegate_task(goal="Crash test", parent_agent=parent))
 350              self.assertEqual(result["results"][0]["status"], "error")
 351  
 352          self.assertEqual(model_tools._last_resolved_tool_names, original_tools)
 353  
 354      def test_build_child_agent_does_not_raise_name_error(self):
 355          """Regression: _build_child_agent must not reference _saved_tool_names.
 356  
 357          The bug introduced by the e7844e9c merge conflict: line 235 inside
 358          _build_child_agent read `list(_saved_tool_names)` where that variable
 359          is only defined later in _run_single_child.  Calling _build_child_agent
 360          standalone (without _run_single_child's scope) must never raise NameError.
 361          """
 362          parent = _make_mock_parent(depth=0)
 363  
 364          with patch("run_agent.AIAgent"):
 365              try:
 366                  _build_child_agent(
 367                      task_index=0,
 368                      goal="regression check",
 369                      context=None,
 370                      toolsets=None,
 371                      model=None,
 372                      max_iterations=10,
 373                      parent_agent=parent,
 374                      task_count=1,
 375                  )
 376              except NameError as exc:
 377                  self.fail(
 378                      f"_build_child_agent raised NameError — "
 379                      f"_saved_tool_names leaked back into wrong scope: {exc}"
 380                  )
 381  
 382      def test_saved_tool_names_set_on_child_before_run(self):
 383          """_run_single_child must set _delegate_saved_tool_names on the child
 384          from model_tools._last_resolved_tool_names before run_conversation."""
 385          import model_tools
 386  
 387          parent = _make_mock_parent(depth=0)
 388          expected_tools = ["read_file", "web_search", "execute_code"]
 389          model_tools._last_resolved_tool_names = list(expected_tools)
 390  
 391          captured = {}
 392  
 393          with patch("run_agent.AIAgent") as MockAgent:
 394              mock_child = MagicMock()
 395  
 396              def capture_and_return(user_message, task_id=None):
 397                  captured["saved"] = list(mock_child._delegate_saved_tool_names)
 398                  return {"final_response": "ok", "completed": True, "api_calls": 1}
 399  
 400              mock_child.run_conversation.side_effect = capture_and_return
 401              MockAgent.return_value = mock_child
 402  
 403              delegate_task(goal="capture test", parent_agent=parent)
 404  
 405          self.assertEqual(captured["saved"], expected_tools)
 406  
 407  
 408  class TestDelegateObservability(unittest.TestCase):
 409      """Tests for enriched metadata returned by _run_single_child."""
 410  
 411      def test_observability_fields_present(self):
 412          """Completed child should return tool_trace, tokens, model, exit_reason."""
 413          parent = _make_mock_parent(depth=0)
 414  
 415          with patch("run_agent.AIAgent") as MockAgent:
 416              mock_child = MagicMock()
 417              mock_child.model = "claude-sonnet-4-6"
 418              mock_child.session_prompt_tokens = 5000
 419              mock_child.session_completion_tokens = 1200
 420              mock_child.run_conversation.return_value = {
 421                  "final_response": "done",
 422                  "completed": True,
 423                  "interrupted": False,
 424                  "api_calls": 3,
 425                  "messages": [
 426                      {"role": "user", "content": "do something"},
 427                      {"role": "assistant", "tool_calls": [
 428                          {"id": "tc_1", "function": {"name": "web_search", "arguments": '{"query": "test"}'}}
 429                      ]},
 430                      {"role": "tool", "tool_call_id": "tc_1", "content": '{"results": [1,2,3]}'},
 431                      {"role": "assistant", "content": "done"},
 432                  ],
 433              }
 434              MockAgent.return_value = mock_child
 435  
 436              result = json.loads(delegate_task(goal="Test observability", parent_agent=parent))
 437              entry = result["results"][0]
 438  
 439              # Core observability fields
 440              self.assertEqual(entry["model"], "claude-sonnet-4-6")
 441              self.assertEqual(entry["exit_reason"], "completed")
 442              self.assertEqual(entry["tokens"]["input"], 5000)
 443              self.assertEqual(entry["tokens"]["output"], 1200)
 444  
 445              # Tool trace
 446              self.assertEqual(len(entry["tool_trace"]), 1)
 447              self.assertEqual(entry["tool_trace"][0]["tool"], "web_search")
 448              self.assertIn("args_bytes", entry["tool_trace"][0])
 449              self.assertIn("result_bytes", entry["tool_trace"][0])
 450              self.assertEqual(entry["tool_trace"][0]["status"], "ok")
 451  
 452      def test_tool_trace_detects_error(self):
 453          """Tool results containing 'error' should be marked as error status."""
 454          parent = _make_mock_parent(depth=0)
 455  
 456          with patch("run_agent.AIAgent") as MockAgent:
 457              mock_child = MagicMock()
 458              mock_child.model = "claude-sonnet-4-6"
 459              mock_child.session_prompt_tokens = 0
 460              mock_child.session_completion_tokens = 0
 461              mock_child.run_conversation.return_value = {
 462                  "final_response": "failed",
 463                  "completed": True,
 464                  "interrupted": False,
 465                  "api_calls": 1,
 466                  "messages": [
 467                      {"role": "assistant", "tool_calls": [
 468                          {"id": "tc_1", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}}
 469                      ]},
 470                      {"role": "tool", "tool_call_id": "tc_1", "content": "Error: command not found"},
 471                  ],
 472              }
 473              MockAgent.return_value = mock_child
 474  
 475              result = json.loads(delegate_task(goal="Test error trace", parent_agent=parent))
 476              trace = result["results"][0]["tool_trace"]
 477              self.assertEqual(trace[0]["status"], "error")
 478  
 479      def test_parallel_tool_calls_paired_correctly(self):
 480          """Parallel tool calls should each get their own result via tool_call_id matching."""
 481          parent = _make_mock_parent(depth=0)
 482  
 483          with patch("run_agent.AIAgent") as MockAgent:
 484              mock_child = MagicMock()
 485              mock_child.model = "claude-sonnet-4-6"
 486              mock_child.session_prompt_tokens = 3000
 487              mock_child.session_completion_tokens = 800
 488              mock_child.run_conversation.return_value = {
 489                  "final_response": "done",
 490                  "completed": True,
 491                  "interrupted": False,
 492                  "api_calls": 1,
 493                  "messages": [
 494                      {"role": "assistant", "tool_calls": [
 495                          {"id": "tc_a", "function": {"name": "web_search", "arguments": '{"q": "a"}'}},
 496                          {"id": "tc_b", "function": {"name": "web_search", "arguments": '{"q": "b"}'}},
 497                          {"id": "tc_c", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}},
 498                      ]},
 499                      {"role": "tool", "tool_call_id": "tc_a", "content": '{"ok": true}'},
 500                      {"role": "tool", "tool_call_id": "tc_b", "content": "Error: rate limited"},
 501                      {"role": "tool", "tool_call_id": "tc_c", "content": "file1.txt\nfile2.txt"},
 502                      {"role": "assistant", "content": "done"},
 503                  ],
 504              }
 505              MockAgent.return_value = mock_child
 506  
 507              result = json.loads(delegate_task(goal="Test parallel", parent_agent=parent))
 508              trace = result["results"][0]["tool_trace"]
 509  
 510              # All three tool calls should have results
 511              self.assertEqual(len(trace), 3)
 512  
 513              # First: web_search → ok
 514              self.assertEqual(trace[0]["tool"], "web_search")
 515              self.assertEqual(trace[0]["status"], "ok")
 516              self.assertIn("result_bytes", trace[0])
 517  
 518              # Second: web_search → error
 519              self.assertEqual(trace[1]["tool"], "web_search")
 520              self.assertEqual(trace[1]["status"], "error")
 521              self.assertIn("result_bytes", trace[1])
 522  
 523              # Third: terminal → ok
 524              self.assertEqual(trace[2]["tool"], "terminal")
 525              self.assertEqual(trace[2]["status"], "ok")
 526              self.assertIn("result_bytes", trace[2])
 527  
 528      def test_exit_reason_interrupted(self):
 529          """Interrupted child should report exit_reason='interrupted'."""
 530          parent = _make_mock_parent(depth=0)
 531  
 532          with patch("run_agent.AIAgent") as MockAgent:
 533              mock_child = MagicMock()
 534              mock_child.model = "claude-sonnet-4-6"
 535              mock_child.session_prompt_tokens = 0
 536              mock_child.session_completion_tokens = 0
 537              mock_child.run_conversation.return_value = {
 538                  "final_response": "",
 539                  "completed": False,
 540                  "interrupted": True,
 541                  "api_calls": 2,
 542                  "messages": [],
 543              }
 544              MockAgent.return_value = mock_child
 545  
 546              result = json.loads(delegate_task(goal="Test interrupt", parent_agent=parent))
 547              self.assertEqual(result["results"][0]["exit_reason"], "interrupted")
 548  
 549      def test_exit_reason_max_iterations(self):
 550          """Child that didn't complete and wasn't interrupted hit max_iterations."""
 551          parent = _make_mock_parent(depth=0)
 552  
 553          with patch("run_agent.AIAgent") as MockAgent:
 554              mock_child = MagicMock()
 555              mock_child.model = "claude-sonnet-4-6"
 556              mock_child.session_prompt_tokens = 0
 557              mock_child.session_completion_tokens = 0
 558              mock_child.run_conversation.return_value = {
 559                  "final_response": "",
 560                  "completed": False,
 561                  "interrupted": False,
 562                  "api_calls": 50,
 563                  "messages": [],
 564              }
 565              MockAgent.return_value = mock_child
 566  
 567              result = json.loads(delegate_task(goal="Test max iter", parent_agent=parent))
 568              self.assertEqual(result["results"][0]["exit_reason"], "max_iterations")
 569  
 570  
 571  class TestSubagentCostRollup(unittest.TestCase):
 572      """Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd
 573      must include subagent spend, not just the parent's own API calls."""
 574  
 575      def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0):
 576          parent = _make_mock_parent(depth=depth)
 577          # The fields AIAgent exposes and the footer reads from.  Set real
 578          # floats/strings so the rollup can add to them rather than tripping
 579          # on MagicMock auto-attrs.
 580          parent.session_estimated_cost_usd = starting_cost
 581          parent.session_cost_status = "unknown"
 582          parent.session_cost_source = "none"
 583          return parent
 584  
 585      def test_single_child_cost_folded_into_parent(self):
 586          parent = self._make_parent_with_cost_counters(starting_cost=0.10)
 587  
 588          with patch("run_agent.AIAgent") as MockAgent:
 589              mock_child = MagicMock()
 590              mock_child.model = "claude-sonnet-4-6"
 591              mock_child.session_prompt_tokens = 1000
 592              mock_child.session_completion_tokens = 200
 593              mock_child.session_estimated_cost_usd = 0.42
 594              mock_child.run_conversation.return_value = {
 595                  "final_response": "done",
 596                  "completed": True,
 597                  "interrupted": False,
 598                  "api_calls": 2,
 599                  "messages": [],
 600              }
 601              MockAgent.return_value = mock_child
 602  
 603              result = json.loads(delegate_task(goal="do stuff", parent_agent=parent))
 604  
 605          # Parent footer must reflect parent_cost + child_cost.
 606          self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6)
 607          # Rollup must strip the internal field before serialising to the model.
 608          self.assertNotIn("_child_cost_usd", result["results"][0])
 609          self.assertNotIn("_child_role", result["results"][0])
 610  
 611      def test_batch_children_costs_sum_into_parent(self):
 612          parent = self._make_parent_with_cost_counters(starting_cost=0.00)
 613  
 614          with patch("tools.delegate_tool._run_single_child") as mock_run:
 615              mock_run.side_effect = [
 616                  {
 617                      "task_index": 0,
 618                      "status": "completed",
 619                      "summary": "A",
 620                      "api_calls": 2,
 621                      "duration_seconds": 1.0,
 622                      "_child_role": "leaf",
 623                      "_child_cost_usd": 0.15,
 624                  },
 625                  {
 626                      "task_index": 1,
 627                      "status": "completed",
 628                      "summary": "B",
 629                      "api_calls": 2,
 630                      "duration_seconds": 1.0,
 631                      "_child_role": "leaf",
 632                      "_child_cost_usd": 0.27,
 633                  },
 634                  {
 635                      "task_index": 2,
 636                      "status": "failed",
 637                      "summary": "",
 638                      "error": "boom",
 639                      "api_calls": 0,
 640                      "duration_seconds": 0.1,
 641                      "_child_role": "leaf",
 642                      "_child_cost_usd": 0.03,
 643                  },
 644              ]
 645              result = json.loads(
 646                  delegate_task(
 647                      tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}],
 648                      parent_agent=parent,
 649                  )
 650              )
 651  
 652          # 0.15 + 0.27 + 0.03 even though one child failed — the API calls it
 653          # made before failing still cost money.
 654          self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6)
 655          # cost_source promoted from "none" since the parent had no direct spend.
 656          self.assertEqual(parent.session_cost_source, "subagent")
 657          self.assertEqual(parent.session_cost_status, "estimated")
 658          # All internal fields stripped from results.
 659          for entry in result["results"]:
 660              self.assertNotIn("_child_cost_usd", entry)
 661              self.assertNotIn("_child_role", entry)
 662  
 663      def test_zero_cost_children_leave_parent_source_untouched(self):
 664          """If every child reports 0 cost (e.g. free local model), we should
 665          not invent a fake 'subagent' source — the parent's 'none' stays."""
 666          parent = self._make_parent_with_cost_counters(starting_cost=0.00)
 667  
 668          with patch("tools.delegate_tool._run_single_child") as mock_run:
 669              mock_run.return_value = {
 670                  "task_index": 0,
 671                  "status": "completed",
 672                  "summary": "done",
 673                  "api_calls": 1,
 674                  "duration_seconds": 0.5,
 675                  "_child_role": "leaf",
 676                  "_child_cost_usd": 0.0,
 677              }
 678              delegate_task(goal="free local run", parent_agent=parent)
 679  
 680          self.assertEqual(parent.session_estimated_cost_usd, 0.0)
 681          self.assertEqual(parent.session_cost_source, "none")
 682  
 683      def test_parent_with_real_source_not_overwritten(self):
 684          """If the parent already has its own cost billed (cost_source != 'none'),
 685          adding subagent cost must not clobber the existing source label."""
 686          parent = self._make_parent_with_cost_counters(starting_cost=0.20)
 687          parent.session_cost_status = "exact"
 688          parent.session_cost_source = "openrouter"
 689  
 690          with patch("tools.delegate_tool._run_single_child") as mock_run:
 691              mock_run.return_value = {
 692                  "task_index": 0,
 693                  "status": "completed",
 694                  "summary": "done",
 695                  "api_calls": 1,
 696                  "duration_seconds": 0.5,
 697                  "_child_role": "leaf",
 698                  "_child_cost_usd": 0.30,
 699              }
 700              delegate_task(goal="billed run", parent_agent=parent)
 701  
 702          self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6)
 703          # Real source label preserved.
 704          self.assertEqual(parent.session_cost_source, "openrouter")
 705          self.assertEqual(parent.session_cost_status, "exact")
 706  
 707      def test_rollup_tolerates_missing_cost_fields(self):
 708          """Older fixtures / fabricated error entries may not carry
 709          _child_cost_usd.  Rollup must degrade to zero-add silently."""
 710          parent = self._make_parent_with_cost_counters(starting_cost=0.10)
 711  
 712          with patch("tools.delegate_tool._run_single_child") as mock_run:
 713              mock_run.return_value = {
 714                  "task_index": 0,
 715                  "status": "completed",
 716                  "summary": "done",
 717                  "api_calls": 1,
 718                  "duration_seconds": 0.5,
 719                  # no _child_role, no _child_cost_usd
 720              }
 721              result = json.loads(delegate_task(goal="legacy", parent_agent=parent))
 722  
 723          # Parent cost unchanged.
 724          self.assertEqual(parent.session_estimated_cost_usd, 0.10)
 725          self.assertEqual(len(result["results"]), 1)
 726  
 727  
 728  class TestBlockedTools(unittest.TestCase):
 729      def test_blocked_tools_constant(self):
 730          for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]:
 731              self.assertIn(tool, DELEGATE_BLOCKED_TOOLS)
 732  
 733      def test_constants(self):
 734          from tools.delegate_tool import (
 735              _get_max_spawn_depth, _get_orchestrator_enabled,
 736              _MIN_SPAWN_DEPTH, _MAX_SPAWN_DEPTH_CAP,
 737          )
 738          self.assertEqual(_get_max_concurrent_children(), 3)
 739          self.assertEqual(MAX_DEPTH, 1)
 740          self.assertEqual(_get_max_spawn_depth(), 1)       # default: flat
 741          self.assertTrue(_get_orchestrator_enabled())      # default
 742          self.assertEqual(_MIN_SPAWN_DEPTH, 1)
 743          self.assertEqual(_MAX_SPAWN_DEPTH_CAP, 3)
 744  
 745  
 746  class TestDelegationCredentialResolution(unittest.TestCase):
 747      """Tests for provider:model credential resolution in delegation config."""
 748  
 749      def test_no_provider_returns_none_credentials(self):
 750          """When delegation.provider is empty, all credentials are None (inherit parent)."""
 751          parent = _make_mock_parent(depth=0)
 752          cfg = {"model": "", "provider": ""}
 753          creds = _resolve_delegation_credentials(cfg, parent)
 754          self.assertIsNone(creds["provider"])
 755          self.assertIsNone(creds["base_url"])
 756          self.assertIsNone(creds["api_key"])
 757          self.assertIsNone(creds["api_mode"])
 758          self.assertIsNone(creds["model"])
 759  
 760      def test_model_only_no_provider(self):
 761          """When only model is set (no provider), model is returned but credentials are None."""
 762          parent = _make_mock_parent(depth=0)
 763          cfg = {"model": "google/gemini-3-flash-preview", "provider": ""}
 764          creds = _resolve_delegation_credentials(cfg, parent)
 765          self.assertEqual(creds["model"], "google/gemini-3-flash-preview")
 766          self.assertIsNone(creds["provider"])
 767          self.assertIsNone(creds["base_url"])
 768          self.assertIsNone(creds["api_key"])
 769  
 770      @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
 771      def test_provider_resolves_full_credentials(self, mock_resolve):
 772          """When delegation.provider is set, full credentials are resolved."""
 773          mock_resolve.return_value = {
 774              "provider": "openrouter",
 775              "base_url": "https://openrouter.ai/api/v1",
 776              "api_key": "sk-or-test-key",
 777              "api_mode": "chat_completions",
 778          }
 779          parent = _make_mock_parent(depth=0)
 780          cfg = {"model": "google/gemini-3-flash-preview", "provider": "openrouter"}
 781          creds = _resolve_delegation_credentials(cfg, parent)
 782          self.assertEqual(creds["model"], "google/gemini-3-flash-preview")
 783          self.assertEqual(creds["provider"], "openrouter")
 784          self.assertEqual(creds["base_url"], "https://openrouter.ai/api/v1")
 785          self.assertEqual(creds["api_key"], "sk-or-test-key")
 786          self.assertEqual(creds["api_mode"], "chat_completions")
 787          mock_resolve.assert_called_once_with(requested="openrouter")
 788  
 789      @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
 790      def test_provider_resolution_uses_runtime_model_when_config_model_missing(self, mock_resolve):
 791          """Named providers should propagate their runtime default model to children."""
 792          mock_resolve.return_value = {
 793              "provider": "custom",
 794              "base_url": "https://my-server.example/v1",
 795              "api_key": "sk-test-key",
 796              "api_mode": "chat_completions",
 797              "model": "server-default-model",
 798          }
 799          parent = _make_mock_parent(depth=0)
 800          cfg = {"provider": "custom:my-server", "model": ""}
 801  
 802          creds = _resolve_delegation_credentials(cfg, parent)
 803  
 804          self.assertEqual(creds["model"], "server-default-model")
 805          self.assertEqual(creds["provider"], "custom")
 806          self.assertEqual(creds["base_url"], "https://my-server.example/v1")
 807          mock_resolve.assert_called_once_with(requested="custom:my-server")
 808  
 809      def test_direct_endpoint_uses_configured_base_url_and_api_key(self):
 810          parent = _make_mock_parent(depth=0)
 811          cfg = {
 812              "model": "qwen2.5-coder",
 813              "provider": "openrouter",
 814              "base_url": "http://localhost:1234/v1",
 815              "api_key": "local-key",
 816          }
 817          creds = _resolve_delegation_credentials(cfg, parent)
 818          self.assertEqual(creds["model"], "qwen2.5-coder")
 819          self.assertEqual(creds["provider"], "custom")
 820          self.assertEqual(creds["base_url"], "http://localhost:1234/v1")
 821          self.assertEqual(creds["api_key"], "local-key")
 822          self.assertEqual(creds["api_mode"], "chat_completions")
 823  
 824      def test_direct_endpoint_returns_none_api_key_when_not_configured(self):
 825          # When base_url is set without api_key, api_key should be None so
 826          # _build_child_agent inherits the parent's key (effective_api_key = override or parent).
 827          parent = _make_mock_parent(depth=0)
 828          cfg = {
 829              "model": "qwen2.5-coder",
 830              "base_url": "http://localhost:1234/v1",
 831          }
 832          with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False):
 833              creds = _resolve_delegation_credentials(cfg, parent)
 834          self.assertIsNone(creds["api_key"])
 835          self.assertEqual(creds["provider"], "custom")
 836  
 837      def test_direct_endpoint_no_raise_when_only_provider_env_key_present(self):
 838          # Even if OPENAI_API_KEY is absent, no ValueError — _build_child_agent uses parent key.
 839          parent = _make_mock_parent(depth=0)
 840          cfg = {
 841              "model": "qwen2.5-coder",
 842              "base_url": "http://localhost:1234/v1",
 843          }
 844          with patch.dict(
 845              os.environ,
 846              {
 847                  "OPENROUTER_API_KEY": "env-openrouter-key",
 848                  "OPENAI_API_KEY": "",
 849              },
 850              clear=False,
 851          ):
 852              creds = _resolve_delegation_credentials(cfg, parent)
 853          self.assertIsNone(creds["api_key"])
 854          self.assertEqual(creds["provider"], "custom")
 855  
 856      @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
 857      def test_nous_provider_resolves_nous_credentials(self, mock_resolve):
 858          """Nous provider resolves Nous Portal base_url and api_key."""
 859          mock_resolve.return_value = {
 860              "provider": "nous",
 861              "base_url": "https://inference-api.nousresearch.com/v1",
 862              "api_key": "nous-agent-key-xyz",
 863              "api_mode": "chat_completions",
 864          }
 865          parent = _make_mock_parent(depth=0)
 866          cfg = {"model": "hermes-3-llama-3.1-8b", "provider": "nous"}
 867          creds = _resolve_delegation_credentials(cfg, parent)
 868          self.assertEqual(creds["provider"], "nous")
 869          self.assertEqual(creds["base_url"], "https://inference-api.nousresearch.com/v1")
 870          self.assertEqual(creds["api_key"], "nous-agent-key-xyz")
 871          mock_resolve.assert_called_once_with(requested="nous")
 872  
 873      @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
 874      def test_provider_resolution_failure_raises_valueerror(self, mock_resolve):
 875          """When provider resolution fails, ValueError is raised with helpful message."""
 876          mock_resolve.side_effect = RuntimeError("OPENROUTER_API_KEY not set")
 877          parent = _make_mock_parent(depth=0)
 878          cfg = {"model": "some-model", "provider": "openrouter"}
 879          with self.assertRaises(ValueError) as ctx:
 880              _resolve_delegation_credentials(cfg, parent)
 881          self.assertIn("openrouter", str(ctx.exception).lower())
 882          self.assertIn("Cannot resolve", str(ctx.exception))
 883  
 884      @patch("hermes_cli.runtime_provider.resolve_runtime_provider")
 885      def test_provider_resolves_but_no_api_key_raises(self, mock_resolve):
 886          """When provider resolves but has no API key, ValueError is raised."""
 887          mock_resolve.return_value = {
 888              "provider": "openrouter",
 889              "base_url": "https://openrouter.ai/api/v1",
 890              "api_key": "",
 891              "api_mode": "chat_completions",
 892          }
 893          parent = _make_mock_parent(depth=0)
 894          cfg = {"model": "some-model", "provider": "openrouter"}
 895          with self.assertRaises(ValueError) as ctx:
 896              _resolve_delegation_credentials(cfg, parent)
 897          self.assertIn("no API key", str(ctx.exception))
 898  
 899      def test_missing_config_keys_inherit_parent(self):
 900          """When config dict has no model/provider keys at all, inherits parent."""
 901          parent = _make_mock_parent(depth=0)
 902          cfg = {"max_iterations": 45}
 903          creds = _resolve_delegation_credentials(cfg, parent)
 904          self.assertIsNone(creds["model"])
 905          self.assertIsNone(creds["provider"])
 906  
 907  
 908  class TestDelegationProviderIntegration(unittest.TestCase):
 909      """Integration tests: delegation config → _run_single_child → AIAgent construction."""
 910  
 911      @patch("tools.delegate_tool._load_config")
 912      @patch("tools.delegate_tool._resolve_delegation_credentials")
 913      def test_config_provider_credentials_reach_child_agent(self, mock_creds, mock_cfg):
 914          """When delegation.provider is configured, child agent gets resolved credentials."""
 915          mock_cfg.return_value = {
 916              "max_iterations": 45,
 917              "model": "google/gemini-3-flash-preview",
 918              "provider": "openrouter",
 919          }
 920          mock_creds.return_value = {
 921              "model": "google/gemini-3-flash-preview",
 922              "provider": "openrouter",
 923              "base_url": "https://openrouter.ai/api/v1",
 924              "api_key": "sk-or-delegation-key",
 925              "api_mode": "chat_completions",
 926          }
 927          parent = _make_mock_parent(depth=0)
 928  
 929          with patch("run_agent.AIAgent") as MockAgent:
 930              mock_child = MagicMock()
 931              mock_child.run_conversation.return_value = {
 932                  "final_response": "done", "completed": True, "api_calls": 1
 933              }
 934              MockAgent.return_value = mock_child
 935  
 936              delegate_task(goal="Test provider routing", parent_agent=parent)
 937  
 938              _, kwargs = MockAgent.call_args
 939              self.assertEqual(kwargs["model"], "google/gemini-3-flash-preview")
 940              self.assertEqual(kwargs["provider"], "openrouter")
 941              self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1")
 942              self.assertEqual(kwargs["api_key"], "sk-or-delegation-key")
 943              self.assertEqual(kwargs["api_mode"], "chat_completions")
 944  
 945      @patch("tools.delegate_tool._load_config")
 946      @patch("tools.delegate_tool._resolve_delegation_credentials")
 947      def test_cross_provider_delegation(self, mock_creds, mock_cfg):
 948          """Parent on Nous, subagent on OpenRouter — full credential switch."""
 949          mock_cfg.return_value = {
 950              "max_iterations": 45,
 951              "model": "google/gemini-3-flash-preview",
 952              "provider": "openrouter",
 953          }
 954          mock_creds.return_value = {
 955              "model": "google/gemini-3-flash-preview",
 956              "provider": "openrouter",
 957              "base_url": "https://openrouter.ai/api/v1",
 958              "api_key": "sk-or-key",
 959              "api_mode": "chat_completions",
 960          }
 961          parent = _make_mock_parent(depth=0)
 962          parent.provider = "nous"
 963          parent.base_url = "https://inference-api.nousresearch.com/v1"
 964          parent.api_key = "nous-key-abc"
 965  
 966          with patch("run_agent.AIAgent") as MockAgent:
 967              mock_child = MagicMock()
 968              mock_child.run_conversation.return_value = {
 969                  "final_response": "done", "completed": True, "api_calls": 1
 970              }
 971              MockAgent.return_value = mock_child
 972  
 973              delegate_task(goal="Cross-provider test", parent_agent=parent)
 974  
 975              _, kwargs = MockAgent.call_args
 976              # Child should use OpenRouter, NOT Nous
 977              self.assertEqual(kwargs["provider"], "openrouter")
 978              self.assertEqual(kwargs["base_url"], "https://openrouter.ai/api/v1")
 979              self.assertEqual(kwargs["api_key"], "sk-or-key")
 980              self.assertNotEqual(kwargs["base_url"], parent.base_url)
 981              self.assertNotEqual(kwargs["api_key"], parent.api_key)
 982  
 983      @patch("tools.delegate_tool._load_config")
 984      @patch("tools.delegate_tool._resolve_delegation_credentials")
 985      def test_provider_override_clears_parent_openrouter_filters(
 986          self, mock_creds, mock_cfg
 987      ):
 988          """Delegated provider should not inherit parent provider-preference filters."""
 989          mock_cfg.return_value = {
 990              "max_iterations": 45,
 991              "model": "google/gemini-3-flash-preview",
 992              "provider": "openrouter",
 993          }
 994          mock_creds.return_value = {
 995              "model": "google/gemini-3-flash-preview",
 996              "provider": "openrouter",
 997              "base_url": "https://openrouter.ai/api/v1",
 998              "api_key": "sk-or-key",
 999              "api_mode": "chat_completions",
1000          }
1001          parent = _make_mock_parent(depth=0)
1002          parent.providers_allowed = ["anthropic/claude-3.5-sonnet"]
1003          parent.providers_ignored = ["openai/gpt-4o-mini"]
1004          parent.providers_order = ["google/gemini-2.5-pro"]
1005          parent.provider_sort = "price"
1006  
1007          with patch("run_agent.AIAgent") as MockAgent:
1008              mock_child = MagicMock()
1009              mock_child.run_conversation.return_value = {
1010                  "final_response": "done",
1011                  "completed": True,
1012                  "api_calls": 1,
1013              }
1014              MockAgent.return_value = mock_child
1015  
1016              delegate_task(goal="Cross-provider test", parent_agent=parent)
1017  
1018              _, kwargs = MockAgent.call_args
1019              self.assertEqual(kwargs["provider"], "openrouter")
1020              self.assertIsNone(kwargs["providers_allowed"])
1021              self.assertIsNone(kwargs["providers_ignored"])
1022              self.assertIsNone(kwargs["providers_order"])
1023              self.assertIsNone(kwargs["provider_sort"])
1024  
1025      @patch("tools.delegate_tool._load_config")
1026      @patch("tools.delegate_tool._resolve_delegation_credentials")
1027      def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
1028          mock_cfg.return_value = {
1029              "max_iterations": 45,
1030              "model": "qwen2.5-coder",
1031              "base_url": "http://localhost:1234/v1",
1032              "api_key": "local-key",
1033          }
1034          mock_creds.return_value = {
1035              "model": "qwen2.5-coder",
1036              "provider": "custom",
1037              "base_url": "http://localhost:1234/v1",
1038              "api_key": "local-key",
1039              "api_mode": "chat_completions",
1040          }
1041          parent = _make_mock_parent(depth=0)
1042  
1043          with patch("run_agent.AIAgent") as MockAgent:
1044              mock_child = MagicMock()
1045              mock_child.run_conversation.return_value = {
1046                  "final_response": "done", "completed": True, "api_calls": 1
1047              }
1048              MockAgent.return_value = mock_child
1049  
1050              delegate_task(goal="Direct endpoint test", parent_agent=parent)
1051  
1052              _, kwargs = MockAgent.call_args
1053              self.assertEqual(kwargs["model"], "qwen2.5-coder")
1054              self.assertEqual(kwargs["provider"], "custom")
1055              self.assertEqual(kwargs["base_url"], "http://localhost:1234/v1")
1056              self.assertEqual(kwargs["api_key"], "local-key")
1057              self.assertEqual(kwargs["api_mode"], "chat_completions")
1058  
1059      @patch("tools.delegate_tool._load_config")
1060      @patch("tools.delegate_tool._resolve_delegation_credentials")
1061      def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
1062          """When delegation config is empty, child inherits parent credentials."""
1063          mock_cfg.return_value = {"max_iterations": 45, "model": "", "provider": ""}
1064          mock_creds.return_value = {
1065              "model": None,
1066              "provider": None,
1067              "base_url": None,
1068              "api_key": None,
1069              "api_mode": None,
1070          }
1071          parent = _make_mock_parent(depth=0)
1072  
1073          with patch("run_agent.AIAgent") as MockAgent:
1074              mock_child = MagicMock()
1075              mock_child.run_conversation.return_value = {
1076                  "final_response": "done", "completed": True, "api_calls": 1
1077              }
1078              MockAgent.return_value = mock_child
1079  
1080              delegate_task(goal="Test inherit", parent_agent=parent)
1081  
1082              _, kwargs = MockAgent.call_args
1083              self.assertEqual(kwargs["model"], parent.model)
1084              self.assertEqual(kwargs["provider"], parent.provider)
1085              self.assertEqual(kwargs["base_url"], parent.base_url)
1086  
1087      @patch("tools.delegate_tool._load_config")
1088      @patch("tools.delegate_tool._resolve_delegation_credentials")
1089      def test_credential_error_returns_json_error(self, mock_creds, mock_cfg):
1090          """When credential resolution fails, delegate_task returns a JSON error."""
1091          mock_cfg.return_value = {"model": "bad-model", "provider": "nonexistent"}
1092          mock_creds.side_effect = ValueError(
1093              "Cannot resolve delegation provider 'nonexistent': Unknown provider"
1094          )
1095          parent = _make_mock_parent(depth=0)
1096  
1097          result = json.loads(delegate_task(goal="Should fail", parent_agent=parent))
1098          self.assertIn("error", result)
1099          self.assertIn("Cannot resolve", result["error"])
1100          self.assertIn("nonexistent", result["error"])
1101  
1102      @patch("tools.delegate_tool._load_config")
1103      @patch("tools.delegate_tool._resolve_delegation_credentials")
1104      def test_batch_mode_all_children_get_credentials(self, mock_creds, mock_cfg):
1105          """In batch mode, all children receive the resolved credentials."""
1106          mock_cfg.return_value = {
1107              "max_iterations": 45,
1108              "model": "meta-llama/llama-4-scout",
1109              "provider": "openrouter",
1110          }
1111          mock_creds.return_value = {
1112              "model": "meta-llama/llama-4-scout",
1113              "provider": "openrouter",
1114              "base_url": "https://openrouter.ai/api/v1",
1115              "api_key": "sk-or-batch",
1116              "api_mode": "chat_completions",
1117          }
1118          parent = _make_mock_parent(depth=0)
1119  
1120          # Patch _build_child_agent since credentials are now passed there
1121          # (agents are built in the main thread before being handed to workers)
1122          with patch("tools.delegate_tool._build_child_agent") as mock_build, \
1123               patch("tools.delegate_tool._run_single_child") as mock_run:
1124              mock_child = MagicMock()
1125              mock_build.return_value = mock_child
1126              mock_run.return_value = {
1127                  "task_index": 0, "status": "completed",
1128                  "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
1129              }
1130  
1131              tasks = [{"goal": "Task A"}, {"goal": "Task B"}]
1132              delegate_task(tasks=tasks, parent_agent=parent)
1133  
1134              self.assertEqual(mock_build.call_count, 2)
1135              for call in mock_build.call_args_list:
1136                  self.assertEqual(call.kwargs.get("model"), "meta-llama/llama-4-scout")
1137                  self.assertEqual(call.kwargs.get("override_provider"), "openrouter")
1138                  self.assertEqual(call.kwargs.get("override_base_url"), "https://openrouter.ai/api/v1")
1139                  self.assertEqual(call.kwargs.get("override_api_key"), "sk-or-batch")
1140                  self.assertEqual(call.kwargs.get("override_api_mode"), "chat_completions")
1141  
1142      @patch("tools.delegate_tool._load_config")
1143      @patch("tools.delegate_tool._resolve_delegation_credentials")
1144      def test_delegation_acp_runtime_reaches_child_agent(self, mock_creds, mock_cfg):
1145          """Resolved ACP runtime command/args must be forwarded to child agents."""
1146          mock_cfg.return_value = {
1147              "max_iterations": 45,
1148              "model": "copilot-model",
1149              "provider": "copilot-acp",
1150          }
1151          mock_creds.return_value = {
1152              "model": "copilot-model",
1153              "provider": "copilot-acp",
1154              "base_url": "acp://copilot",
1155              "api_key": "copilot-acp",
1156              "api_mode": "chat_completions",
1157              "command": "custom-copilot",
1158              "args": ["--stdio-custom"],
1159          }
1160          parent = _make_mock_parent(depth=0)
1161  
1162          with patch("tools.delegate_tool._build_child_agent") as mock_build, \
1163               patch("tools.delegate_tool._run_single_child") as mock_run:
1164              mock_child = MagicMock()
1165              mock_build.return_value = mock_child
1166              mock_run.return_value = {
1167                  "task_index": 0, "status": "completed",
1168                  "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
1169              }
1170  
1171              delegate_task(goal="ACP delegation test", parent_agent=parent)
1172  
1173              _, kwargs = mock_build.call_args
1174              self.assertEqual(kwargs.get("override_provider"), "copilot-acp")
1175              self.assertEqual(kwargs.get("override_base_url"), "acp://copilot")
1176              self.assertEqual(kwargs.get("override_api_key"), "copilot-acp")
1177              self.assertEqual(kwargs.get("override_api_mode"), "chat_completions")
1178              self.assertEqual(kwargs.get("override_acp_command"), "custom-copilot")
1179              self.assertEqual(kwargs.get("override_acp_args"), ["--stdio-custom"])
1180  
1181      @patch("tools.delegate_tool._load_config")
1182      @patch("tools.delegate_tool._resolve_delegation_credentials")
1183      def test_model_only_no_provider_inherits_parent_credentials(self, mock_creds, mock_cfg):
1184          """Setting only model (no provider) changes model but keeps parent credentials."""
1185          mock_cfg.return_value = {
1186              "max_iterations": 45,
1187              "model": "google/gemini-3-flash-preview",
1188              "provider": "",
1189          }
1190          mock_creds.return_value = {
1191              "model": "google/gemini-3-flash-preview",
1192              "provider": None,
1193              "base_url": None,
1194              "api_key": None,
1195              "api_mode": None,
1196          }
1197          parent = _make_mock_parent(depth=0)
1198  
1199          with patch("run_agent.AIAgent") as MockAgent:
1200              mock_child = MagicMock()
1201              mock_child.run_conversation.return_value = {
1202                  "final_response": "done", "completed": True, "api_calls": 1
1203              }
1204              MockAgent.return_value = mock_child
1205  
1206              delegate_task(goal="Model only test", parent_agent=parent)
1207  
1208              _, kwargs = MockAgent.call_args
1209              # Model should be overridden
1210              self.assertEqual(kwargs["model"], "google/gemini-3-flash-preview")
1211              # But provider/base_url/api_key should inherit from parent
1212              self.assertEqual(kwargs["provider"], parent.provider)
1213              self.assertEqual(kwargs["base_url"], parent.base_url)
1214  
1215  
class TestChildCredentialPoolResolution(unittest.TestCase):
    """Tests for _resolve_child_credential_pool and for the credential pool
    and toolsets that _build_child_agent gives a child."""

    @staticmethod
    def _enabled_toolsets_for_narrowed_child():
        """Build a child asking for web/browser under a parent that also has
        mcp-MiniMax, and return the enabled_toolsets passed to AIAgent."""
        parent = _make_mock_parent()
        parent.enabled_toolsets = ["web", "browser", "mcp-MiniMax"]

        with patch("run_agent.AIAgent") as agent_cls:
            agent_cls.return_value = MagicMock()

            _build_child_agent(
                task_index=0,
                goal="Test narrowed toolsets",
                context=None,
                toolsets=["web", "browser"],
                model=None,
                max_iterations=10,
                parent_agent=parent,
                task_count=1,
            )

        return agent_cls.call_args[1]["enabled_toolsets"]

    def test_same_provider_shares_parent_pool(self):
        """A child on the parent's provider gets the parent's pool object."""
        parent = _make_mock_parent()
        parent_pool = MagicMock()
        parent._credential_pool = parent_pool

        resolved_pool = _resolve_child_credential_pool("openrouter", parent)

        self.assertIs(resolved_pool, parent_pool)

    def test_no_provider_inherits_parent_pool(self):
        """With no provider given, the parent's pool object is returned."""
        parent = _make_mock_parent()
        parent_pool = MagicMock()
        parent._credential_pool = parent_pool

        resolved_pool = _resolve_child_credential_pool(None, parent)

        self.assertIs(resolved_pool, parent_pool)

    def test_different_provider_loads_own_pool(self):
        """A different provider returns the pool from load_pool when it has credentials."""
        parent = _make_mock_parent()
        parent._credential_pool = MagicMock()
        provider_pool = MagicMock()
        provider_pool.has_credentials.return_value = True

        with patch("agent.credential_pool.load_pool", return_value=provider_pool):
            resolved_pool = _resolve_child_credential_pool("anthropic", parent)

        self.assertIs(resolved_pool, provider_pool)

    def test_different_provider_empty_pool_returns_none(self):
        """A different provider whose loaded pool has no credentials yields None."""
        parent = _make_mock_parent()
        parent._credential_pool = MagicMock()
        empty_pool = MagicMock()
        empty_pool.has_credentials.return_value = False

        with patch("agent.credential_pool.load_pool", return_value=empty_pool):
            resolved_pool = _resolve_child_credential_pool("anthropic", parent)

        self.assertIsNone(resolved_pool)

    def test_different_provider_load_failure_returns_none(self):
        """An exception raised by load_pool is absorbed and None is returned."""
        parent = _make_mock_parent()
        parent._credential_pool = MagicMock()
        load_error = Exception("disk error")

        with patch("agent.credential_pool.load_pool", side_effect=load_error):
            resolved_pool = _resolve_child_credential_pool("anthropic", parent)

        self.assertIsNone(resolved_pool)

    def test_build_child_agent_assigns_parent_pool_when_shared(self):
        """_build_child_agent sets the child's _credential_pool to the parent's pool."""
        parent = _make_mock_parent()
        parent_pool = MagicMock()
        parent._credential_pool = parent_pool

        with patch("run_agent.AIAgent") as agent_cls:
            built_child = MagicMock()
            agent_cls.return_value = built_child

            _build_child_agent(
                task_index=0,
                goal="Test pool assignment",
                context=None,
                toolsets=["terminal"],
                model=None,
                max_iterations=10,
                parent_agent=parent,
                task_count=1,
            )

            self.assertEqual(built_child._credential_pool, parent_pool)

    @patch("tools.delegate_tool._load_config", return_value={})
    def test_build_child_agent_preserves_mcp_toolsets_by_default(self, mock_cfg):
        """With an empty delegation config the parent's mcp-MiniMax toolset is
        kept alongside the requested web/browser toolsets."""
        self.assertEqual(
            self._enabled_toolsets_for_narrowed_child(),
            ["web", "browser", "mcp-MiniMax"],
        )

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"inherit_mcp_toolsets": False},
    )
    def test_build_child_agent_strict_intersection_when_opted_out(self, mock_cfg):
        """With inherit_mcp_toolsets set to False only the requested
        web/browser toolsets are enabled."""
        self.assertEqual(
            self._enabled_toolsets_for_narrowed_child(),
            ["web", "browser"],
        )
1338  
1339  
class TestChildCredentialLeasing(unittest.TestCase):
    """_run_single_child leases a pool credential and hands it back."""

    def test_run_single_child_acquires_and_releases_lease(self):
        """Success path: lease acquired, credential swapped in, lease released."""
        from tools.delegate_tool import _run_single_child

        lease_id = "cred-b"
        leased_entry = MagicMock()
        leased_entry.id = lease_id

        pool = MagicMock()
        pool.acquire_lease.return_value = lease_id
        pool.current.return_value = leased_entry

        child = MagicMock()
        child._credential_pool = pool
        child.run_conversation.return_value = {
            "final_response": "done",
            "completed": True,
            "interrupted": False,
            "api_calls": 1,
            "messages": [],
        }

        outcome = _run_single_child(
            task_index=0,
            goal="Investigate rate limits",
            child=child,
            parent_agent=_make_mock_parent(),
        )

        self.assertEqual(outcome["status"], "completed")
        pool.acquire_lease.assert_called_once_with()
        child._swap_credential.assert_called_once_with(leased_entry)
        pool.release_lease.assert_called_once_with(lease_id)

    def test_run_single_child_releases_lease_after_failure(self):
        """The lease is still released when run_conversation raises."""
        from tools.delegate_tool import _run_single_child

        lease_id = "cred-a"
        pool = MagicMock()
        pool.acquire_lease.return_value = lease_id
        pool.current.return_value = MagicMock(id=lease_id)

        child = MagicMock()
        child._credential_pool = pool
        child.run_conversation.side_effect = RuntimeError("boom")

        outcome = _run_single_child(
            task_index=1,
            goal="Trigger failure",
            child=child,
            parent_agent=_make_mock_parent(),
        )

        # The exception is reported through the result dict, not re-raised.
        self.assertEqual(outcome["status"], "error")
        pool.release_lease.assert_called_once_with(lease_id)
1389  
1390  
class TestDelegateHeartbeat(unittest.TestCase):
    """Heartbeat propagates child activity to parent during delegation.

    Without the heartbeat, the gateway inactivity timeout fires because the
    parent's _last_activity_ts freezes when delegate_task starts.

    Every test follows the same recipe: a mock parent whose
    ``_touch_activity`` appends to a list, a mock child with a canned
    activity summary, and a ``_run_single_child`` call made while
    ``_HEARTBEAT_INTERVAL`` is patched down to a few milliseconds.
    """

    # Interval patched into tools.delegate_tool so heartbeats fire quickly.
    _FAST_INTERVAL = 0.05

    @staticmethod
    def _parent_recording_touches():
        """Return ``(parent, touches)``; each heartbeat description lands in ``touches``."""
        touches = []

        def record(desc):
            touches.append(desc)

        parent = _make_mock_parent()
        parent._touch_activity = record
        return parent, touches

    @staticmethod
    def _child_reporting(current_tool, api_call_count, max_iterations,
                         last_activity_desc):
        """Return a mock child whose get_activity_summary() is frozen to these values."""
        child = MagicMock()
        child.get_activity_summary.return_value = {
            "current_tool": current_tool,
            "api_call_count": api_call_count,
            "max_iterations": max_iterations,
            "last_activity_desc": last_activity_desc,
        }
        return child

    @staticmethod
    def _blocking_run(seconds, api_calls):
        """Build a run_conversation stand-in that sleeps, then reports success."""
        def run(**kwargs):
            time.sleep(seconds)
            return {"final_response": "done", "completed": True,
                    "api_calls": api_calls}

        return run

    def _run_with_fast_heartbeat(self, child, parent, goal):
        """Call _run_single_child for task 0 with the heartbeat interval shortened."""
        from tools.delegate_tool import _run_single_child

        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", self._FAST_INTERVAL):
            return _run_single_child(
                task_index=0,
                goal=goal,
                child=child,
                parent_agent=parent,
            )

    def test_heartbeat_touches_parent_activity_during_child_run(self):
        """Parent's _touch_activity is called while child.run_conversation blocks."""
        parent, touch_calls = self._parent_recording_touches()
        child = self._child_reporting(
            "terminal", 3, 50, "executing tool: terminal")
        # Block long enough for several 0.05s heartbeats to fire.
        child.run_conversation.side_effect = self._blocking_run(0.25, api_calls=3)

        self._run_with_fast_heartbeat(child, parent, "Test heartbeat")

        # At least one heartbeat must have landed during the 0.25s sleep.
        self.assertGreater(len(touch_calls), 0,
                           "Heartbeat did not propagate activity to parent")
        # The description should mention the child's current tool.
        mentions_tool = any("terminal" in desc for desc in touch_calls)
        self.assertTrue(
            mentions_tool,
            f"Heartbeat descriptions should include child tool info: {touch_calls}")

    def test_heartbeat_stops_after_child_completes(self):
        """Heartbeat thread is cleaned up when the child finishes."""
        parent, touch_calls = self._parent_recording_touches()
        child = self._child_reporting(None, 1, 50, "done")
        # No sleep here: the child returns immediately.
        child.run_conversation.return_value = {
            "final_response": "done", "completed": True, "api_calls": 1,
        }

        self._run_with_fast_heartbeat(child, parent, "Test cleanup")

        # Snapshot the count, wait three intervals, and expect no growth.
        count_after = len(touch_calls)
        time.sleep(0.15)
        self.assertEqual(len(touch_calls), count_after,
                         "Heartbeat continued firing after child completed")

    def test_heartbeat_stops_after_child_error(self):
        """Heartbeat thread is cleaned up even when the child raises."""
        parent, touch_calls = self._parent_recording_touches()
        child = self._child_reporting(
            "web_search", 2, 50, "executing tool: web_search")

        def slow_fail(**kwargs):
            time.sleep(0.15)
            raise RuntimeError("network timeout")

        child.run_conversation.side_effect = slow_fail

        result = self._run_with_fast_heartbeat(child, parent, "Test error cleanup")

        self.assertEqual(result["status"], "error")

        # Snapshot the count, wait three intervals, and expect no growth.
        count_after = len(touch_calls)
        time.sleep(0.15)
        self.assertEqual(len(touch_calls), count_after,
                         "Heartbeat continued firing after child error")

    def test_heartbeat_includes_child_activity_desc_when_no_tool(self):
        """When child has no current_tool, heartbeat uses last_activity_desc."""
        parent, touch_calls = self._parent_recording_touches()
        child = self._child_reporting(None, 5, 90, "API call #5 completed")
        child.run_conversation.side_effect = self._blocking_run(0.15, api_calls=5)

        self._run_with_fast_heartbeat(child, parent, "Test desc fallback")

        self.assertGreater(len(touch_calls), 0)
        carries_desc = any("API call #5 completed" in desc for desc in touch_calls)
        self.assertTrue(
            carries_desc,
            f"Heartbeat should include last_activity_desc: {touch_calls}")

    def test_heartbeat_does_not_trip_idle_stale_while_inside_tool(self):
        """A long-running tool (no iteration advance, but current_tool set)
        must not be flagged stale at the idle threshold.

        Bug #13041: when a child is legitimately busy inside a slow tool
        (terminal command, browser fetch), api_call_count does not advance.
        The previous stale check treated this as idle and stopped the
        heartbeat after 5 cycles (~150s), letting the gateway kill the
        session. The fix uses a much higher in-tool threshold and only
        applies the tight idle threshold when current_tool is None.
        """
        parent, touch_calls = self._parent_recording_touches()
        # The child sits inside one terminal call for the whole run:
        # api_call_count never advances and current_tool is always set.
        child = self._child_reporting(
            "terminal", 1, 50, "executing tool: terminal")
        # 0.4s is longer than five 0.05s cycles (the idle threshold quoted
        # above) yet shorter than the in-tool threshold.
        child.run_conversation.side_effect = self._blocking_run(0.4, api_calls=1)

        # Only the interval is patched; the stale-cycle thresholds keep
        # whatever defaults tools.delegate_tool defines (idle = 5 and
        # in-tool = 20 cycles per the notes this test was written against
        # -- TODO confirm).  Under the old behavior heartbeats would stop
        # after ~0.25s; here they must continue through the full 0.4s run.
        self._run_with_fast_heartbeat(child, parent, "Test long-running tool")

        # An idle-threshold cutoff would cap touches at about 5, so
        # requiring more than 6 shows the in-tool branch was taken.
        self.assertGreater(
            len(touch_calls), 6,
            f"Heartbeat stopped too early while child was inside a tool; "
            f"got {len(touch_calls)} touches over 0.4s at 0.05s interval",
        )

    def test_heartbeat_still_trips_idle_stale_when_no_tool(self):
        """A wedged child with no current_tool still trips the idle threshold.

        Regression guard: the fix for #13041 must not disable stale
        detection entirely. A child that's hung between turns (no tool
        running, no iteration progress) must still stop touching the
        parent so the gateway timeout can fire.
        """
        parent, touch_calls = self._parent_recording_touches()
        # Wedged child: no tool running and the iteration count is frozen.
        child = self._child_reporting(None, 3, 50, "waiting for API response")
        child.run_conversation.side_effect = self._blocking_run(0.6, api_calls=3)

        # With a 0.05s interval, five stale cycles elapse at about 0.25s,
        # so the heartbeat should go quiet well before the 0.6s run ends.
        self._run_with_fast_heartbeat(child, parent, "Test wedged child")

        # Touches should cap near 5; the bound is loose to absorb timing
        # jitter.
        self.assertLess(
            len(touch_calls), 9,
            f"Idle stale detection did not fire: got {len(touch_calls)} "
            f"touches over 0.6s — expected heartbeat to stop after "
            f"~5 stale cycles",
        )
1649  
1650  
class TestDelegationReasoningEffort(unittest.TestCase):
    """Tests for delegation.reasoning_effort config override."""

    def _built_reasoning_config(self, agent_cls, load_cfg, effort,
                                parent_reasoning):
        """Build a child and return the ``reasoning_config`` passed to AIAgent.

        ``effort`` is served as ``reasoning_effort`` by the patched
        ``_load_config``; ``parent_reasoning`` becomes the mock parent's
        ``reasoning_config``.
        """
        load_cfg.return_value = {"max_iterations": 50, "reasoning_effort": effort}
        agent_cls.return_value = MagicMock()
        parent = _make_mock_parent()
        parent.reasoning_config = parent_reasoning

        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent,
            task_count=1,
        )
        _, ctor_kwargs = agent_cls.call_args
        return ctor_kwargs["reasoning_config"]

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_inherits_parent_reasoning_when_no_override(self, MockAgent, mock_cfg):
        """With no delegation.reasoning_effort, child inherits parent's config."""
        passed = self._built_reasoning_config(
            MockAgent, mock_cfg, "", {"enabled": True, "effort": "xhigh"})
        self.assertEqual(passed, {"enabled": True, "effort": "xhigh"})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_override_reasoning_effort_from_config(self, MockAgent, mock_cfg):
        """delegation.reasoning_effort overrides the parent's level."""
        passed = self._built_reasoning_config(
            MockAgent, mock_cfg, "low", {"enabled": True, "effort": "xhigh"})
        self.assertEqual(passed, {"enabled": True, "effort": "low"})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_override_reasoning_effort_none_disables(self, MockAgent, mock_cfg):
        """delegation.reasoning_effort: 'none' disables thinking for subagents."""
        passed = self._built_reasoning_config(
            MockAgent, mock_cfg, "none", {"enabled": True, "effort": "high"})
        self.assertEqual(passed, {"enabled": False})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_invalid_reasoning_effort_falls_back_to_parent(self, MockAgent, mock_cfg):
        """Invalid delegation.reasoning_effort falls back to parent's config."""
        passed = self._built_reasoning_config(
            MockAgent, mock_cfg, "banana", {"enabled": True, "effort": "medium"})
        self.assertEqual(passed, {"enabled": True, "effort": "medium"})
1721  
1722  
1723  # =========================================================================
1724  # Dispatch helper, progress events, concurrency
1725  # =========================================================================
1726  
class TestDispatchDelegateTask(unittest.TestCase):
    """Tests for the _dispatch_delegate_task helper and full param forwarding."""

    @patch("tools.delegate_tool._load_config", return_value={})
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_acp_args_forwarded(self, mock_creds, mock_cfg):
        """acp_command and acp_args given to delegate_task reach the child builder.

        ``_build_child_agent`` is replaced with a mock, so the check is
        made on the keyword arguments that mock was called with.
        """
        mock_creds.return_value = dict.fromkeys(
            ("provider", "base_url", "api_key", "api_mode", "model"))

        # Stand-in child carrying the attributes this test pins to plain
        # values instead of auto-generated mocks.
        stub_child = MagicMock()
        stub_child.run_conversation.return_value = {
            "final_response": "done", "completed": True,
            "api_calls": 1, "messages": [],
        }
        stub_child._delegate_saved_tool_names = []
        stub_child._credential_pool = None
        stub_child.session_prompt_tokens = 0
        stub_child.session_completion_tokens = 0
        stub_child.model = "test"

        parent = _make_mock_parent(depth=0)
        with patch("tools.delegate_tool._build_child_agent") as mock_build:
            mock_build.return_value = stub_child

            delegate_task(
                goal="test",
                acp_command="claude",
                acp_args=["--acp", "--stdio"],
                parent_agent=parent,
            )
            build_kwargs = mock_build.call_args[1]

        self.assertEqual(build_kwargs["override_acp_command"], "claude")
        self.assertEqual(build_kwargs["override_acp_args"], ["--acp", "--stdio"])
1761  
class TestDelegateEventEnum(unittest.TestCase):
    """Tests for DelegateEvent enum and back-compat aliases.

    All checks go through ``self.assert*`` rather than bare ``assert``
    statements, so they still run when the module is executed directly
    under ``python -O`` (which strips ``assert``).
    """

    @staticmethod
    def _parent_with_spinner(progress_callback=None):
        """Return a mock parent with a mock spinner and the given relay callback."""
        parent = _make_mock_parent()
        parent._delegate_spinner = MagicMock()
        parent.tool_progress_callback = progress_callback
        return parent

    def _assert_spinner_printed(self, parent, marker):
        """Fail unless some spinner ``print_above`` call mentions ``marker``."""
        calls = parent._delegate_spinner.print_above.call_args_list
        self.assertTrue(
            any(marker in str(c) for c in calls),
            f"expected {marker!r} in spinner output, got: {calls}",
        )

    def test_enum_values_are_strings(self):
        """Every DelegateEvent value is a str starting with 'delegate.'."""
        for event in DelegateEvent:
            self.assertIsInstance(event.value, str)
            self.assertTrue(event.value.startswith("delegate."))

    def test_legacy_map_covers_all_old_names(self):
        """_LEGACY_EVENT_MAP has exactly the five legacy event names as keys."""
        expected_legacy = {"_thinking", "reasoning.available",
                          "tool.started", "tool.completed", "subagent_progress"}
        self.assertEqual(set(_LEGACY_EVENT_MAP.keys()), expected_legacy)

    def test_legacy_map_values_are_delegate_events(self):
        """Every legacy name maps onto a DelegateEvent member."""
        for event in _LEGACY_EVENT_MAP.values():
            self.assertIsInstance(event, DelegateEvent)

    def test_progress_callback_normalises_tool_started(self):
        """_build_child_progress_callback handles tool.started via enum."""
        parent = self._parent_with_spinner(progress_callback=MagicMock())

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        self.assertIsNotNone(cb)

        cb("tool.started", tool_name="terminal", preview="ls")
        parent._delegate_spinner.print_above.assert_called()

    def test_progress_callback_normalises_thinking(self):
        """Both _thinking and reasoning.available route to TASK_THINKING."""
        parent = self._parent_with_spinner()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)

        cb("_thinking", tool_name=None, preview="pondering...")
        self._assert_spinner_printed(parent, "💭")

        # Clear the recorded calls so the second event is judged on its own.
        parent._delegate_spinner.print_above.reset_mock()
        cb("reasoning.available", tool_name=None, preview="hmm")
        self._assert_spinner_printed(parent, "💭")

    def test_progress_callback_tool_completed_is_noop(self):
        """tool.completed is normalised but produces no display output."""
        parent = self._parent_with_spinner()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("tool.completed", tool_name="terminal")
        parent._delegate_spinner.print_above.assert_not_called()

    def test_progress_callback_ignores_unknown_events(self):
        """Unknown event types are silently ignored."""
        parent = self._parent_with_spinner()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        # Should not raise
        cb("some.unknown.event", tool_name="x")
        parent._delegate_spinner.print_above.assert_not_called()

    def test_progress_callback_accepts_enum_value_directly(self):
        """cb(DelegateEvent.TASK_THINKING, ...) must route to the thinking
        branch.  Pre-fix the callback only handled legacy strings via
        _LEGACY_EVENT_MAP.get and silently dropped enum-typed callers."""
        parent = self._parent_with_spinner()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb(DelegateEvent.TASK_THINKING, preview="pondering")
        # If the enum was accepted, the thinking emoji got printed.
        self._assert_spinner_printed(parent, "💭")

    def test_progress_callback_accepts_new_style_string(self):
        """cb('delegate.task_thinking', ...) — the string form of the
        enum value — must route to the thinking branch too, so new-style
        emitters don't have to import DelegateEvent."""
        parent = self._parent_with_spinner()

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("delegate.task_thinking", preview="hmm")
        self._assert_spinner_printed(parent, "💭")

    def test_progress_callback_task_progress_not_misrendered(self):
        """'subagent_progress' (legacy name for TASK_PROGRESS) carries a
        pre-batched summary in the tool_name slot.  Before the fix, this
        fell through to the TASK_TOOL_STARTED rendering path, treating
        the summary string as a tool name.  After the fix: distinct
        render (no tool-start emoji lookup) and pass-through relay
        upward (no re-batching).

        Regression path only reachable once nested orchestration is
        enabled: nested orchestrators relay subagent_progress from
        grandchildren upward through this callback.
        """
        parent = self._parent_with_spinner(progress_callback=MagicMock())

        cb = _build_child_progress_callback(0, "test goal", parent, task_count=1)
        cb("subagent_progress", tool_name="🔀 [1] terminal, file")

        # Spinner gets a distinct 🔀-prefixed line, NOT a tool emoji
        # followed by the summary string as if it were a tool name.
        calls = parent._delegate_spinner.print_above.call_args_list
        self.assertTrue(any("🔀 🔀 [1] terminal, file" in str(c) for c in calls))
        # Parent callback receives the relay (pass-through, no re-batching).
        parent.tool_progress_callback.assert_called_once()
        # No '⚡' tool-start emoji should appear — that's the pre-fix bug.
        self.assertFalse(any("⚡" in str(c) for c in calls))
1883  
1884  
class TestConcurrencyDefaults(unittest.TestCase):
    """Tests for the concurrency default and no hard ceiling."""

    @staticmethod
    def _limit_with_config(delegation_cfg):
        """Return _get_max_concurrent_children() with _load_config stubbed."""
        with patch("tools.delegate_tool._load_config",
                   return_value=delegation_cfg):
            return _get_max_concurrent_children()

    def test_default_is_three(self):
        """Empty config and an empty environment yield a limit of 3."""
        # Wipe the environment so no variable can supply a value.
        with patch.dict(os.environ, {}, clear=True):
            limit = self._limit_with_config({})
        self.assertEqual(limit, 3)

    def test_no_upper_ceiling(self):
        """Users can raise concurrency as high as they want — no hard cap."""
        limit = self._limit_with_config({"max_concurrent_children": 10})
        self.assertEqual(limit, 10)

    def test_very_high_values_honored(self):
        """A configured value of 100 is returned unchanged."""
        limit = self._limit_with_config({"max_concurrent_children": 100})
        self.assertEqual(limit, 100)

    def test_zero_clamped_to_one(self):
        """Floor of 1 is enforced; zero or negative values raise to 1."""
        limit = self._limit_with_config({"max_concurrent_children": 0})
        self.assertEqual(limit, 1)

    def test_env_var_honored_uncapped(self):
        """DELEGATION_MAX_CONCURRENT_CHILDREN is used when the config is empty."""
        env = {"DELEGATION_MAX_CONCURRENT_CHILDREN": "12"}
        with patch.dict(os.environ, env):
            limit = self._limit_with_config({})
        self.assertEqual(limit, 12)

    def test_configured_value_returned(self):
        """A mid-range configured value is returned unchanged."""
        limit = self._limit_with_config({"max_concurrent_children": 6})
        self.assertEqual(limit, 6)
1920  
1921  
1922  # =========================================================================
1923  # max_spawn_depth clamping
1924  # =========================================================================
1925  
class TestMaxSpawnDepth(unittest.TestCase):
    """Tests for _get_max_spawn_depth clamping and fallback behavior."""

    @staticmethod
    def _depth_with_config(delegation_cfg):
        """Return _get_max_spawn_depth() with _load_config stubbed."""
        from tools.delegate_tool import _get_max_spawn_depth

        with patch("tools.delegate_tool._load_config",
                   return_value=delegation_cfg):
            return _get_max_spawn_depth()

    def _assert_clamped(self, configured, expected, log_fragment):
        """Check the clamped result and that a WARNING mentioning it was logged."""
        import logging

        with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
            result = self._depth_with_config({"max_spawn_depth": configured})
        self.assertEqual(result, expected)
        self.assertTrue(any(log_fragment in m for m in cm.output))

    def test_max_spawn_depth_defaults_to_1(self):
        """An empty config yields a spawn depth of 1."""
        self.assertEqual(self._depth_with_config({}), 1)

    def test_max_spawn_depth_clamped_below_one(self):
        """A configured 0 is raised to 1 and a warning is logged."""
        self._assert_clamped(0, 1, "clamping to 1")

    def test_max_spawn_depth_clamped_above_three(self):
        """A configured 99 is lowered to 3 and a warning is logged."""
        self._assert_clamped(99, 3, "clamping to 3")

    def test_max_spawn_depth_invalid_falls_back_to_default(self):
        """A non-numeric value yields 1."""
        depth = self._depth_with_config({"max_spawn_depth": "not-a-number"})
        self.assertEqual(depth, 1)
1959  
1960  
1961  # =========================================================================
1962  # role param plumbing
1963  # =========================================================================
1964  #
1965  # These tests cover the schema + signature + stash plumbing of the role
1966  # param.  The full role-honoring behavior (toolset re-add, role-aware
1967  # prompt) lives in TestOrchestratorRoleBehavior below; these tests only
1968  # assert on _delegate_role stashing and on the schema shape.
1969  
1970  
class TestOrchestratorRoleSchema(unittest.TestCase):
    """Plumbing tests for the role param: stashing on the child + schema shape."""

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def _run_with_mock_child(self, role_arg, mock_cfg, mock_creds):
        """Run delegate_task against a patched AIAgent and return the mock child.

        ``role_arg`` is forwarded as the ``role`` kwarg, unless it is
        ``_SENTINEL``, in which case the kwarg is omitted entirely so the
        default role can be observed.  ``mock_cfg`` and ``mock_creds`` are
        injected by the ``patch`` decorators.
        """
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        with patch("run_agent.AIAgent") as MockAgent:
            # Reuse the module-level helper instead of a hand-copied mock so
            # the child fixture cannot drift between test classes.
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            kwargs = {"goal": "test", "parent_agent": parent}
            if role_arg is not _SENTINEL:
                kwargs["role"] = role_arg
            delegate_task(**kwargs)
        return mock_child

    def test_default_role_is_leaf(self):
        """Omitting the role kwarg stashes 'leaf' on the child."""
        child = self._run_with_mock_child(_SENTINEL)
        self.assertEqual(child._delegate_role, "leaf")

    def test_explicit_orchestrator_role_stashed(self):
        """role='orchestrator' reaches the child and is stashed on it.

        Only the plumbing is verified here; the toolset and prompt effects
        of the role are covered by TestOrchestratorRoleBehavior.
        """
        child = self._run_with_mock_child("orchestrator")
        self.assertEqual(child._delegate_role, "orchestrator")

    def test_unknown_role_coerces_to_leaf(self):
        """role='nonsense' logs a 'coercing' warning and is stashed as 'leaf'."""
        import logging
        with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm:
            child = self._run_with_mock_child("nonsense")
        self.assertEqual(child._delegate_role, "leaf")
        self.assertTrue(any("coercing" in m.lower() for m in cm.output))

    def test_schema_has_role_top_level_and_per_task(self):
        """The schema exposes the role enum at top level and per task item."""
        # DELEGATE_TASK_SCHEMA is already imported at module top.
        props = DELEGATE_TASK_SCHEMA["parameters"]["properties"]
        self.assertIn("role", props)
        self.assertEqual(props["role"]["enum"], ["leaf", "orchestrator"])
        task_props = props["tasks"]["items"]["properties"]
        self.assertIn("role", task_props)
        self.assertEqual(task_props["role"]["enum"], ["leaf", "orchestrator"])
2028  
2029  
# Sentinel used to distinguish "role kwarg omitted" from "role=None".
# TestOrchestratorRoleSchema (defined above) references this name; that is
# safe because the lookup only happens when its test methods run, by which
# point this module-level assignment has already executed.
_SENTINEL = object()
2032  
2033  
2034  # =========================================================================
2035  # role-honoring behavior
2036  # =========================================================================
2037  
2038  
2039  def _make_role_mock_child():
2040      """Helper: mock child with minimal fields for delegate_task to process."""
2041      mock_child = MagicMock()
2042      mock_child.run_conversation.return_value = {
2043          "final_response": "done", "completed": True,
2044          "api_calls": 1, "messages": [],
2045      }
2046      mock_child._delegate_saved_tool_names = []
2047      mock_child._credential_pool = None
2048      mock_child.session_prompt_tokens = 0
2049      mock_child.session_completion_tokens = 0
2050      mock_child.model = "test"
2051      return mock_child
2052  
2053  
class TestOrchestratorRoleBehavior(unittest.TestCase):
    """Verify that role='orchestrator' changes the child's toolsets and prompt."""

    # ── Shared fixtures ─────────────────────────────────────────────────

    @staticmethod
    def _no_override_credentials():
        """Return a credentials mapping whose every field is None."""
        return dict.fromkeys(
            ("provider", "base_url", "api_key", "api_mode", "model")
        )

    def _spawn_orchestrator(self, parent):
        """Delegate one role='orchestrator' task under a patched AIAgent.

        Returns ``(ctor_kwargs, child)``: the keyword arguments AIAgent was
        constructed with, and the mock child the patched class returned.
        """
        with patch("run_agent.AIAgent") as agent_cls:
            child = _make_role_mock_child()
            agent_cls.return_value = child
            delegate_task(goal="test", role="orchestrator", parent_agent=parent)
            _, ctor_kwargs = agent_cls.call_args
        return ctor_kwargs, child

    # ── Toolset / role resolution ───────────────────────────────────────

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_orchestrator_role_keeps_delegation_at_depth_1(
        self, mock_cfg, mock_creds
    ):
        """Depth-0 parent, max_spawn_depth=2, role='orchestrator': the
        depth-1 child is built with 'delegation' in enabled_toolsets and
        keeps the orchestrator role.  max_spawn_depth is raised to 2
        explicitly because an empty config leaves a depth-1 child as a
        leaf (see test_orchestrator_blocked_at_default_flat_depth)."""
        mock_creds.return_value = self._no_override_credentials()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file"]

        ctor_kwargs, child = self._spawn_orchestrator(parent)

        self.assertIn("delegation", ctor_kwargs["enabled_toolsets"])
        self.assertEqual(child._delegate_role, "orchestrator")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_orchestrator_blocked_at_max_spawn_depth(
        self, mock_cfg, mock_creds
    ):
        """Depth-1 parent with max_spawn_depth=2: the child lands at
        depth 2 (the floor), so role='orchestrator' degrades to leaf."""
        mock_creds.return_value = self._no_override_credentials()
        parent = _make_mock_parent(depth=1)
        parent.enabled_toolsets = ["terminal", "delegation"]

        ctor_kwargs, child = self._spawn_orchestrator(parent)

        self.assertNotIn("delegation", ctor_kwargs["enabled_toolsets"])
        self.assertEqual(child._delegate_role, "leaf")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config", return_value={})
    def test_orchestrator_blocked_at_default_flat_depth(
        self, mock_cfg, mock_creds
    ):
        """Empty config (default, flat spawn depth): a depth-0 parent
        asking for role='orchestrator' gets a depth-1 child that is
        already at the floor, so the role degrades to 'leaf' and the
        delegation toolset is stripped."""
        mock_creds.return_value = self._no_override_credentials()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file", "delegation"]

        ctor_kwargs, child = self._spawn_orchestrator(parent)

        self.assertNotIn("delegation", ctor_kwargs["enabled_toolsets"])
        self.assertEqual(child._delegate_role, "leaf")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_orchestrator_enabled_false_forces_leaf(self, mock_creds):
        """The orchestrator_enabled=False kill switch overrides an
        explicit role='orchestrator'."""
        mock_creds.return_value = self._no_override_credentials()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "delegation"]

        with patch("tools.delegate_tool._load_config",
                   return_value={"orchestrator_enabled": False}):
            ctor_kwargs, child = self._spawn_orchestrator(parent)

        self.assertNotIn("delegation", ctor_kwargs["enabled_toolsets"])
        self.assertEqual(child._delegate_role, "leaf")

    # ── Role-aware system prompt ────────────────────────────────────────

    def test_leaf_prompt_does_not_mention_delegation(self):
        """A leaf prompt has neither delegate_task nor the orchestrator section."""
        leaf_prompt = _build_child_system_prompt(
            "Fix tests",
            role="leaf",
            max_spawn_depth=2,
            child_depth=1,
        )
        for forbidden in ("delegate_task", "Orchestrator Role"):
            self.assertNotIn(forbidden, leaf_prompt)

    def test_orchestrator_prompt_mentions_delegation_capability(self):
        """An orchestrator prompt names delegate_task and states its depth."""
        orch_prompt = _build_child_system_prompt(
            "Survey approaches",
            role="orchestrator",
            max_spawn_depth=2,
            child_depth=1,
        )
        # The last two fragments are the literal depth / max-depth note.
        expected_fragments = (
            "delegate_task",
            "Orchestrator Role",
            "depth 1",
            "max_spawn_depth=2",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, orch_prompt)

    def test_orchestrator_prompt_at_depth_floor_says_children_are_leaves(self):
        """max_spawn_depth=2, child_depth=1: the orchestrator's own
        children would sit at depth 2 (the floor), so they must be leaves."""
        orch_prompt = _build_child_system_prompt(
            "Survey",
            role="orchestrator",
            max_spawn_depth=2,
            child_depth=1,
        )
        self.assertIn("MUST be leaves", orch_prompt)

    def test_orchestrator_prompt_below_floor_allows_more_nesting(self):
        """max_spawn_depth=3, child_depth=1: the orchestrator's own
        children (depth 2 < 3) may themselves be orchestrators."""
        orch_prompt = _build_child_system_prompt(
            "Deep work",
            role="orchestrator",
            max_spawn_depth=3,
            child_depth=1,
        )
        self.assertIn("can themselves be orchestrators", orch_prompt)

    # ── Batch mode and intersection ─────────────────────────────────────

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_batch_mode_per_task_role_override(self, mock_cfg, mock_creds):
        """A per-task role wins; a task without one falls back to leaf.

        tasks=[{role:'orchestrator'},{role:'leaf'},{}]: only the first
        child is built with 'delegation'.  max_spawn_depth is raised to 2
        explicitly so the depth-1 orchestrator is permitted at all.
        """
        mock_creds.return_value = self._no_override_credentials()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file", "delegation"]
        seen_toolsets = []

        def build_child(*args, **kwargs):
            # Record the toolsets each child was constructed with, in order.
            seen_toolsets.append(kwargs.get("enabled_toolsets"))
            return _make_role_mock_child()

        batch = [
            {"goal": "A", "role": "orchestrator"},
            {"goal": "B", "role": "leaf"},
            {"goal": "C"},  # no role → falls back to top_role (leaf)
        ]
        with patch("run_agent.AIAgent", side_effect=build_child):
            delegate_task(tasks=batch, parent_agent=parent)

        self.assertIn("delegation", seen_toolsets[0])
        self.assertNotIn("delegation", seen_toolsets[1])
        self.assertNotIn("delegation", seen_toolsets[2])

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_intersection_preserves_delegation_bound(
        self, mock_cfg, mock_creds
    ):
        """Design decision: orchestrator capability comes from the role,
        NOT from the parent's toolset.  A parent whose enabled_toolsets
        lacks 'delegation' can still spawn an orchestrator child that
        has it (when max_spawn_depth allows).

        If the policy changes to "parent must have delegation too",
        update _build_child_agent to check parent_toolsets before the
        re-add and update this test to match.
        """
        mock_creds.return_value = self._no_override_credentials()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file"]  # no delegation

        ctor_kwargs, _child = self._spawn_orchestrator(parent)

        self.assertIn("delegation", ctor_kwargs["enabled_toolsets"])
2252  
2253  
class TestOrchestratorEndToEnd(unittest.TestCase):
    """End-to-end: parent -> orchestrator -> two-leaf nested orchestration.

    Covers the acceptance gate: parent delegates to an orchestrator
    child; the orchestrator delegates to two leaf grandchildren; the
    role/toolset/depth chain all resolve correctly.

    Mock strategy: a single AIAgent patch with a side_effect factory
    that keys on the child's ephemeral_system_prompt — orchestrator
    prompts contain the string "Orchestrator Role" (see
    _build_child_system_prompt), leaves don't.  The orchestrator
    mock's run_conversation recursively calls delegate_task with
    tasks=[{goal:...},{goal:...}] to spawn two leaves.  This keeps
    the test in one patch context and avoids depth-indexed nesting.
    """

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_end_to_end_nested_orchestration(self, mock_cfg, mock_creds):
        """One orchestrator child plus two leaf grandchildren are built."""
        mock_creds.return_value = {
            "provider": None, "base_url": None,
            "api_key": None, "api_mode": None, "model": None,
        }
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file", "delegation"]

        # One record per AIAgent constructed, in construction order: the
        # enabled_toolsets it was given and whether its system prompt was
        # the orchestrator variant.
        built_agents = []

        def _factory(*a, **kw):
            prompt = kw.get("ephemeral_system_prompt", "") or ""
            is_orchestrator = "Orchestrator Role" in prompt
            m = _make_role_mock_child()
            built_agents.append({
                "enabled_toolsets": list(kw.get("enabled_toolsets") or []),
                "is_orchestrator_prompt": is_orchestrator,
            })

            if is_orchestrator:
                # Prepare the orchestrator mock as a parent-capable object
                # so the nested delegate_task call succeeds.
                m._delegate_depth = 1
                m._delegate_role = "orchestrator"
                m._active_children = []
                m._active_children_lock = threading.Lock()
                m._session_db = None
                m.platform = "cli"
                m.enabled_toolsets = ["terminal", "file", "delegation"]
                m.api_key = "***"
                m.base_url = ""
                m.provider = None
                m.api_mode = None
                m.providers_allowed = None
                m.providers_ignored = None
                m.providers_order = None
                m.provider_sort = None
                m._print_fn = None
                m.tool_progress_callback = None
                m.thinking_callback = None

                def _orchestrator_run(user_message=None, task_id=None):
                    # Re-entrant: the orchestrator (captured from the
                    # enclosing factory call as ``m``) spawns two leaves.
                    delegate_task(
                        tasks=[{"goal": "leaf-A"}, {"goal": "leaf-B"}],
                        parent_agent=m,
                    )
                    return {
                        "final_response": "orchestrated 2 workers",
                        "completed": True, "api_calls": 1,
                        "messages": [],
                    }
                m.run_conversation.side_effect = _orchestrator_run

            return m

        with patch("run_agent.AIAgent", side_effect=_factory) as MockAgent:
            delegate_task(
                goal="top-level orchestration",
                role="orchestrator",
                parent_agent=parent,
            )

        # 1 orchestrator + 2 leaf grandchildren = 3 agents
        self.assertEqual(MockAgent.call_count, 3)
        # First built = the orchestrator (parent's direct child)
        self.assertIn("delegation", built_agents[0]["enabled_toolsets"])
        self.assertTrue(built_agents[0]["is_orchestrator_prompt"])
        # Next two = leaves (grandchildren)
        self.assertNotIn("delegation", built_agents[1]["enabled_toolsets"])
        self.assertFalse(built_agents[1]["is_orchestrator_prompt"])
        self.assertNotIn("delegation", built_agents[2]["enabled_toolsets"])
        self.assertFalse(built_agents[2]["is_orchestrator_prompt"])
2351  
2352  
class TestSubagentApprovalCallback(unittest.TestCase):
    """Subagent worker threads must have a non-interactive approval callback
    installed so dangerous-command prompts don't fall back to input() and
    deadlock the parent's prompt_toolkit TUI.

    Governed by delegation.subagent_auto_approve:
      false (default) → _subagent_auto_deny
      true            → _subagent_auto_approve
    """

    def test_auto_deny_returns_deny(self):
        """_subagent_auto_deny answers 'deny'."""
        from tools.delegate_tool import _subagent_auto_deny
        self.assertEqual(
            _subagent_auto_deny("rm -rf /tmp/x", "dangerous"),
            "deny",
        )

    def test_auto_approve_returns_once(self):
        """_subagent_auto_approve answers 'once'."""
        from tools.delegate_tool import _subagent_auto_approve
        self.assertEqual(
            _subagent_auto_approve("rm -rf /tmp/x", "dangerous"),
            "once",
        )

    @patch("tools.delegate_tool._load_config", return_value={})
    def test_getter_defaults_to_deny(self, _mock_cfg):
        """With no subagent_auto_approve key the getter returns auto-deny."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_deny,
        )
        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"subagent_auto_approve": False},
    )
    def test_getter_explicit_false_is_deny(self, _mock_cfg):
        """subagent_auto_approve=False selects auto-deny."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_deny,
        )
        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"subagent_auto_approve": True},
    )
    def test_getter_true_is_approve(self, _mock_cfg):
        """subagent_auto_approve=True selects auto-approve."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_approve,
        )
        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"subagent_auto_approve": "yes"},
    )
    def test_getter_truthy_string_is_approve(self, _mock_cfg):
        """The string 'yes' is treated as truthy and selects auto-approve."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_approve,
        )
        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)

    def test_executor_initializer_installs_callback_in_worker(self):
        """The initializer sets the callback on the worker thread's TLS,
        not the parent's — verifies the fix actually scopes to workers.
        """
        from concurrent.futures import ThreadPoolExecutor
        from tools.terminal_tool import (
            set_approval_callback as _set_cb,
            _get_approval_callback,
        )
        from tools.delegate_tool import _subagent_auto_deny

        # Clearing the callback below mutates state on the thread running
        # the tests; put the previous value back afterwards so this test
        # cannot leak a cleared callback into later tests.
        previous_cb = _get_approval_callback()
        self.addCleanup(_set_cb, previous_cb)

        # Parent thread has no callback.
        _set_cb(None)
        self.assertIsNone(_get_approval_callback())

        seen = []

        def worker():
            seen.append(_get_approval_callback())

        with ThreadPoolExecutor(
            max_workers=1,
            initializer=_set_cb,
            initargs=(_subagent_auto_deny,),
        ) as executor:
            executor.submit(worker).result()

        self.assertEqual(seen, [_subagent_auto_deny])
        # Parent's callback slot is still empty (TLS isolates threads).
        self.assertIsNone(_get_approval_callback())
2449  
2450  
class TestFallbackModelInheritance(unittest.TestCase):
    """A child agent is built with the parent's fallback provider chain."""

    @staticmethod
    def _child_ctor_kwargs(parent, goal):
        """Build one child under a patched AIAgent; return its ctor kwargs."""
        with patch("run_agent.AIAgent") as agent_cls:
            agent_cls.return_value = MagicMock()
            _build_child_agent(
                task_index=0,
                goal=goal,
                context=None,
                toolsets=None,
                model=None,
                max_iterations=10,
                parent_agent=parent,
                task_count=1,
            )

        _, ctor_kwargs = agent_cls.call_args
        return ctor_kwargs

    def test_child_inherits_fallback_chain(self):
        """parent._fallback_chain is forwarded to AIAgent as fallback_model."""
        chain_entry = {"provider": "openrouter", "model": "gpt-4o-mini", "api_key": "sk-or-x"}
        parent = _make_mock_parent(depth=0)
        parent._fallback_chain = [chain_entry]

        ctor_kwargs = self._child_ctor_kwargs(parent, "test fallback inheritance")

        self.assertEqual(ctor_kwargs["fallback_model"], [chain_entry])

    def test_child_gets_no_fallback_when_parent_chain_empty(self):
        """An empty parent._fallback_chain yields fallback_model=None."""
        parent = _make_mock_parent(depth=0)
        parent._fallback_chain = []

        ctor_kwargs = self._child_ctor_kwargs(parent, "test no fallback")

        self.assertIsNone(ctor_kwargs["fallback_model"])
2496  
2497  
# Script entry point: lets the suite be run directly with
# `python tests/test_delegate.py`, as advertised in the module docstring.
if __name__ == "__main__":
    unittest.main()