test_delegate.py
1 #!/usr/bin/env python3 2 """ 3 Tests for the subagent delegation tool. 4 5 Uses mock AIAgent instances to test the delegation logic without 6 requiring API keys or real LLM calls. 7 8 Run with: python -m pytest tests/test_delegate.py -v 9 or: python tests/test_delegate.py 10 """ 11 12 import json 13 import os 14 import sys 15 import threading 16 import time 17 import unittest 18 from unittest.mock import MagicMock, patch 19 20 from tools.delegate_tool import ( 21 DELEGATE_BLOCKED_TOOLS, 22 DELEGATE_TASK_SCHEMA, 23 DelegateEvent, 24 _get_max_concurrent_children, 25 _LEGACY_EVENT_MAP, 26 MAX_DEPTH, 27 check_delegate_requirements, 28 delegate_task, 29 _build_child_agent, 30 _build_child_progress_callback, 31 _build_child_system_prompt, 32 _strip_blocked_tools, 33 _resolve_child_credential_pool, 34 _resolve_delegation_credentials, 35 ) 36 37 38 def _make_mock_parent(depth=0): 39 """Create a mock parent agent with the fields delegate_task expects.""" 40 parent = MagicMock() 41 parent.base_url = "https://openrouter.ai/api/v1" 42 parent.api_key="***" 43 parent.provider = "openrouter" 44 parent.api_mode = "chat_completions" 45 parent.model = "anthropic/claude-sonnet-4" 46 parent.platform = "cli" 47 parent.providers_allowed = None 48 parent.providers_ignored = None 49 parent.providers_order = None 50 parent.provider_sort = None 51 parent._session_db = None 52 parent._delegate_depth = depth 53 parent._active_children = [] 54 parent._active_children_lock = threading.Lock() 55 parent._print_fn = None 56 parent.tool_progress_callback = None 57 parent.thinking_callback = None 58 return parent 59 60 61 class TestDelegateRequirements(unittest.TestCase): 62 def test_always_available(self): 63 self.assertTrue(check_delegate_requirements()) 64 65 def test_schema_valid(self): 66 self.assertEqual(DELEGATE_TASK_SCHEMA["name"], "delegate_task") 67 props = DELEGATE_TASK_SCHEMA["parameters"]["properties"] 68 self.assertIn("goal", props) 69 self.assertIn("tasks", props) 70 
self.assertIn("context", props) 71 self.assertIn("toolsets", props) 72 # max_iterations is intentionally NOT exposed to the model — it's 73 # config-authoritative via delegation.max_iterations so users get 74 # predictable budgets. 75 self.assertNotIn("max_iterations", props) 76 self.assertNotIn("maxItems", props["tasks"]) # removed — limit is now runtime-configurable 77 78 79 class TestChildSystemPrompt(unittest.TestCase): 80 def test_goal_only(self): 81 prompt = _build_child_system_prompt("Fix the tests") 82 self.assertIn("Fix the tests", prompt) 83 self.assertIn("YOUR TASK", prompt) 84 self.assertNotIn("CONTEXT", prompt) 85 86 def test_goal_with_context(self): 87 prompt = _build_child_system_prompt("Fix the tests", "Error: assertion failed in test_foo.py line 42") 88 self.assertIn("Fix the tests", prompt) 89 self.assertIn("CONTEXT", prompt) 90 self.assertIn("assertion failed", prompt) 91 92 def test_empty_context_ignored(self): 93 prompt = _build_child_system_prompt("Do something", " ") 94 self.assertNotIn("CONTEXT", prompt) 95 96 97 class TestStripBlockedTools(unittest.TestCase): 98 def test_removes_blocked_toolsets(self): 99 result = _strip_blocked_tools(["terminal", "file", "delegation", "clarify", "memory", "code_execution"]) 100 self.assertEqual(sorted(result), ["file", "terminal"]) 101 102 def test_preserves_allowed_toolsets(self): 103 result = _strip_blocked_tools(["terminal", "file", "web", "browser"]) 104 self.assertEqual(sorted(result), ["browser", "file", "terminal", "web"]) 105 106 def test_empty_input(self): 107 result = _strip_blocked_tools([]) 108 self.assertEqual(result, []) 109 110 111 class TestDelegateTask(unittest.TestCase): 112 def test_no_parent_agent(self): 113 result = json.loads(delegate_task(goal="test")) 114 self.assertIn("error", result) 115 self.assertIn("parent agent", result["error"]) 116 117 def test_depth_limit(self): 118 parent = _make_mock_parent(depth=2) 119 result = json.loads(delegate_task(goal="test", parent_agent=parent)) 
120 self.assertIn("error", result) 121 self.assertIn("depth limit", result["error"].lower()) 122 123 def test_no_goal_or_tasks(self): 124 parent = _make_mock_parent() 125 result = json.loads(delegate_task(parent_agent=parent)) 126 self.assertIn("error", result) 127 128 def test_empty_goal(self): 129 parent = _make_mock_parent() 130 result = json.loads(delegate_task(goal=" ", parent_agent=parent)) 131 self.assertIn("error", result) 132 133 def test_task_missing_goal(self): 134 parent = _make_mock_parent() 135 result = json.loads(delegate_task(tasks=[{"context": "no goal here"}], parent_agent=parent)) 136 self.assertIn("error", result) 137 138 @patch("tools.delegate_tool._run_single_child") 139 def test_single_task_mode(self, mock_run): 140 mock_run.return_value = { 141 "task_index": 0, "status": "completed", 142 "summary": "Done!", "api_calls": 3, "duration_seconds": 5.0 143 } 144 parent = _make_mock_parent() 145 result = json.loads(delegate_task(goal="Fix tests", context="error log...", parent_agent=parent)) 146 self.assertIn("results", result) 147 self.assertEqual(len(result["results"]), 1) 148 self.assertEqual(result["results"][0]["status"], "completed") 149 self.assertEqual(result["results"][0]["summary"], "Done!") 150 mock_run.assert_called_once() 151 152 @patch("tools.delegate_tool._run_single_child") 153 def test_batch_mode(self, mock_run): 154 mock_run.side_effect = [ 155 {"task_index": 0, "status": "completed", "summary": "Result A", "api_calls": 2, "duration_seconds": 3.0}, 156 {"task_index": 1, "status": "completed", "summary": "Result B", "api_calls": 4, "duration_seconds": 6.0}, 157 ] 158 parent = _make_mock_parent() 159 tasks = [ 160 {"goal": "Research topic A"}, 161 {"goal": "Research topic B"}, 162 ] 163 result = json.loads(delegate_task(tasks=tasks, parent_agent=parent)) 164 self.assertIn("results", result) 165 self.assertEqual(len(result["results"]), 2) 166 self.assertEqual(result["results"][0]["summary"], "Result A") 167 
self.assertEqual(result["results"][1]["summary"], "Result B") 168 self.assertIn("total_duration_seconds", result) 169 170 @patch("tools.delegate_tool._run_single_child") 171 def test_batch_capped_at_3(self, mock_run): 172 mock_run.return_value = { 173 "task_index": 0, "status": "completed", 174 "summary": "Done", "api_calls": 1, "duration_seconds": 1.0 175 } 176 parent = _make_mock_parent() 177 limit = _get_max_concurrent_children() 178 tasks = [{"goal": f"Task {i}"} for i in range(limit + 2)] 179 result = json.loads(delegate_task(tasks=tasks, parent_agent=parent)) 180 # Should return an error instead of silently truncating 181 self.assertIn("error", result) 182 self.assertIn("Too many tasks", result["error"]) 183 mock_run.assert_not_called() 184 185 @patch("tools.delegate_tool._run_single_child") 186 def test_batch_ignores_toplevel_goal(self, mock_run): 187 """When tasks array is provided, top-level goal/context/toolsets are ignored.""" 188 mock_run.return_value = { 189 "task_index": 0, "status": "completed", 190 "summary": "Done", "api_calls": 1, "duration_seconds": 1.0 191 } 192 parent = _make_mock_parent() 193 result = json.loads(delegate_task( 194 goal="This should be ignored", 195 tasks=[{"goal": "Actual task"}], 196 parent_agent=parent, 197 )) 198 # The mock was called with the tasks array item, not the top-level goal 199 call_args = mock_run.call_args 200 self.assertEqual(call_args.kwargs.get("goal") or call_args[1].get("goal", call_args[0][1] if len(call_args[0]) > 1 else None), "Actual task") 201 202 @patch("tools.delegate_tool._run_single_child") 203 def test_failed_child_included_in_results(self, mock_run): 204 mock_run.return_value = { 205 "task_index": 0, "status": "error", 206 "summary": None, "error": "Something broke", 207 "api_calls": 0, "duration_seconds": 0.5 208 } 209 parent = _make_mock_parent() 210 result = json.loads(delegate_task(goal="Break things", parent_agent=parent)) 211 self.assertEqual(result["results"][0]["status"], "error") 212 
self.assertIn("Something broke", result["results"][0]["error"]) 213 214 def test_depth_increments(self): 215 """Verify child gets parent's depth + 1.""" 216 parent = _make_mock_parent(depth=0) 217 218 with patch("run_agent.AIAgent") as MockAgent: 219 mock_child = MagicMock() 220 mock_child.run_conversation.return_value = { 221 "final_response": "done", "completed": True, "api_calls": 1 222 } 223 MockAgent.return_value = mock_child 224 225 delegate_task(goal="Test depth", parent_agent=parent) 226 self.assertEqual(mock_child._delegate_depth, 1) 227 228 def test_active_children_tracking(self): 229 """Verify children are registered/unregistered for interrupt propagation.""" 230 parent = _make_mock_parent(depth=0) 231 232 with patch("run_agent.AIAgent") as MockAgent: 233 mock_child = MagicMock() 234 mock_child.run_conversation.return_value = { 235 "final_response": "done", "completed": True, "api_calls": 1 236 } 237 MockAgent.return_value = mock_child 238 239 delegate_task(goal="Test tracking", parent_agent=parent) 240 self.assertEqual(len(parent._active_children), 0) 241 242 def test_child_inherits_runtime_credentials(self): 243 parent = _make_mock_parent(depth=0) 244 parent.base_url = "https://chatgpt.com/backend-api/codex" 245 parent.api_key="***" 246 parent.provider = "openai-codex" 247 parent.api_mode = "codex_responses" 248 249 with patch("run_agent.AIAgent") as MockAgent: 250 mock_child = MagicMock() 251 mock_child.run_conversation.return_value = { 252 "final_response": "ok", 253 "completed": True, 254 "api_calls": 1, 255 } 256 MockAgent.return_value = mock_child 257 258 delegate_task(goal="Test runtime inheritance", parent_agent=parent) 259 260 _, kwargs = MockAgent.call_args 261 self.assertEqual(kwargs["base_url"], parent.base_url) 262 self.assertEqual(kwargs["api_key"], parent.api_key) 263 self.assertEqual(kwargs["provider"], parent.provider) 264 self.assertEqual(kwargs["api_mode"], parent.api_mode) 265 266 def test_child_inherits_parent_print_fn(self): 267 
parent = _make_mock_parent(depth=0) 268 sink = MagicMock() 269 parent._print_fn = sink 270 271 with patch("run_agent.AIAgent") as MockAgent: 272 mock_child = MagicMock() 273 MockAgent.return_value = mock_child 274 275 _build_child_agent( 276 task_index=0, 277 goal="Keep stdout clean", 278 context=None, 279 toolsets=None, 280 model=None, 281 max_iterations=10, 282 parent_agent=parent, 283 task_count=1, 284 ) 285 286 self.assertIs(mock_child._print_fn, sink) 287 288 def test_child_uses_thinking_callback_when_progress_callback_available(self): 289 parent = _make_mock_parent(depth=0) 290 parent.tool_progress_callback = MagicMock() 291 292 with patch("run_agent.AIAgent") as MockAgent: 293 mock_child = MagicMock() 294 MockAgent.return_value = mock_child 295 296 _build_child_agent( 297 task_index=0, 298 goal="Avoid raw child spinners", 299 context=None, 300 toolsets=None, 301 model=None, 302 max_iterations=10, 303 parent_agent=parent, 304 task_count=1, 305 ) 306 307 self.assertTrue(callable(mock_child.thinking_callback)) 308 mock_child.thinking_callback("deliberating...") 309 parent.tool_progress_callback.assert_not_called() 310 311 312 class TestToolNamePreservation(unittest.TestCase): 313 """Verify _last_resolved_tool_names is restored after subagent runs.""" 314 315 def test_global_tool_names_restored_after_delegation(self): 316 """The process-global _last_resolved_tool_names must be restored 317 after a subagent completes so the parent's execute_code sandbox 318 generates correct imports.""" 319 import model_tools 320 321 parent = _make_mock_parent(depth=0) 322 original_tools = ["terminal", "read_file", "web_search", "execute_code", "delegate_task"] 323 model_tools._last_resolved_tool_names = list(original_tools) 324 325 with patch("run_agent.AIAgent") as MockAgent: 326 mock_child = MagicMock() 327 mock_child.run_conversation.return_value = { 328 "final_response": "done", "completed": True, "api_calls": 1, 329 } 330 MockAgent.return_value = mock_child 331 332 
delegate_task(goal="Test tool preservation", parent_agent=parent) 333 334 self.assertEqual(model_tools._last_resolved_tool_names, original_tools) 335 336 def test_global_tool_names_restored_after_child_failure(self): 337 """Even when the child agent raises, the global must be restored.""" 338 import model_tools 339 340 parent = _make_mock_parent(depth=0) 341 original_tools = ["terminal", "read_file", "web_search"] 342 model_tools._last_resolved_tool_names = list(original_tools) 343 344 with patch("run_agent.AIAgent") as MockAgent: 345 mock_child = MagicMock() 346 mock_child.run_conversation.side_effect = RuntimeError("boom") 347 MockAgent.return_value = mock_child 348 349 result = json.loads(delegate_task(goal="Crash test", parent_agent=parent)) 350 self.assertEqual(result["results"][0]["status"], "error") 351 352 self.assertEqual(model_tools._last_resolved_tool_names, original_tools) 353 354 def test_build_child_agent_does_not_raise_name_error(self): 355 """Regression: _build_child_agent must not reference _saved_tool_names. 356 357 The bug introduced by the e7844e9c merge conflict: line 235 inside 358 _build_child_agent read `list(_saved_tool_names)` where that variable 359 is only defined later in _run_single_child. Calling _build_child_agent 360 standalone (without _run_single_child's scope) must never raise NameError. 
361 """ 362 parent = _make_mock_parent(depth=0) 363 364 with patch("run_agent.AIAgent"): 365 try: 366 _build_child_agent( 367 task_index=0, 368 goal="regression check", 369 context=None, 370 toolsets=None, 371 model=None, 372 max_iterations=10, 373 parent_agent=parent, 374 task_count=1, 375 ) 376 except NameError as exc: 377 self.fail( 378 f"_build_child_agent raised NameError — " 379 f"_saved_tool_names leaked back into wrong scope: {exc}" 380 ) 381 382 def test_saved_tool_names_set_on_child_before_run(self): 383 """_run_single_child must set _delegate_saved_tool_names on the child 384 from model_tools._last_resolved_tool_names before run_conversation.""" 385 import model_tools 386 387 parent = _make_mock_parent(depth=0) 388 expected_tools = ["read_file", "web_search", "execute_code"] 389 model_tools._last_resolved_tool_names = list(expected_tools) 390 391 captured = {} 392 393 with patch("run_agent.AIAgent") as MockAgent: 394 mock_child = MagicMock() 395 396 def capture_and_return(user_message, task_id=None): 397 captured["saved"] = list(mock_child._delegate_saved_tool_names) 398 return {"final_response": "ok", "completed": True, "api_calls": 1} 399 400 mock_child.run_conversation.side_effect = capture_and_return 401 MockAgent.return_value = mock_child 402 403 delegate_task(goal="capture test", parent_agent=parent) 404 405 self.assertEqual(captured["saved"], expected_tools) 406 407 408 class TestDelegateObservability(unittest.TestCase): 409 """Tests for enriched metadata returned by _run_single_child.""" 410 411 def test_observability_fields_present(self): 412 """Completed child should return tool_trace, tokens, model, exit_reason.""" 413 parent = _make_mock_parent(depth=0) 414 415 with patch("run_agent.AIAgent") as MockAgent: 416 mock_child = MagicMock() 417 mock_child.model = "claude-sonnet-4-6" 418 mock_child.session_prompt_tokens = 5000 419 mock_child.session_completion_tokens = 1200 420 mock_child.run_conversation.return_value = { 421 "final_response": 
"done", 422 "completed": True, 423 "interrupted": False, 424 "api_calls": 3, 425 "messages": [ 426 {"role": "user", "content": "do something"}, 427 {"role": "assistant", "tool_calls": [ 428 {"id": "tc_1", "function": {"name": "web_search", "arguments": '{"query": "test"}'}} 429 ]}, 430 {"role": "tool", "tool_call_id": "tc_1", "content": '{"results": [1,2,3]}'}, 431 {"role": "assistant", "content": "done"}, 432 ], 433 } 434 MockAgent.return_value = mock_child 435 436 result = json.loads(delegate_task(goal="Test observability", parent_agent=parent)) 437 entry = result["results"][0] 438 439 # Core observability fields 440 self.assertEqual(entry["model"], "claude-sonnet-4-6") 441 self.assertEqual(entry["exit_reason"], "completed") 442 self.assertEqual(entry["tokens"]["input"], 5000) 443 self.assertEqual(entry["tokens"]["output"], 1200) 444 445 # Tool trace 446 self.assertEqual(len(entry["tool_trace"]), 1) 447 self.assertEqual(entry["tool_trace"][0]["tool"], "web_search") 448 self.assertIn("args_bytes", entry["tool_trace"][0]) 449 self.assertIn("result_bytes", entry["tool_trace"][0]) 450 self.assertEqual(entry["tool_trace"][0]["status"], "ok") 451 452 def test_tool_trace_detects_error(self): 453 """Tool results containing 'error' should be marked as error status.""" 454 parent = _make_mock_parent(depth=0) 455 456 with patch("run_agent.AIAgent") as MockAgent: 457 mock_child = MagicMock() 458 mock_child.model = "claude-sonnet-4-6" 459 mock_child.session_prompt_tokens = 0 460 mock_child.session_completion_tokens = 0 461 mock_child.run_conversation.return_value = { 462 "final_response": "failed", 463 "completed": True, 464 "interrupted": False, 465 "api_calls": 1, 466 "messages": [ 467 {"role": "assistant", "tool_calls": [ 468 {"id": "tc_1", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}} 469 ]}, 470 {"role": "tool", "tool_call_id": "tc_1", "content": "Error: command not found"}, 471 ], 472 } 473 MockAgent.return_value = mock_child 474 475 result = 
json.loads(delegate_task(goal="Test error trace", parent_agent=parent)) 476 trace = result["results"][0]["tool_trace"] 477 self.assertEqual(trace[0]["status"], "error") 478 479 def test_parallel_tool_calls_paired_correctly(self): 480 """Parallel tool calls should each get their own result via tool_call_id matching.""" 481 parent = _make_mock_parent(depth=0) 482 483 with patch("run_agent.AIAgent") as MockAgent: 484 mock_child = MagicMock() 485 mock_child.model = "claude-sonnet-4-6" 486 mock_child.session_prompt_tokens = 3000 487 mock_child.session_completion_tokens = 800 488 mock_child.run_conversation.return_value = { 489 "final_response": "done", 490 "completed": True, 491 "interrupted": False, 492 "api_calls": 1, 493 "messages": [ 494 {"role": "assistant", "tool_calls": [ 495 {"id": "tc_a", "function": {"name": "web_search", "arguments": '{"q": "a"}'}}, 496 {"id": "tc_b", "function": {"name": "web_search", "arguments": '{"q": "b"}'}}, 497 {"id": "tc_c", "function": {"name": "terminal", "arguments": '{"cmd": "ls"}'}}, 498 ]}, 499 {"role": "tool", "tool_call_id": "tc_a", "content": '{"ok": true}'}, 500 {"role": "tool", "tool_call_id": "tc_b", "content": "Error: rate limited"}, 501 {"role": "tool", "tool_call_id": "tc_c", "content": "file1.txt\nfile2.txt"}, 502 {"role": "assistant", "content": "done"}, 503 ], 504 } 505 MockAgent.return_value = mock_child 506 507 result = json.loads(delegate_task(goal="Test parallel", parent_agent=parent)) 508 trace = result["results"][0]["tool_trace"] 509 510 # All three tool calls should have results 511 self.assertEqual(len(trace), 3) 512 513 # First: web_search → ok 514 self.assertEqual(trace[0]["tool"], "web_search") 515 self.assertEqual(trace[0]["status"], "ok") 516 self.assertIn("result_bytes", trace[0]) 517 518 # Second: web_search → error 519 self.assertEqual(trace[1]["tool"], "web_search") 520 self.assertEqual(trace[1]["status"], "error") 521 self.assertIn("result_bytes", trace[1]) 522 523 # Third: terminal → ok 524 
self.assertEqual(trace[2]["tool"], "terminal") 525 self.assertEqual(trace[2]["status"], "ok") 526 self.assertIn("result_bytes", trace[2]) 527 528 def test_exit_reason_interrupted(self): 529 """Interrupted child should report exit_reason='interrupted'.""" 530 parent = _make_mock_parent(depth=0) 531 532 with patch("run_agent.AIAgent") as MockAgent: 533 mock_child = MagicMock() 534 mock_child.model = "claude-sonnet-4-6" 535 mock_child.session_prompt_tokens = 0 536 mock_child.session_completion_tokens = 0 537 mock_child.run_conversation.return_value = { 538 "final_response": "", 539 "completed": False, 540 "interrupted": True, 541 "api_calls": 2, 542 "messages": [], 543 } 544 MockAgent.return_value = mock_child 545 546 result = json.loads(delegate_task(goal="Test interrupt", parent_agent=parent)) 547 self.assertEqual(result["results"][0]["exit_reason"], "interrupted") 548 549 def test_exit_reason_max_iterations(self): 550 """Child that didn't complete and wasn't interrupted hit max_iterations.""" 551 parent = _make_mock_parent(depth=0) 552 553 with patch("run_agent.AIAgent") as MockAgent: 554 mock_child = MagicMock() 555 mock_child.model = "claude-sonnet-4-6" 556 mock_child.session_prompt_tokens = 0 557 mock_child.session_completion_tokens = 0 558 mock_child.run_conversation.return_value = { 559 "final_response": "", 560 "completed": False, 561 "interrupted": False, 562 "api_calls": 50, 563 "messages": [], 564 } 565 MockAgent.return_value = mock_child 566 567 result = json.loads(delegate_task(goal="Test max iter", parent_agent=parent)) 568 self.assertEqual(result["results"][0]["exit_reason"], "max_iterations") 569 570 571 class TestSubagentCostRollup(unittest.TestCase): 572 """Port of Kilo-Org/kilocode#9448 — parent's session_estimated_cost_usd 573 must include subagent spend, not just the parent's own API calls.""" 574 575 def _make_parent_with_cost_counters(self, depth=0, starting_cost=0.0): 576 parent = _make_mock_parent(depth=depth) 577 # The fields AIAgent exposes 
and the footer reads from. Set real 578 # floats/strings so the rollup can add to them rather than tripping 579 # on MagicMock auto-attrs. 580 parent.session_estimated_cost_usd = starting_cost 581 parent.session_cost_status = "unknown" 582 parent.session_cost_source = "none" 583 return parent 584 585 def test_single_child_cost_folded_into_parent(self): 586 parent = self._make_parent_with_cost_counters(starting_cost=0.10) 587 588 with patch("run_agent.AIAgent") as MockAgent: 589 mock_child = MagicMock() 590 mock_child.model = "claude-sonnet-4-6" 591 mock_child.session_prompt_tokens = 1000 592 mock_child.session_completion_tokens = 200 593 mock_child.session_estimated_cost_usd = 0.42 594 mock_child.run_conversation.return_value = { 595 "final_response": "done", 596 "completed": True, 597 "interrupted": False, 598 "api_calls": 2, 599 "messages": [], 600 } 601 MockAgent.return_value = mock_child 602 603 result = json.loads(delegate_task(goal="do stuff", parent_agent=parent)) 604 605 # Parent footer must reflect parent_cost + child_cost. 606 self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.52, places=6) 607 # Rollup must strip the internal field before serialising to the model. 
608 self.assertNotIn("_child_cost_usd", result["results"][0]) 609 self.assertNotIn("_child_role", result["results"][0]) 610 611 def test_batch_children_costs_sum_into_parent(self): 612 parent = self._make_parent_with_cost_counters(starting_cost=0.00) 613 614 with patch("tools.delegate_tool._run_single_child") as mock_run: 615 mock_run.side_effect = [ 616 { 617 "task_index": 0, 618 "status": "completed", 619 "summary": "A", 620 "api_calls": 2, 621 "duration_seconds": 1.0, 622 "_child_role": "leaf", 623 "_child_cost_usd": 0.15, 624 }, 625 { 626 "task_index": 1, 627 "status": "completed", 628 "summary": "B", 629 "api_calls": 2, 630 "duration_seconds": 1.0, 631 "_child_role": "leaf", 632 "_child_cost_usd": 0.27, 633 }, 634 { 635 "task_index": 2, 636 "status": "failed", 637 "summary": "", 638 "error": "boom", 639 "api_calls": 0, 640 "duration_seconds": 0.1, 641 "_child_role": "leaf", 642 "_child_cost_usd": 0.03, 643 }, 644 ] 645 result = json.loads( 646 delegate_task( 647 tasks=[{"goal": "A"}, {"goal": "B"}, {"goal": "C"}], 648 parent_agent=parent, 649 ) 650 ) 651 652 # 0.15 + 0.27 + 0.03 even though one child failed — the API calls it 653 # made before failing still cost money. 654 self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.45, places=6) 655 # cost_source promoted from "none" since the parent had no direct spend. 656 self.assertEqual(parent.session_cost_source, "subagent") 657 self.assertEqual(parent.session_cost_status, "estimated") 658 # All internal fields stripped from results. 659 for entry in result["results"]: 660 self.assertNotIn("_child_cost_usd", entry) 661 self.assertNotIn("_child_role", entry) 662 663 def test_zero_cost_children_leave_parent_source_untouched(self): 664 """If every child reports 0 cost (e.g. 
free local model), we should 665 not invent a fake 'subagent' source — the parent's 'none' stays.""" 666 parent = self._make_parent_with_cost_counters(starting_cost=0.00) 667 668 with patch("tools.delegate_tool._run_single_child") as mock_run: 669 mock_run.return_value = { 670 "task_index": 0, 671 "status": "completed", 672 "summary": "done", 673 "api_calls": 1, 674 "duration_seconds": 0.5, 675 "_child_role": "leaf", 676 "_child_cost_usd": 0.0, 677 } 678 delegate_task(goal="free local run", parent_agent=parent) 679 680 self.assertEqual(parent.session_estimated_cost_usd, 0.0) 681 self.assertEqual(parent.session_cost_source, "none") 682 683 def test_parent_with_real_source_not_overwritten(self): 684 """If the parent already has its own cost billed (cost_source != 'none'), 685 adding subagent cost must not clobber the existing source label.""" 686 parent = self._make_parent_with_cost_counters(starting_cost=0.20) 687 parent.session_cost_status = "exact" 688 parent.session_cost_source = "openrouter" 689 690 with patch("tools.delegate_tool._run_single_child") as mock_run: 691 mock_run.return_value = { 692 "task_index": 0, 693 "status": "completed", 694 "summary": "done", 695 "api_calls": 1, 696 "duration_seconds": 0.5, 697 "_child_role": "leaf", 698 "_child_cost_usd": 0.30, 699 } 700 delegate_task(goal="billed run", parent_agent=parent) 701 702 self.assertAlmostEqual(parent.session_estimated_cost_usd, 0.50, places=6) 703 # Real source label preserved. 704 self.assertEqual(parent.session_cost_source, "openrouter") 705 self.assertEqual(parent.session_cost_status, "exact") 706 707 def test_rollup_tolerates_missing_cost_fields(self): 708 """Older fixtures / fabricated error entries may not carry 709 _child_cost_usd. 
Rollup must degrade to zero-add silently.""" 710 parent = self._make_parent_with_cost_counters(starting_cost=0.10) 711 712 with patch("tools.delegate_tool._run_single_child") as mock_run: 713 mock_run.return_value = { 714 "task_index": 0, 715 "status": "completed", 716 "summary": "done", 717 "api_calls": 1, 718 "duration_seconds": 0.5, 719 # no _child_role, no _child_cost_usd 720 } 721 result = json.loads(delegate_task(goal="legacy", parent_agent=parent)) 722 723 # Parent cost unchanged. 724 self.assertEqual(parent.session_estimated_cost_usd, 0.10) 725 self.assertEqual(len(result["results"]), 1) 726 727 728 class TestBlockedTools(unittest.TestCase): 729 def test_blocked_tools_constant(self): 730 for tool in ["delegate_task", "clarify", "memory", "send_message", "execute_code"]: 731 self.assertIn(tool, DELEGATE_BLOCKED_TOOLS) 732 733 def test_constants(self): 734 from tools.delegate_tool import ( 735 _get_max_spawn_depth, _get_orchestrator_enabled, 736 _MIN_SPAWN_DEPTH, _MAX_SPAWN_DEPTH_CAP, 737 ) 738 self.assertEqual(_get_max_concurrent_children(), 3) 739 self.assertEqual(MAX_DEPTH, 1) 740 self.assertEqual(_get_max_spawn_depth(), 1) # default: flat 741 self.assertTrue(_get_orchestrator_enabled()) # default 742 self.assertEqual(_MIN_SPAWN_DEPTH, 1) 743 self.assertEqual(_MAX_SPAWN_DEPTH_CAP, 3) 744 745 746 class TestDelegationCredentialResolution(unittest.TestCase): 747 """Tests for provider:model credential resolution in delegation config.""" 748 749 def test_no_provider_returns_none_credentials(self): 750 """When delegation.provider is empty, all credentials are None (inherit parent).""" 751 parent = _make_mock_parent(depth=0) 752 cfg = {"model": "", "provider": ""} 753 creds = _resolve_delegation_credentials(cfg, parent) 754 self.assertIsNone(creds["provider"]) 755 self.assertIsNone(creds["base_url"]) 756 self.assertIsNone(creds["api_key"]) 757 self.assertIsNone(creds["api_mode"]) 758 self.assertIsNone(creds["model"]) 759 760 def test_model_only_no_provider(self): 
761 """When only model is set (no provider), model is returned but credentials are None.""" 762 parent = _make_mock_parent(depth=0) 763 cfg = {"model": "google/gemini-3-flash-preview", "provider": ""} 764 creds = _resolve_delegation_credentials(cfg, parent) 765 self.assertEqual(creds["model"], "google/gemini-3-flash-preview") 766 self.assertIsNone(creds["provider"]) 767 self.assertIsNone(creds["base_url"]) 768 self.assertIsNone(creds["api_key"]) 769 770 @patch("hermes_cli.runtime_provider.resolve_runtime_provider") 771 def test_provider_resolves_full_credentials(self, mock_resolve): 772 """When delegation.provider is set, full credentials are resolved.""" 773 mock_resolve.return_value = { 774 "provider": "openrouter", 775 "base_url": "https://openrouter.ai/api/v1", 776 "api_key": "sk-or-test-key", 777 "api_mode": "chat_completions", 778 } 779 parent = _make_mock_parent(depth=0) 780 cfg = {"model": "google/gemini-3-flash-preview", "provider": "openrouter"} 781 creds = _resolve_delegation_credentials(cfg, parent) 782 self.assertEqual(creds["model"], "google/gemini-3-flash-preview") 783 self.assertEqual(creds["provider"], "openrouter") 784 self.assertEqual(creds["base_url"], "https://openrouter.ai/api/v1") 785 self.assertEqual(creds["api_key"], "sk-or-test-key") 786 self.assertEqual(creds["api_mode"], "chat_completions") 787 mock_resolve.assert_called_once_with(requested="openrouter") 788 789 @patch("hermes_cli.runtime_provider.resolve_runtime_provider") 790 def test_provider_resolution_uses_runtime_model_when_config_model_missing(self, mock_resolve): 791 """Named providers should propagate their runtime default model to children.""" 792 mock_resolve.return_value = { 793 "provider": "custom", 794 "base_url": "https://my-server.example/v1", 795 "api_key": "sk-test-key", 796 "api_mode": "chat_completions", 797 "model": "server-default-model", 798 } 799 parent = _make_mock_parent(depth=0) 800 cfg = {"provider": "custom:my-server", "model": ""} 801 802 creds = 
_resolve_delegation_credentials(cfg, parent) 803 804 self.assertEqual(creds["model"], "server-default-model") 805 self.assertEqual(creds["provider"], "custom") 806 self.assertEqual(creds["base_url"], "https://my-server.example/v1") 807 mock_resolve.assert_called_once_with(requested="custom:my-server") 808 809 def test_direct_endpoint_uses_configured_base_url_and_api_key(self): 810 parent = _make_mock_parent(depth=0) 811 cfg = { 812 "model": "qwen2.5-coder", 813 "provider": "openrouter", 814 "base_url": "http://localhost:1234/v1", 815 "api_key": "local-key", 816 } 817 creds = _resolve_delegation_credentials(cfg, parent) 818 self.assertEqual(creds["model"], "qwen2.5-coder") 819 self.assertEqual(creds["provider"], "custom") 820 self.assertEqual(creds["base_url"], "http://localhost:1234/v1") 821 self.assertEqual(creds["api_key"], "local-key") 822 self.assertEqual(creds["api_mode"], "chat_completions") 823 824 def test_direct_endpoint_returns_none_api_key_when_not_configured(self): 825 # When base_url is set without api_key, api_key should be None so 826 # _build_child_agent inherits the parent's key (effective_api_key = override or parent). 827 parent = _make_mock_parent(depth=0) 828 cfg = { 829 "model": "qwen2.5-coder", 830 "base_url": "http://localhost:1234/v1", 831 } 832 with patch.dict(os.environ, {"OPENAI_API_KEY": "env-openai-key"}, clear=False): 833 creds = _resolve_delegation_credentials(cfg, parent) 834 self.assertIsNone(creds["api_key"]) 835 self.assertEqual(creds["provider"], "custom") 836 837 def test_direct_endpoint_no_raise_when_only_provider_env_key_present(self): 838 # Even if OPENAI_API_KEY is absent, no ValueError — _build_child_agent uses parent key. 
839 parent = _make_mock_parent(depth=0) 840 cfg = { 841 "model": "qwen2.5-coder", 842 "base_url": "http://localhost:1234/v1", 843 } 844 with patch.dict( 845 os.environ, 846 { 847 "OPENROUTER_API_KEY": "env-openrouter-key", 848 "OPENAI_API_KEY": "", 849 }, 850 clear=False, 851 ): 852 creds = _resolve_delegation_credentials(cfg, parent) 853 self.assertIsNone(creds["api_key"]) 854 self.assertEqual(creds["provider"], "custom") 855 856 @patch("hermes_cli.runtime_provider.resolve_runtime_provider") 857 def test_nous_provider_resolves_nous_credentials(self, mock_resolve): 858 """Nous provider resolves Nous Portal base_url and api_key.""" 859 mock_resolve.return_value = { 860 "provider": "nous", 861 "base_url": "https://inference-api.nousresearch.com/v1", 862 "api_key": "nous-agent-key-xyz", 863 "api_mode": "chat_completions", 864 } 865 parent = _make_mock_parent(depth=0) 866 cfg = {"model": "hermes-3-llama-3.1-8b", "provider": "nous"} 867 creds = _resolve_delegation_credentials(cfg, parent) 868 self.assertEqual(creds["provider"], "nous") 869 self.assertEqual(creds["base_url"], "https://inference-api.nousresearch.com/v1") 870 self.assertEqual(creds["api_key"], "nous-agent-key-xyz") 871 mock_resolve.assert_called_once_with(requested="nous") 872 873 @patch("hermes_cli.runtime_provider.resolve_runtime_provider") 874 def test_provider_resolution_failure_raises_valueerror(self, mock_resolve): 875 """When provider resolution fails, ValueError is raised with helpful message.""" 876 mock_resolve.side_effect = RuntimeError("OPENROUTER_API_KEY not set") 877 parent = _make_mock_parent(depth=0) 878 cfg = {"model": "some-model", "provider": "openrouter"} 879 with self.assertRaises(ValueError) as ctx: 880 _resolve_delegation_credentials(cfg, parent) 881 self.assertIn("openrouter", str(ctx.exception).lower()) 882 self.assertIn("Cannot resolve", str(ctx.exception)) 883 884 @patch("hermes_cli.runtime_provider.resolve_runtime_provider") 885 def 
class TestDelegationProviderIntegration(unittest.TestCase):
    """Integration tests: delegation config → _run_single_child → AIAgent construction."""

    @staticmethod
    def _delegate_and_capture_agent_kwargs(goal, parent):
        """Run ``delegate_task`` against a patched AIAgent; return its constructor kwargs.

        ``run_agent.AIAgent`` is replaced by a mock whose instance reports a
        completed one-call conversation, so no real agent is built.
        """
        with patch("run_agent.AIAgent") as agent_cls:
            fake_child = MagicMock()
            fake_child.run_conversation.return_value = {
                "final_response": "done", "completed": True, "api_calls": 1
            }
            agent_cls.return_value = fake_child

            delegate_task(goal=goal, parent_agent=parent)

            _, ctor_kwargs = agent_cls.call_args
        return ctor_kwargs

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_config_provider_credentials_reach_child_agent(self, mock_creds, mock_cfg):
        """When delegation.provider is configured, child agent gets resolved credentials."""
        mock_cfg.return_value = {
            "max_iterations": 45,
            "model": "google/gemini-3-flash-preview",
            "provider": "openrouter",
        }
        mock_creds.return_value = {
            "model": "google/gemini-3-flash-preview",
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-or-delegation-key",
            "api_mode": "chat_completions",
        }
        parent_agent = _make_mock_parent(depth=0)

        ctor_kwargs = self._delegate_and_capture_agent_kwargs(
            "Test provider routing", parent_agent
        )

        self.assertEqual(ctor_kwargs["model"], "google/gemini-3-flash-preview")
        self.assertEqual(ctor_kwargs["provider"], "openrouter")
        self.assertEqual(ctor_kwargs["base_url"], "https://openrouter.ai/api/v1")
        self.assertEqual(ctor_kwargs["api_key"], "sk-or-delegation-key")
        self.assertEqual(ctor_kwargs["api_mode"], "chat_completions")

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_cross_provider_delegation(self, mock_creds, mock_cfg):
        """Parent on Nous, subagent on OpenRouter — full credential switch."""
        mock_cfg.return_value = {
            "max_iterations": 45,
            "model": "google/gemini-3-flash-preview",
            "provider": "openrouter",
        }
        mock_creds.return_value = {
            "model": "google/gemini-3-flash-preview",
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-or-key",
            "api_mode": "chat_completions",
        }
        parent_agent = _make_mock_parent(depth=0)
        parent_agent.provider = "nous"
        parent_agent.base_url = "https://inference-api.nousresearch.com/v1"
        parent_agent.api_key = "nous-key-abc"

        ctor_kwargs = self._delegate_and_capture_agent_kwargs(
            "Cross-provider test", parent_agent
        )

        # Child should use OpenRouter, NOT Nous
        self.assertEqual(ctor_kwargs["provider"], "openrouter")
        self.assertEqual(ctor_kwargs["base_url"], "https://openrouter.ai/api/v1")
        self.assertEqual(ctor_kwargs["api_key"], "sk-or-key")
        self.assertNotEqual(ctor_kwargs["base_url"], parent_agent.base_url)
        self.assertNotEqual(ctor_kwargs["api_key"], parent_agent.api_key)

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_provider_override_clears_parent_openrouter_filters(
        self, mock_creds, mock_cfg
    ):
        """Delegated provider should not inherit parent provider-preference filters."""
        mock_cfg.return_value = {
            "max_iterations": 45,
            "model": "google/gemini-3-flash-preview",
            "provider": "openrouter",
        }
        mock_creds.return_value = {
            "model": "google/gemini-3-flash-preview",
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-or-key",
            "api_mode": "chat_completions",
        }
        parent_agent = _make_mock_parent(depth=0)
        parent_agent.providers_allowed = ["anthropic/claude-3.5-sonnet"]
        parent_agent.providers_ignored = ["openai/gpt-4o-mini"]
        parent_agent.providers_order = ["google/gemini-2.5-pro"]
        parent_agent.provider_sort = "price"

        ctor_kwargs = self._delegate_and_capture_agent_kwargs(
            "Cross-provider test", parent_agent
        )

        self.assertEqual(ctor_kwargs["provider"], "openrouter")
        # Every routing preference set on the parent must arrive as None.
        for filter_name in (
            "providers_allowed",
            "providers_ignored",
            "providers_order",
            "provider_sort",
        ):
            self.assertIsNone(ctor_kwargs[filter_name])

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_direct_endpoint_credentials_reach_child_agent(self, mock_creds, mock_cfg):
        """Resolved direct-endpoint ("custom") credentials are passed to the child AIAgent."""
        mock_cfg.return_value = {
            "max_iterations": 45,
            "model": "qwen2.5-coder",
            "base_url": "http://localhost:1234/v1",
            "api_key": "local-key",
        }
        mock_creds.return_value = {
            "model": "qwen2.5-coder",
            "provider": "custom",
            "base_url": "http://localhost:1234/v1",
            "api_key": "local-key",
            "api_mode": "chat_completions",
        }
        parent_agent = _make_mock_parent(depth=0)

        ctor_kwargs = self._delegate_and_capture_agent_kwargs(
            "Direct endpoint test", parent_agent
        )

        self.assertEqual(ctor_kwargs["model"], "qwen2.5-coder")
        self.assertEqual(ctor_kwargs["provider"], "custom")
        self.assertEqual(ctor_kwargs["base_url"], "http://localhost:1234/v1")
        self.assertEqual(ctor_kwargs["api_key"], "local-key")
        self.assertEqual(ctor_kwargs["api_mode"], "chat_completions")

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_empty_config_inherits_parent(self, mock_creds, mock_cfg):
        """When delegation config is empty, child inherits parent credentials."""
        mock_cfg.return_value = {"max_iterations": 45, "model": "", "provider": ""}
        mock_creds.return_value = {
            "model": None,
            "provider": None,
            "base_url": None,
            "api_key": None,
            "api_mode": None,
        }
        parent_agent = _make_mock_parent(depth=0)

        ctor_kwargs = self._delegate_and_capture_agent_kwargs(
            "Test inherit", parent_agent
        )

        self.assertEqual(ctor_kwargs["model"], parent_agent.model)
        self.assertEqual(ctor_kwargs["provider"], parent_agent.provider)
        self.assertEqual(ctor_kwargs["base_url"], parent_agent.base_url)

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_credential_error_returns_json_error(self, mock_creds, mock_cfg):
        """When credential resolution fails, delegate_task returns a JSON error."""
        mock_cfg.return_value = {"model": "bad-model", "provider": "nonexistent"}
        mock_creds.side_effect = ValueError(
            "Cannot resolve delegation provider 'nonexistent': Unknown provider"
        )
        parent_agent = _make_mock_parent(depth=0)

        raw_reply = delegate_task(goal="Should fail", parent_agent=parent_agent)
        payload = json.loads(raw_reply)

        self.assertIn("error", payload)
        self.assertIn("Cannot resolve", payload["error"])
        self.assertIn("nonexistent", payload["error"])

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_batch_mode_all_children_get_credentials(self, mock_creds, mock_cfg):
        """In batch mode, all children receive the resolved credentials."""
        mock_cfg.return_value = {
            "max_iterations": 45,
            "model": "meta-llama/llama-4-scout",
            "provider": "openrouter",
        }
        mock_creds.return_value = {
            "model": "meta-llama/llama-4-scout",
            "provider": "openrouter",
            "base_url": "https://openrouter.ai/api/v1",
            "api_key": "sk-or-batch",
            "api_mode": "chat_completions",
        }
        parent_agent = _make_mock_parent(depth=0)
        batch = [{"goal": "Task A"}, {"goal": "Task B"}]

        # Patch _build_child_agent since credentials are now passed there
        # (agents are built in the main thread before being handed to workers)
        with patch("tools.delegate_tool._build_child_agent") as mock_build, \
                patch("tools.delegate_tool._run_single_child") as mock_run:
            mock_build.return_value = MagicMock()
            mock_run.return_value = {
                "task_index": 0, "status": "completed",
                "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
            }

            delegate_task(tasks=batch, parent_agent=parent_agent)

            self.assertEqual(mock_build.call_count, 2)
            for build_call in mock_build.call_args_list:
                passed = build_call.kwargs
                self.assertEqual(passed.get("model"), "meta-llama/llama-4-scout")
                self.assertEqual(passed.get("override_provider"), "openrouter")
                self.assertEqual(
                    passed.get("override_base_url"), "https://openrouter.ai/api/v1"
                )
                self.assertEqual(passed.get("override_api_key"), "sk-or-batch")
                self.assertEqual(passed.get("override_api_mode"), "chat_completions")

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_delegation_acp_runtime_reaches_child_agent(self, mock_creds, mock_cfg):
        """Resolved ACP runtime command/args must be forwarded to child agents."""
        mock_cfg.return_value = {
            "max_iterations": 45,
            "model": "copilot-model",
            "provider": "copilot-acp",
        }
        mock_creds.return_value = {
            "model": "copilot-model",
            "provider": "copilot-acp",
            "base_url": "acp://copilot",
            "api_key": "copilot-acp",
            "api_mode": "chat_completions",
            "command": "custom-copilot",
            "args": ["--stdio-custom"],
        }
        parent_agent = _make_mock_parent(depth=0)

        with patch("tools.delegate_tool._build_child_agent") as mock_build, \
                patch("tools.delegate_tool._run_single_child") as mock_run:
            mock_build.return_value = MagicMock()
            mock_run.return_value = {
                "task_index": 0, "status": "completed",
                "summary": "Done", "api_calls": 1, "duration_seconds": 1.0
            }

            delegate_task(goal="ACP delegation test", parent_agent=parent_agent)

            _, passed = mock_build.call_args
            self.assertEqual(passed.get("override_provider"), "copilot-acp")
            self.assertEqual(passed.get("override_base_url"), "acp://copilot")
            self.assertEqual(passed.get("override_api_key"), "copilot-acp")
            self.assertEqual(passed.get("override_api_mode"), "chat_completions")
            self.assertEqual(passed.get("override_acp_command"), "custom-copilot")
            self.assertEqual(passed.get("override_acp_args"), ["--stdio-custom"])

    @patch("tools.delegate_tool._load_config")
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_model_only_no_provider_inherits_parent_credentials(self, mock_creds, mock_cfg):
        """Setting only model (no provider) changes model but keeps parent credentials."""
        mock_cfg.return_value = {
            "max_iterations": 45,
            "model": "google/gemini-3-flash-preview",
            "provider": "",
        }
        mock_creds.return_value = {
            "model": "google/gemini-3-flash-preview",
            "provider": None,
            "base_url": None,
            "api_key": None,
            "api_mode": None,
        }
        parent_agent = _make_mock_parent(depth=0)

        ctor_kwargs = self._delegate_and_capture_agent_kwargs(
            "Model only test", parent_agent
        )

        # Model should be overridden
        self.assertEqual(ctor_kwargs["model"], "google/gemini-3-flash-preview")
        # But provider/base_url should inherit from parent
        self.assertEqual(ctor_kwargs["provider"], parent_agent.provider)
        self.assertEqual(ctor_kwargs["base_url"], parent_agent.base_url)
class TestChildCredentialPoolResolution(unittest.TestCase):
    """Tests for _resolve_child_credential_pool and pool/toolset wiring in _build_child_agent."""

    @staticmethod
    def _narrowed_child_enabled_toolsets():
        """Build a child narrowed to web+browser; return the toolsets AIAgent received.

        The parent has "web", "browser" and "mcp-MiniMax" enabled; the child
        is requested with only "web" and "browser".
        """
        parent_agent = _make_mock_parent()
        parent_agent.enabled_toolsets = ["web", "browser", "mcp-MiniMax"]

        with patch("run_agent.AIAgent") as agent_cls:
            agent_cls.return_value = MagicMock()

            _build_child_agent(
                task_index=0,
                goal="Test narrowed toolsets",
                context=None,
                toolsets=["web", "browser"],
                model=None,
                max_iterations=10,
                parent_agent=parent_agent,
                task_count=1,
            )

            return agent_cls.call_args[1]["enabled_toolsets"]

    def test_same_provider_shares_parent_pool(self):
        """Requesting the parent's own provider returns the parent's pool object."""
        parent_agent = _make_mock_parent()
        shared_pool = MagicMock()
        parent_agent._credential_pool = shared_pool

        resolved_pool = _resolve_child_credential_pool("openrouter", parent_agent)

        self.assertIs(resolved_pool, shared_pool)

    def test_no_provider_inherits_parent_pool(self):
        """A ``None`` provider returns the parent's pool object."""
        parent_agent = _make_mock_parent()
        shared_pool = MagicMock()
        parent_agent._credential_pool = shared_pool

        resolved_pool = _resolve_child_credential_pool(None, parent_agent)

        self.assertIs(resolved_pool, shared_pool)

    def test_different_provider_loads_own_pool(self):
        """A different provider gets the pool returned by ``load_pool`` when it has credentials."""
        parent_agent = _make_mock_parent()
        parent_agent._credential_pool = MagicMock()
        loaded_pool = MagicMock()
        loaded_pool.has_credentials.return_value = True

        with patch("agent.credential_pool.load_pool", return_value=loaded_pool):
            resolved_pool = _resolve_child_credential_pool("anthropic", parent_agent)

        self.assertIs(resolved_pool, loaded_pool)

    def test_different_provider_empty_pool_returns_none(self):
        """A loaded pool reporting no credentials resolves to None."""
        parent_agent = _make_mock_parent()
        parent_agent._credential_pool = MagicMock()
        empty_pool = MagicMock()
        empty_pool.has_credentials.return_value = False

        with patch("agent.credential_pool.load_pool", return_value=empty_pool):
            resolved_pool = _resolve_child_credential_pool("anthropic", parent_agent)

        self.assertIsNone(resolved_pool)

    def test_different_provider_load_failure_returns_none(self):
        """An exception raised by ``load_pool`` resolves to None instead of propagating."""
        parent_agent = _make_mock_parent()
        parent_agent._credential_pool = MagicMock()

        with patch(
            "agent.credential_pool.load_pool", side_effect=Exception("disk error")
        ):
            resolved_pool = _resolve_child_credential_pool("anthropic", parent_agent)

        self.assertIsNone(resolved_pool)

    def test_build_child_agent_assigns_parent_pool_when_shared(self):
        """With no provider override, the built child carries the parent's pool."""
        parent_agent = _make_mock_parent()
        shared_pool = MagicMock()
        parent_agent._credential_pool = shared_pool

        with patch("run_agent.AIAgent") as agent_cls:
            built_child = MagicMock()
            agent_cls.return_value = built_child

            _build_child_agent(
                task_index=0,
                goal="Test pool assignment",
                context=None,
                toolsets=["terminal"],
                model=None,
                max_iterations=10,
                parent_agent=parent_agent,
                task_count=1,
            )

            self.assertEqual(built_child._credential_pool, shared_pool)

    @patch("tools.delegate_tool._load_config", return_value={})
    def test_build_child_agent_preserves_mcp_toolsets_by_default(self, mock_cfg):
        """With an empty delegation config, the parent's MCP toolset survives narrowing."""
        self.assertEqual(
            self._narrowed_child_enabled_toolsets(),
            ["web", "browser", "mcp-MiniMax"],
        )

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"inherit_mcp_toolsets": False},
    )
    def test_build_child_agent_strict_intersection_when_opted_out(self, mock_cfg):
        """With ``inherit_mcp_toolsets: False``, only the requested toolsets remain."""
        self.assertEqual(
            self._narrowed_child_enabled_toolsets(),
            ["web", "browser"],
        )
class TestChildCredentialLeasing(unittest.TestCase):
    """_run_single_child must lease a pool credential and always release it."""

    def test_run_single_child_acquires_and_releases_lease(self):
        """A successful run acquires one lease, swaps to it, then releases it."""
        from tools.delegate_tool import _run_single_child

        leased = MagicMock()
        leased.id = "cred-b"

        pool = MagicMock()
        pool.acquire_lease.return_value = "cred-b"
        pool.current.return_value = leased

        child = MagicMock()
        child._credential_pool = pool
        child.run_conversation.return_value = {
            "final_response": "done",
            "completed": True,
            "interrupted": False,
            "api_calls": 1,
            "messages": [],
        }

        outcome = _run_single_child(
            task_index=0,
            goal="Investigate rate limits",
            child=child,
            parent_agent=_make_mock_parent(),
        )

        self.assertEqual(outcome["status"], "completed")
        child._credential_pool.acquire_lease.assert_called_once_with()
        child._swap_credential.assert_called_once_with(leased)
        child._credential_pool.release_lease.assert_called_once_with("cred-b")

    def test_run_single_child_releases_lease_after_failure(self):
        """The lease is released even when run_conversation raises."""
        from tools.delegate_tool import _run_single_child

        pool = MagicMock()
        pool.acquire_lease.return_value = "cred-a"
        pool.current.return_value = MagicMock(id="cred-a")

        child = MagicMock()
        child._credential_pool = pool
        child.run_conversation.side_effect = RuntimeError("boom")

        outcome = _run_single_child(
            task_index=1,
            goal="Trigger failure",
            child=child,
            parent_agent=_make_mock_parent(),
        )

        self.assertEqual(outcome["status"], "error")
        child._credential_pool.release_lease.assert_called_once_with("cred-a")
class TestDelegateHeartbeat(unittest.TestCase):
    """Heartbeat propagates child activity to parent during delegation.

    Without the heartbeat, the gateway inactivity timeout fires because the
    parent's _last_activity_ts freezes when delegate_task starts.
    """

    # Interval patched into tools.delegate_tool so heartbeats fire quickly.
    _FAST_INTERVAL = 0.05

    @staticmethod
    def _recording_parent():
        """Return ``(parent, touches)``; every _touch_activity call appends to ``touches``."""
        parent = _make_mock_parent()
        touches = []
        parent._touch_activity = lambda desc: touches.append(desc)
        return parent, touches

    @staticmethod
    def _child_reporting(current_tool, api_call_count, max_iterations, last_activity_desc):
        """Return a mock child whose get_activity_summary() always yields these values."""
        child = MagicMock()
        child.get_activity_summary.return_value = {
            "current_tool": current_tool,
            "api_call_count": api_call_count,
            "max_iterations": max_iterations,
            "last_activity_desc": last_activity_desc,
        }
        return child

    @staticmethod
    def _blocking_run(seconds, api_calls):
        """Return a run_conversation stand-in that sleeps, then reports success."""
        def _run(**kwargs):
            time.sleep(seconds)
            return {"final_response": "done", "completed": True, "api_calls": api_calls}
        return _run

    @classmethod
    def _run_child(cls, child, parent, goal):
        """Invoke _run_single_child for task 0 with the heartbeat interval patched down."""
        from tools.delegate_tool import _run_single_child

        with patch("tools.delegate_tool._HEARTBEAT_INTERVAL", cls._FAST_INTERVAL):
            return _run_single_child(
                task_index=0,
                goal=goal,
                child=child,
                parent_agent=parent,
            )

    def test_heartbeat_touches_parent_activity_during_child_run(self):
        """Parent's _touch_activity is called while child.run_conversation blocks."""
        parent, touches = self._recording_parent()
        child = self._child_reporting(
            "terminal", 3, 50, "executing tool: terminal"
        )
        # Make run_conversation block long enough for heartbeats to fire
        child.run_conversation.side_effect = self._blocking_run(0.25, api_calls=3)

        self._run_child(child, parent, "Test heartbeat")

        # Heartbeat should have fired at least once during the 0.25s sleep
        self.assertGreater(len(touches), 0,
                           "Heartbeat did not propagate activity to parent")
        # Verify the description includes child's current tool detail
        self.assertTrue(
            any("terminal" in desc for desc in touches),
            f"Heartbeat descriptions should include child tool info: {touches}")

    def test_heartbeat_stops_after_child_completes(self):
        """Heartbeat thread is cleaned up when the child finishes."""
        parent, touches = self._recording_parent()
        child = self._child_reporting(None, 1, 50, "done")
        child.run_conversation.return_value = {
            "final_response": "done", "completed": True, "api_calls": 1,
        }

        self._run_child(child, parent, "Test cleanup")

        # Record count after completion, wait, and verify no more calls
        settled = len(touches)
        time.sleep(0.15)
        self.assertEqual(len(touches), settled,
                         "Heartbeat continued firing after child completed")

    def test_heartbeat_stops_after_child_error(self):
        """Heartbeat thread is cleaned up even when the child raises."""
        parent, touches = self._recording_parent()
        child = self._child_reporting(
            "web_search", 2, 50, "executing tool: web_search"
        )

        def slow_fail(**kwargs):
            time.sleep(0.15)
            raise RuntimeError("network timeout")

        child.run_conversation.side_effect = slow_fail

        outcome = self._run_child(child, parent, "Test error cleanup")

        self.assertEqual(outcome["status"], "error")

        # Verify heartbeat stopped
        settled = len(touches)
        time.sleep(0.15)
        self.assertEqual(len(touches), settled,
                         "Heartbeat continued firing after child error")

    def test_heartbeat_includes_child_activity_desc_when_no_tool(self):
        """When child has no current_tool, heartbeat uses last_activity_desc."""
        parent, touches = self._recording_parent()
        child = self._child_reporting(None, 5, 90, "API call #5 completed")
        child.run_conversation.side_effect = self._blocking_run(0.15, api_calls=5)

        self._run_child(child, parent, "Test desc fallback")

        self.assertGreater(len(touches), 0)
        self.assertTrue(
            any("API call #5 completed" in desc for desc in touches),
            f"Heartbeat should include last_activity_desc: {touches}")

    def test_heartbeat_does_not_trip_idle_stale_while_inside_tool(self):
        """A long-running tool (no iteration advance, but current_tool set)
        must not be flagged stale at the idle threshold.

        Bug #13041: when a child is legitimately busy inside a slow tool
        (terminal command, browser fetch), api_call_count does not advance.
        The previous stale check treated this as idle and stopped the
        heartbeat after 5 cycles (~150s), letting the gateway kill the
        session. The fix uses a much higher in-tool threshold and only
        applies the tight idle threshold when current_tool is None.
        """
        parent, touches = self._recording_parent()
        # Child is stuck inside a single terminal call for the whole run.
        # api_call_count never advances, current_tool is always set.
        child = self._child_reporting(
            "terminal", 1, 50, "executing tool: terminal"
        )
        # Long enough to exceed the OLD idle threshold (5 cycles) at the
        # patched interval, but shorter than the new in-tool threshold.
        child.run_conversation.side_effect = self._blocking_run(0.4, api_calls=1)

        # Only the interval is patched; the stale-cycle thresholds keep
        # their module defaults. With a 0.05s interval and the default
        # _HEARTBEAT_STALE_CYCLES_IDLE=5, the old behavior would trip after
        # 0.25s and stop firing. We should see heartbeats continuing
        # through the full 0.4s run.
        self._run_child(child, parent, "Test long-running tool")

        # With the old idle threshold (5 cycles = 0.25s), touches would cap
        # at ~5. With the in-tool threshold (20 cycles = 1.0s), we should
        # see substantially more heartbeats over 0.4s.
        self.assertGreater(
            len(touches), 6,
            f"Heartbeat stopped too early while child was inside a tool; "
            f"got {len(touches)} touches over 0.4s at 0.05s interval",
        )

    def test_heartbeat_still_trips_idle_stale_when_no_tool(self):
        """A wedged child with no current_tool still trips the idle threshold.

        Regression guard: the fix for #13041 must not disable stale
        detection entirely. A child that's hung between turns (no tool
        running, no iteration progress) must still stop touching the
        parent so the gateway timeout can fire.
        """
        parent, touches = self._recording_parent()
        # Wedged child: no tool running, iteration frozen.
        child = self._child_reporting(None, 3, 50, "waiting for API response")
        child.run_conversation.side_effect = self._blocking_run(0.6, api_calls=3)

        # At interval 0.05s, idle threshold (5 cycles) trips at ~0.25s.
        # We should see the heartbeat stop firing well before 0.6s.
        self._run_child(child, parent, "Test wedged child")

        # With idle threshold=5 + interval=0.05s, touches should cap
        # around 5. Bound loosely to avoid timing flakes.
        self.assertLess(
            len(touches), 9,
            f"Idle stale detection did not fire: got {len(touches)} "
            f"touches over 0.6s — expected heartbeat to stop after "
            f"~5 stale cycles",
        )
class TestDelegationReasoningEffort(unittest.TestCase):
    """Tests for delegation.reasoning_effort config override."""

    @staticmethod
    def _child_reasoning_config(agent_cls, cfg_loader, effort, parent_reasoning):
        """Build one child and return the ``reasoning_config`` handed to AIAgent.

        ``effort`` is the delegation.reasoning_effort value served by the
        patched config loader; ``parent_reasoning`` is set on the mock parent.
        """
        cfg_loader.return_value = {"max_iterations": 50, "reasoning_effort": effort}
        agent_cls.return_value = MagicMock()
        parent_agent = _make_mock_parent()
        parent_agent.reasoning_config = parent_reasoning

        _build_child_agent(
            task_index=0, goal="test", context=None, toolsets=None,
            model=None, max_iterations=50, parent_agent=parent_agent,
            task_count=1,
        )

        ctor_kwargs = agent_cls.call_args[1]
        return ctor_kwargs["reasoning_config"]

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_inherits_parent_reasoning_when_no_override(self, MockAgent, mock_cfg):
        """With no delegation.reasoning_effort, child inherits parent's config."""
        passed = self._child_reasoning_config(
            MockAgent, mock_cfg,
            effort="",
            parent_reasoning={"enabled": True, "effort": "xhigh"},
        )
        self.assertEqual(passed, {"enabled": True, "effort": "xhigh"})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_override_reasoning_effort_from_config(self, MockAgent, mock_cfg):
        """delegation.reasoning_effort overrides the parent's level."""
        passed = self._child_reasoning_config(
            MockAgent, mock_cfg,
            effort="low",
            parent_reasoning={"enabled": True, "effort": "xhigh"},
        )
        self.assertEqual(passed, {"enabled": True, "effort": "low"})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_override_reasoning_effort_none_disables(self, MockAgent, mock_cfg):
        """delegation.reasoning_effort: 'none' disables thinking for subagents."""
        passed = self._child_reasoning_config(
            MockAgent, mock_cfg,
            effort="none",
            parent_reasoning={"enabled": True, "effort": "high"},
        )
        self.assertEqual(passed, {"enabled": False})

    @patch("tools.delegate_tool._load_config")
    @patch("run_agent.AIAgent")
    def test_invalid_reasoning_effort_falls_back_to_parent(self, MockAgent, mock_cfg):
        """Invalid delegation.reasoning_effort falls back to parent's config."""
        passed = self._child_reasoning_config(
            MockAgent, mock_cfg,
            effort="banana",
            parent_reasoning={"enabled": True, "effort": "medium"},
        )
        self.assertEqual(passed, {"enabled": True, "effort": "medium"})
# =========================================================================
# Dispatch helper, progress events, concurrency
# =========================================================================

class TestDispatchDelegateTask(unittest.TestCase):
    """Tests for the _dispatch_delegate_task helper and full param forwarding."""

    @patch("tools.delegate_tool._load_config", return_value={})
    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_acp_args_forwarded(self, mock_creds, mock_cfg):
        """Both acp_command and acp_args reach delegate_task via the helper."""
        # Nothing resolved from config: every credential field is None.
        mock_creds.return_value = dict.fromkeys(
            ("provider", "base_url", "api_key", "api_mode", "model")
        )
        parent_agent = _make_mock_parent(depth=0)

        with patch("tools.delegate_tool._build_child_agent") as mock_build:
            stub_child = MagicMock()
            stub_child.run_conversation.return_value = {
                "final_response": "done", "completed": True,
                "api_calls": 1, "messages": [],
            }
            # Concrete values for the attributes this test pins on the child.
            stub_child.configure_mock(
                _delegate_saved_tool_names=[],
                _credential_pool=None,
                session_prompt_tokens=0,
                session_completion_tokens=0,
                model="test",
            )
            mock_build.return_value = stub_child

            delegate_task(
                goal="test",
                acp_command="claude",
                acp_args=["--acp", "--stdio"],
                parent_agent=parent_agent,
            )

            _, build_kwargs = mock_build.call_args
            self.assertEqual(build_kwargs["override_acp_command"], "claude")
            self.assertEqual(build_kwargs["override_acp_args"], ["--acp", "--stdio"])
test_acp_args_forwarded(self, mock_creds, mock_cfg): 1733 """Both acp_command and acp_args reach delegate_task via the helper.""" 1734 mock_creds.return_value = { 1735 "provider": None, "base_url": None, 1736 "api_key": None, "api_mode": None, "model": None, 1737 } 1738 parent = _make_mock_parent(depth=0) 1739 with patch("tools.delegate_tool._build_child_agent") as mock_build: 1740 mock_child = MagicMock() 1741 mock_child.run_conversation.return_value = { 1742 "final_response": "done", "completed": True, 1743 "api_calls": 1, "messages": [], 1744 } 1745 mock_child._delegate_saved_tool_names = [] 1746 mock_child._credential_pool = None 1747 mock_child.session_prompt_tokens = 0 1748 mock_child.session_completion_tokens = 0 1749 mock_child.model = "test" 1750 mock_build.return_value = mock_child 1751 1752 delegate_task( 1753 goal="test", 1754 acp_command="claude", 1755 acp_args=["--acp", "--stdio"], 1756 parent_agent=parent, 1757 ) 1758 _, kwargs = mock_build.call_args 1759 self.assertEqual(kwargs["override_acp_command"], "claude") 1760 self.assertEqual(kwargs["override_acp_args"], ["--acp", "--stdio"]) 1761 1762 class TestDelegateEventEnum(unittest.TestCase): 1763 """Tests for DelegateEvent enum and back-compat aliases.""" 1764 1765 def test_enum_values_are_strings(self): 1766 for event in DelegateEvent: 1767 self.assertIsInstance(event.value, str) 1768 self.assertTrue(event.value.startswith("delegate.")) 1769 1770 def test_legacy_map_covers_all_old_names(self): 1771 expected_legacy = {"_thinking", "reasoning.available", 1772 "tool.started", "tool.completed", "subagent_progress"} 1773 self.assertEqual(set(_LEGACY_EVENT_MAP.keys()), expected_legacy) 1774 1775 def test_legacy_map_values_are_delegate_events(self): 1776 for old_name, event in _LEGACY_EVENT_MAP.items(): 1777 self.assertIsInstance(event, DelegateEvent) 1778 1779 def test_progress_callback_normalises_tool_started(self): 1780 """_build_child_progress_callback handles tool.started via enum.""" 1781 parent = 
_make_mock_parent() 1782 parent._delegate_spinner = MagicMock() 1783 parent.tool_progress_callback = MagicMock() 1784 1785 cb = _build_child_progress_callback(0, "test goal", parent, task_count=1) 1786 self.assertIsNotNone(cb) 1787 1788 cb("tool.started", tool_name="terminal", preview="ls") 1789 parent._delegate_spinner.print_above.assert_called() 1790 1791 def test_progress_callback_normalises_thinking(self): 1792 """Both _thinking and reasoning.available route to TASK_THINKING.""" 1793 parent = _make_mock_parent() 1794 parent._delegate_spinner = MagicMock() 1795 parent.tool_progress_callback = None 1796 1797 cb = _build_child_progress_callback(0, "test goal", parent, task_count=1) 1798 1799 cb("_thinking", tool_name=None, preview="pondering...") 1800 assert any("💭" in str(c) for c in parent._delegate_spinner.print_above.call_args_list) 1801 1802 parent._delegate_spinner.print_above.reset_mock() 1803 cb("reasoning.available", tool_name=None, preview="hmm") 1804 assert any("💭" in str(c) for c in parent._delegate_spinner.print_above.call_args_list) 1805 1806 def test_progress_callback_tool_completed_is_noop(self): 1807 """tool.completed is normalised but produces no display output.""" 1808 parent = _make_mock_parent() 1809 parent._delegate_spinner = MagicMock() 1810 parent.tool_progress_callback = None 1811 1812 cb = _build_child_progress_callback(0, "test goal", parent, task_count=1) 1813 cb("tool.completed", tool_name="terminal") 1814 parent._delegate_spinner.print_above.assert_not_called() 1815 1816 def test_progress_callback_ignores_unknown_events(self): 1817 """Unknown event types are silently ignored.""" 1818 parent = _make_mock_parent() 1819 parent._delegate_spinner = MagicMock() 1820 1821 cb = _build_child_progress_callback(0, "test goal", parent, task_count=1) 1822 # Should not raise 1823 cb("some.unknown.event", tool_name="x") 1824 parent._delegate_spinner.print_above.assert_not_called() 1825 1826 def 
test_progress_callback_accepts_enum_value_directly(self): 1827 """cb(DelegateEvent.TASK_THINKING, ...) must route to the thinking 1828 branch. Pre-fix the callback only handled legacy strings via 1829 _LEGACY_EVENT_MAP.get and silently dropped enum-typed callers.""" 1830 parent = _make_mock_parent() 1831 parent._delegate_spinner = MagicMock() 1832 parent.tool_progress_callback = None 1833 1834 cb = _build_child_progress_callback(0, "test goal", parent, task_count=1) 1835 cb(DelegateEvent.TASK_THINKING, preview="pondering") 1836 # If the enum was accepted, the thinking emoji got printed. 1837 assert any( 1838 "💭" in str(c) 1839 for c in parent._delegate_spinner.print_above.call_args_list 1840 ) 1841 1842 def test_progress_callback_accepts_new_style_string(self): 1843 """cb('delegate.task_thinking', ...) — the string form of the 1844 enum value — must route to the thinking branch too, so new-style 1845 emitters don't have to import DelegateEvent.""" 1846 parent = _make_mock_parent() 1847 parent._delegate_spinner = MagicMock() 1848 1849 cb = _build_child_progress_callback(0, "test goal", parent, task_count=1) 1850 cb("delegate.task_thinking", preview="hmm") 1851 assert any( 1852 "💭" in str(c) 1853 for c in parent._delegate_spinner.print_above.call_args_list 1854 ) 1855 1856 def test_progress_callback_task_progress_not_misrendered(self): 1857 """'subagent_progress' (legacy name for TASK_PROGRESS) carries a 1858 pre-batched summary in the tool_name slot. Before the fix, this 1859 fell through to the TASK_TOOL_STARTED rendering path, treating 1860 the summary string as a tool name. After the fix: distinct 1861 render (no tool-start emoji lookup) and pass-through relay 1862 upward (no re-batching). 1863 1864 Regression path only reachable once nested orchestration is 1865 enabled: nested orchestrators relay subagent_progress from 1866 grandchildren upward through this callback. 
1867 """ 1868 parent = _make_mock_parent() 1869 parent._delegate_spinner = MagicMock() 1870 parent.tool_progress_callback = MagicMock() 1871 1872 cb = _build_child_progress_callback(0, "test goal", parent, task_count=1) 1873 cb("subagent_progress", tool_name="🔀 [1] terminal, file") 1874 1875 # Spinner gets a distinct 🔀-prefixed line, NOT a tool emoji 1876 # followed by the summary string as if it were a tool name. 1877 calls = parent._delegate_spinner.print_above.call_args_list 1878 self.assertTrue(any("🔀 🔀 [1] terminal, file" in str(c) for c in calls)) 1879 # Parent callback receives the relay (pass-through, no re-batching). 1880 parent.tool_progress_callback.assert_called_once() 1881 # No '⚡' tool-start emoji should appear — that's the pre-fix bug. 1882 self.assertFalse(any("⚡" in str(c) for c in calls)) 1883 1884 1885 class TestConcurrencyDefaults(unittest.TestCase): 1886 """Tests for the concurrency default and no hard ceiling.""" 1887 1888 @patch("tools.delegate_tool._load_config", return_value={}) 1889 def test_default_is_three(self, mock_cfg): 1890 # Clear env var if set 1891 with patch.dict(os.environ, {}, clear=True): 1892 self.assertEqual(_get_max_concurrent_children(), 3) 1893 1894 @patch("tools.delegate_tool._load_config", 1895 return_value={"max_concurrent_children": 10}) 1896 def test_no_upper_ceiling(self, mock_cfg): 1897 """Users can raise concurrency as high as they want — no hard cap.""" 1898 self.assertEqual(_get_max_concurrent_children(), 10) 1899 1900 @patch("tools.delegate_tool._load_config", 1901 return_value={"max_concurrent_children": 100}) 1902 def test_very_high_values_honored(self, mock_cfg): 1903 self.assertEqual(_get_max_concurrent_children(), 100) 1904 1905 @patch("tools.delegate_tool._load_config", 1906 return_value={"max_concurrent_children": 0}) 1907 def test_zero_clamped_to_one(self, mock_cfg): 1908 """Floor of 1 is enforced; zero or negative values raise to 1.""" 1909 self.assertEqual(_get_max_concurrent_children(), 1) 1910 1911 
@patch("tools.delegate_tool._load_config", return_value={}) 1912 def test_env_var_honored_uncapped(self, mock_cfg): 1913 with patch.dict(os.environ, {"DELEGATION_MAX_CONCURRENT_CHILDREN": "12"}): 1914 self.assertEqual(_get_max_concurrent_children(), 12) 1915 1916 @patch("tools.delegate_tool._load_config", 1917 return_value={"max_concurrent_children": 6}) 1918 def test_configured_value_returned(self, mock_cfg): 1919 self.assertEqual(_get_max_concurrent_children(), 6) 1920 1921 1922 # ========================================================================= 1923 # max_spawn_depth clamping 1924 # ========================================================================= 1925 1926 class TestMaxSpawnDepth(unittest.TestCase): 1927 """Tests for _get_max_spawn_depth clamping and fallback behavior.""" 1928 1929 @patch("tools.delegate_tool._load_config", return_value={}) 1930 def test_max_spawn_depth_defaults_to_1(self, mock_cfg): 1931 from tools.delegate_tool import _get_max_spawn_depth 1932 self.assertEqual(_get_max_spawn_depth(), 1) 1933 1934 @patch("tools.delegate_tool._load_config", 1935 return_value={"max_spawn_depth": 0}) 1936 def test_max_spawn_depth_clamped_below_one(self, mock_cfg): 1937 import logging 1938 from tools.delegate_tool import _get_max_spawn_depth 1939 with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm: 1940 result = _get_max_spawn_depth() 1941 self.assertEqual(result, 1) 1942 self.assertTrue(any("clamping to 1" in m for m in cm.output)) 1943 1944 @patch("tools.delegate_tool._load_config", 1945 return_value={"max_spawn_depth": 99}) 1946 def test_max_spawn_depth_clamped_above_three(self, mock_cfg): 1947 import logging 1948 from tools.delegate_tool import _get_max_spawn_depth 1949 with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm: 1950 result = _get_max_spawn_depth() 1951 self.assertEqual(result, 3) 1952 self.assertTrue(any("clamping to 3" in m for m in cm.output)) 1953 1954 
@patch("tools.delegate_tool._load_config", 1955 return_value={"max_spawn_depth": "not-a-number"}) 1956 def test_max_spawn_depth_invalid_falls_back_to_default(self, mock_cfg): 1957 from tools.delegate_tool import _get_max_spawn_depth 1958 self.assertEqual(_get_max_spawn_depth(), 1) 1959 1960 1961 # ========================================================================= 1962 # role param plumbing 1963 # ========================================================================= 1964 # 1965 # These tests cover the schema + signature + stash plumbing of the role 1966 # param. The full role-honoring behavior (toolset re-add, role-aware 1967 # prompt) lives in TestOrchestratorRoleBehavior below; these tests only 1968 # assert on _delegate_role stashing and on the schema shape. 1969 1970 1971 class TestOrchestratorRoleSchema(unittest.TestCase): 1972 """Tests that the role param reaches the child via dispatch.""" 1973 1974 @patch("tools.delegate_tool._resolve_delegation_credentials") 1975 @patch("tools.delegate_tool._load_config", 1976 return_value={"max_spawn_depth": 2}) 1977 def _run_with_mock_child(self, role_arg, mock_cfg, mock_creds): 1978 mock_creds.return_value = { 1979 "provider": None, "base_url": None, 1980 "api_key": None, "api_mode": None, "model": None, 1981 } 1982 parent = _make_mock_parent(depth=0) 1983 with patch("run_agent.AIAgent") as MockAgent: 1984 mock_child = MagicMock() 1985 mock_child.run_conversation.return_value = { 1986 "final_response": "done", "completed": True, 1987 "api_calls": 1, "messages": [], 1988 } 1989 mock_child._delegate_saved_tool_names = [] 1990 mock_child._credential_pool = None 1991 mock_child.session_prompt_tokens = 0 1992 mock_child.session_completion_tokens = 0 1993 mock_child.model = "test" 1994 MockAgent.return_value = mock_child 1995 kwargs = {"goal": "test", "parent_agent": parent} 1996 if role_arg is not _SENTINEL: 1997 kwargs["role"] = role_arg 1998 delegate_task(**kwargs) 1999 return mock_child 2000 2001 def 
test_default_role_is_leaf(self): 2002 child = self._run_with_mock_child(_SENTINEL) 2003 self.assertEqual(child._delegate_role, "leaf") 2004 2005 def test_explicit_orchestrator_role_stashed(self): 2006 """role='orchestrator' reaches _build_child_agent and is stashed. 2007 Full behavior (toolset re-add) lands in commit 3; commit 2 only 2008 verifies the plumbing.""" 2009 child = self._run_with_mock_child("orchestrator") 2010 self.assertEqual(child._delegate_role, "orchestrator") 2011 2012 def test_unknown_role_coerces_to_leaf(self): 2013 """role='nonsense' → _normalize_role warns and returns 'leaf'.""" 2014 import logging 2015 with self.assertLogs("tools.delegate_tool", level=logging.WARNING) as cm: 2016 child = self._run_with_mock_child("nonsense") 2017 self.assertEqual(child._delegate_role, "leaf") 2018 self.assertTrue(any("coercing" in m.lower() for m in cm.output)) 2019 2020 def test_schema_has_role_top_level_and_per_task(self): 2021 from tools.delegate_tool import DELEGATE_TASK_SCHEMA 2022 props = DELEGATE_TASK_SCHEMA["parameters"]["properties"] 2023 self.assertIn("role", props) 2024 self.assertEqual(props["role"]["enum"], ["leaf", "orchestrator"]) 2025 task_props = props["tasks"]["items"]["properties"] 2026 self.assertIn("role", task_props) 2027 self.assertEqual(task_props["role"]["enum"], ["leaf", "orchestrator"]) 2028 2029 2030 # Sentinel used to distinguish "role kwarg omitted" from "role=None". 
_SENTINEL = object()


# =========================================================================
# role-honoring behavior
# =========================================================================


def _make_role_mock_child():
    """Helper: mock child with minimal fields for delegate_task to process."""
    mock_child = MagicMock()
    mock_child.run_conversation.return_value = {
        "final_response": "done", "completed": True,
        "api_calls": 1, "messages": [],
    }
    mock_child._delegate_saved_tool_names = []
    mock_child._credential_pool = None
    mock_child.session_prompt_tokens = 0
    mock_child.session_completion_tokens = 0
    mock_child.model = "test"
    return mock_child


def _no_credential_overrides():
    """Return a fresh credentials dict whose every field is None.

    Used as the return value of the patched
    ``_resolve_delegation_credentials`` in the role tests.
    """
    return {
        "provider": None, "base_url": None,
        "api_key": None, "api_mode": None, "model": None,
    }


class TestOrchestratorRoleBehavior(unittest.TestCase):
    """Tests that role='orchestrator' actually changes toolset + prompt."""

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_orchestrator_role_keeps_delegation_at_depth_1(
        self, mock_cfg, mock_creds
    ):
        """role='orchestrator' + depth-0 parent with max_spawn_depth=2 →
        child at depth 1 gets 'delegation' in enabled_toolsets (can
        further delegate).  Requires max_spawn_depth>=2 since the new
        default is 1 (flat)."""
        mock_creds.return_value = _no_credential_overrides()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file"]
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            delegate_task(goal="test", role="orchestrator", parent_agent=parent)
            kwargs = MockAgent.call_args[1]
            self.assertIn("delegation", kwargs["enabled_toolsets"])
            self.assertEqual(mock_child._delegate_role, "orchestrator")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_orchestrator_blocked_at_max_spawn_depth(
        self, mock_cfg, mock_creds
    ):
        """Parent at depth 1 with max_spawn_depth=2 spawns child
        at depth 2 (the floor); role='orchestrator' degrades to leaf."""
        mock_creds.return_value = _no_credential_overrides()
        parent = _make_mock_parent(depth=1)
        parent.enabled_toolsets = ["terminal", "delegation"]
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            delegate_task(goal="test", role="orchestrator", parent_agent=parent)
            kwargs = MockAgent.call_args[1]
            self.assertNotIn("delegation", kwargs["enabled_toolsets"])
            self.assertEqual(mock_child._delegate_role, "leaf")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config", return_value={})
    def test_orchestrator_blocked_at_default_flat_depth(
        self, mock_cfg, mock_creds
    ):
        """With default max_spawn_depth=1 (flat), role='orchestrator'
        on a depth-0 parent produces a depth-1 child that is already at
        the floor — the role degrades to 'leaf' and the delegation
        toolset is stripped.  This is the new default posture."""
        mock_creds.return_value = _no_credential_overrides()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file", "delegation"]
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            delegate_task(goal="test", role="orchestrator", parent_agent=parent)
            kwargs = MockAgent.call_args[1]
            self.assertNotIn("delegation", kwargs["enabled_toolsets"])
            self.assertEqual(mock_child._delegate_role, "leaf")

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    def test_orchestrator_enabled_false_forces_leaf(self, mock_creds):
        """Kill switch delegation.orchestrator_enabled=false overrides
        role='orchestrator'."""
        mock_creds.return_value = _no_credential_overrides()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "delegation"]
        with patch("tools.delegate_tool._load_config",
                   return_value={"orchestrator_enabled": False}):
            with patch("run_agent.AIAgent") as MockAgent:
                mock_child = _make_role_mock_child()
                MockAgent.return_value = mock_child
                delegate_task(goal="test", role="orchestrator",
                              parent_agent=parent)
                kwargs = MockAgent.call_args[1]
                self.assertNotIn("delegation", kwargs["enabled_toolsets"])
                self.assertEqual(mock_child._delegate_role, "leaf")

    # ── Role-aware system prompt ────────────────────────────────────────

    def test_leaf_prompt_does_not_mention_delegation(self):
        """A leaf prompt contains neither 'delegate_task' nor the
        'Orchestrator Role' heading."""
        prompt = _build_child_system_prompt(
            "Fix tests", role="leaf",
            max_spawn_depth=2, child_depth=1,
        )
        self.assertNotIn("delegate_task", prompt)
        self.assertNotIn("Orchestrator Role", prompt)

    def test_orchestrator_prompt_mentions_delegation_capability(self):
        """An orchestrator prompt names delegate_task, carries the
        'Orchestrator Role' heading and states depth and max depth."""
        prompt = _build_child_system_prompt(
            "Survey approaches", role="orchestrator",
            max_spawn_depth=2, child_depth=1,
        )
        self.assertIn("delegate_task", prompt)
        self.assertIn("Orchestrator Role", prompt)
        # Depth/max-depth note present and literal:
        self.assertIn("depth 1", prompt)
        self.assertIn("max_spawn_depth=2", prompt)

    def test_orchestrator_prompt_at_depth_floor_says_children_are_leaves(self):
        """With max_spawn_depth=2 and child_depth=1, the orchestrator's
        own children would be at depth 2 (the floor) → must be leaves."""
        prompt = _build_child_system_prompt(
            "Survey", role="orchestrator",
            max_spawn_depth=2, child_depth=1,
        )
        self.assertIn("MUST be leaves", prompt)

    def test_orchestrator_prompt_below_floor_allows_more_nesting(self):
        """With max_spawn_depth=3 and child_depth=1, the orchestrator's
        own children can themselves be orchestrators (depth 2 < 3)."""
        prompt = _build_child_system_prompt(
            "Deep work", role="orchestrator",
            max_spawn_depth=3, child_depth=1,
        )
        self.assertIn("can themselves be orchestrators", prompt)

    # ── Batch mode and intersection ─────────────────────────────────────

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_batch_mode_per_task_role_override(self, mock_cfg, mock_creds):
        """Per-task role beats top-level; no top-level role → "leaf".

        tasks=[{role:'orchestrator'},{role:'leaf'},{}] → first gets
        delegation, second and third don't.  Requires max_spawn_depth>=2
        (raised explicitly here) since the new default is 1 (flat).
        """
        mock_creds.return_value = _no_credential_overrides()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file", "delegation"]
        built_toolsets = []

        def _factory(*a, **kw):
            # Record the toolsets each constructed child was given.
            built_toolsets.append(kw.get("enabled_toolsets"))
            return _make_role_mock_child()

        with patch("run_agent.AIAgent", side_effect=_factory):
            delegate_task(
                tasks=[
                    {"goal": "A", "role": "orchestrator"},
                    {"goal": "B", "role": "leaf"},
                    {"goal": "C"},  # no role → falls back to top_role (leaf)
                ],
                parent_agent=parent,
            )
        self.assertIn("delegation", built_toolsets[0])
        self.assertNotIn("delegation", built_toolsets[1])
        self.assertNotIn("delegation", built_toolsets[2])

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_intersection_preserves_delegation_bound(
        self, mock_cfg, mock_creds
    ):
        """Design decision: orchestrator capability is granted by role,
        NOT inherited from the parent's toolset.  A parent without
        'delegation' in its enabled_toolsets can still spawn an
        orchestrator child — the re-add in _build_child_agent runs
        unconditionally for orchestrators (when max_spawn_depth allows).

        If you want to change to "parent must have delegation too",
        update _build_child_agent to check parent_toolsets before the
        re-add and update this test to match.
        """
        mock_creds.return_value = _no_credential_overrides()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file"]  # no delegation
        with patch("run_agent.AIAgent") as MockAgent:
            mock_child = _make_role_mock_child()
            MockAgent.return_value = mock_child
            delegate_task(goal="test", role="orchestrator",
                          parent_agent=parent)
            self.assertIn("delegation", MockAgent.call_args[1]["enabled_toolsets"])


class TestOrchestratorEndToEnd(unittest.TestCase):
    """End-to-end: parent -> orchestrator -> two-leaf nested orchestration.

    Covers the acceptance gate: parent delegates to an orchestrator
    child; the orchestrator delegates to two leaf grandchildren; the
    role/toolset/depth chain all resolve correctly.

    Mock strategy: a single AIAgent patch with a side_effect factory
    that keys on the child's ephemeral_system_prompt — orchestrator
    prompts contain the string "Orchestrator Role" (see
    _build_child_system_prompt), leaves don't.  The orchestrator
    mock's run_conversation recursively calls delegate_task with
    tasks=[{goal:...},{goal:...}] to spawn two leaves.  This keeps
    the test in one patch context and avoids depth-indexed nesting.
    """

    @patch("tools.delegate_tool._resolve_delegation_credentials")
    @patch("tools.delegate_tool._load_config",
           return_value={"max_spawn_depth": 2})
    def test_end_to_end_nested_orchestration(self, mock_cfg, mock_creds):
        """Three agents are built: one orchestrator, then two leaves."""
        mock_creds.return_value = _no_credential_overrides()
        parent = _make_mock_parent(depth=0)
        parent.enabled_toolsets = ["terminal", "file", "delegation"]

        # One record per agent built: its enabled_toolsets and whether
        # its prompt was an orchestrator prompt.
        built_agents: list = []

        def _factory(*a, **kw):
            prompt = kw.get("ephemeral_system_prompt", "") or ""
            is_orchestrator = "Orchestrator Role" in prompt
            m = _make_role_mock_child()
            built_agents.append({
                "enabled_toolsets": list(kw.get("enabled_toolsets") or []),
                "is_orchestrator_prompt": is_orchestrator,
            })

            if is_orchestrator:
                # Prepare the orchestrator mock as a parent-capable object
                # so the nested delegate_task call succeeds.
                m._delegate_depth = 1
                m._delegate_role = "orchestrator"
                m._active_children = []
                m._active_children_lock = threading.Lock()
                m._session_db = None
                m.platform = "cli"
                m.enabled_toolsets = ["terminal", "file", "delegation"]
                m.api_key = "***"
                m.base_url = ""
                m.provider = None
                m.api_mode = None
                m.providers_allowed = None
                m.providers_ignored = None
                m.providers_order = None
                m.provider_sort = None
                m._print_fn = None
                m.tool_progress_callback = None
                m.thinking_callback = None

                def _orchestrator_run(user_message=None, task_id=None):
                    # Re-entrant: orchestrator spawns two leaves, using
                    # the mock itself (captured by closure) as the parent.
                    delegate_task(
                        tasks=[{"goal": "leaf-A"}, {"goal": "leaf-B"}],
                        parent_agent=m,
                    )
                    return {
                        "final_response": "orchestrated 2 workers",
                        "completed": True, "api_calls": 1,
                        "messages": [],
                    }
                m.run_conversation.side_effect = _orchestrator_run

            return m

        with patch("run_agent.AIAgent", side_effect=_factory) as MockAgent:
            delegate_task(
                goal="top-level orchestration",
                role="orchestrator",
                parent_agent=parent,
            )

        # 1 orchestrator + 2 leaf grandchildren = 3 agents
        self.assertEqual(MockAgent.call_count, 3)
        # First built = the orchestrator (parent's direct child)
        self.assertIn("delegation", built_agents[0]["enabled_toolsets"])
        self.assertTrue(built_agents[0]["is_orchestrator_prompt"])
        # Next two = leaves (grandchildren)
        self.assertNotIn("delegation", built_agents[1]["enabled_toolsets"])
        self.assertFalse(built_agents[1]["is_orchestrator_prompt"])
        self.assertNotIn("delegation", built_agents[2]["enabled_toolsets"])
        self.assertFalse(built_agents[2]["is_orchestrator_prompt"])


class TestSubagentApprovalCallback(unittest.TestCase):
    """Subagent worker threads must have a non-interactive approval callback
    installed so dangerous-command prompts don't fall back to input() and
    deadlock the parent's prompt_toolkit TUI.

    Governed by delegation.subagent_auto_approve:
      false (default) → _subagent_auto_deny
      true            → _subagent_auto_approve
    """

    def test_auto_deny_returns_deny(self):
        """_subagent_auto_deny answers 'deny'."""
        from tools.delegate_tool import _subagent_auto_deny
        self.assertEqual(
            _subagent_auto_deny("rm -rf /tmp/x", "dangerous"),
            "deny",
        )

    def test_auto_approve_returns_once(self):
        """_subagent_auto_approve answers 'once'."""
        from tools.delegate_tool import _subagent_auto_approve
        self.assertEqual(
            _subagent_auto_approve("rm -rf /tmp/x", "dangerous"),
            "once",
        )

    @patch("tools.delegate_tool._load_config", return_value={})
    def test_getter_defaults_to_deny(self, _mock_cfg):
        """With no subagent_auto_approve key the getter returns auto-deny."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_deny,
        )
        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"subagent_auto_approve": False},
    )
    def test_getter_explicit_false_is_deny(self, _mock_cfg):
        """subagent_auto_approve=False selects auto-deny."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_deny,
        )
        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_deny)

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"subagent_auto_approve": True},
    )
    def test_getter_true_is_approve(self, _mock_cfg):
        """subagent_auto_approve=True selects auto-approve."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_approve,
        )
        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)

    @patch(
        "tools.delegate_tool._load_config",
        return_value={"subagent_auto_approve": "yes"},
    )
    def test_getter_truthy_string_is_approve(self, _mock_cfg):
        """is_truthy_value accepts 'yes'/'1'/'true' as truthy."""
        from tools.delegate_tool import (
            _get_subagent_approval_callback,
            _subagent_auto_approve,
        )
        self.assertIs(_get_subagent_approval_callback(), _subagent_auto_approve)

    def test_executor_initializer_installs_callback_in_worker(self):
        """The initializer sets the callback on the worker thread's TLS,
        not the parent's — verifies the fix actually scopes to workers.
        """
        from concurrent.futures import ThreadPoolExecutor
        from tools.terminal_tool import (
            set_approval_callback as _set_cb,
            _get_approval_callback,
        )
        from tools.delegate_tool import _subagent_auto_deny

        # This test overwrites the calling thread's callback; put back
        # whatever was installed so later tests are not affected.
        self.addCleanup(_set_cb, _get_approval_callback())

        # Parent thread has no callback.
        _set_cb(None)
        self.assertIsNone(_get_approval_callback())

        seen = []

        def worker():
            seen.append(_get_approval_callback())

        with ThreadPoolExecutor(
            max_workers=1,
            initializer=_set_cb,
            initargs=(_subagent_auto_deny,),
        ) as executor:
            executor.submit(worker).result()

        self.assertEqual(seen, [_subagent_auto_deny])
        # Parent's callback slot is still empty (TLS isolates threads).
        self.assertIsNone(_get_approval_callback())


class TestFallbackModelInheritance(unittest.TestCase):
    """Subagents must inherit the parent's fallback provider chain."""

    def test_child_inherits_fallback_chain(self):
        """_build_child_agent passes parent._fallback_chain as fallback_model."""
        parent = _make_mock_parent(depth=0)
        fallback_entry = {"provider": "openrouter", "model": "gpt-4o-mini", "api_key": "sk-or-x"}
        parent._fallback_chain = [fallback_entry]

        with patch("run_agent.AIAgent") as MockAgent:
            MockAgent.return_value = MagicMock()
            _build_child_agent(
                task_index=0,
                goal="test fallback inheritance",
                context=None,
                toolsets=None,
                model=None,
                max_iterations=10,
                parent_agent=parent,
                task_count=1,
            )

            _, kwargs = MockAgent.call_args
            self.assertEqual(kwargs["fallback_model"], [fallback_entry])

    def test_child_gets_no_fallback_when_parent_chain_empty(self):
        """When parent._fallback_chain is empty, fallback_model is None."""
        parent = _make_mock_parent(depth=0)
        parent._fallback_chain = []

        with patch("run_agent.AIAgent") as MockAgent:
            MockAgent.return_value = MagicMock()
            _build_child_agent(
                task_index=0,
                goal="test no fallback",
                context=None,
                toolsets=None,
                model=None,
                max_iterations=10,
                parent_agent=parent,
                task_count=1,
            )

            _, kwargs = MockAgent.call_args
            self.assertIsNone(kwargs["fallback_model"])


if __name__ == "__main__":
    unittest.main()