test_run_agent.py
1 """Unit tests for run_agent.py (AIAgent). 2 3 Tests cover pure functions, state/structure methods, and conversation loop 4 pieces. The OpenAI client and tool loading are mocked so no network calls 5 are made. 6 """ 7 8 import io 9 import json 10 import logging 11 import re 12 import uuid 13 from logging.handlers import RotatingFileHandler 14 from pathlib import Path 15 from types import SimpleNamespace 16 from unittest.mock import AsyncMock, MagicMock, patch 17 18 import pytest 19 from agent.codex_responses_adapter import _normalize_codex_response 20 21 import run_agent 22 from run_agent import AIAgent 23 from agent.error_classifier import FailoverReason 24 from agent.prompt_builder import DEFAULT_AGENT_IDENTITY 25 26 27 # --------------------------------------------------------------------------- 28 # Fixtures 29 # --------------------------------------------------------------------------- 30 31 32 def _make_tool_defs(*names: str) -> list: 33 """Build minimal tool definition list accepted by AIAgent.__init__.""" 34 return [ 35 { 36 "type": "function", 37 "function": { 38 "name": n, 39 "description": f"{n} tool", 40 "parameters": {"type": "object", "properties": {}}, 41 }, 42 } 43 for n in names 44 ] 45 46 47 def test_is_destructive_command_treats_cp_as_mutating(): 48 assert run_agent._is_destructive_command("cp .env.local .env") is True 49 50 51 def test_is_destructive_command_treats_install_as_mutating(): 52 assert run_agent._is_destructive_command("install template.env .env") is True 53 54 55 @pytest.fixture() 56 def agent(): 57 """Minimal AIAgent with mocked OpenAI client and tool loading.""" 58 with ( 59 patch( 60 "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search") 61 ), 62 patch("run_agent.check_toolset_requirements", return_value={}), 63 patch("run_agent.OpenAI"), 64 ): 65 a = AIAgent( 66 api_key="test-key-1234567890", 67 base_url="https://openrouter.ai/api/v1", 68 quiet_mode=True, 69 skip_context_files=True, 70 skip_memory=True, 71 ) 72 a.client = MagicMock() 73 return a 74 75 76 @pytest.fixture() 77 def agent_with_memory_tool(): 78 """Agent whose valid_tool_names includes 'memory'.""" 79 with ( 80 patch( 81 "run_agent.get_tool_definitions", 82 return_value=_make_tool_defs("web_search", "memory"), 83 ), 84 patch("run_agent.check_toolset_requirements", return_value={}), 85 patch("run_agent.OpenAI"), 86 ): 87 a = AIAgent( 88 api_key="test-k...7890", 89 base_url="https://openrouter.ai/api/v1", 90 quiet_mode=True, 91 skip_context_files=True, 92 skip_memory=True, 93 ) 94 a.client = MagicMock() 95 return a 96 97 98 def test_aiagent_reuses_existing_errors_log_handler(): 99 """Repeated AIAgent init should not accumulate duplicate errors.log handlers.""" 100 root_logger = logging.getLogger() 101 original_handlers = list(root_logger.handlers) 102 error_log_path = (run_agent._hermes_home / "logs" / "errors.log").resolve() 103 104 try: 105 for handler in list(root_logger.handlers): 106 root_logger.removeHandler(handler) 107 108 error_log_path.parent.mkdir(parents=True, exist_ok=True) 109 preexisting_handler = RotatingFileHandler( 110 error_log_path, 111 maxBytes=2 * 1024 * 1024, 112 backupCount=2, 113 ) 114 root_logger.addHandler(preexisting_handler) 115 116 with ( 117 patch( 118 "run_agent.get_tool_definitions", 119 return_value=_make_tool_defs("web_search"), 120 ), 121 patch("run_agent.check_toolset_requirements", return_value={}), 122 patch("run_agent.OpenAI"), 123 ): 124 AIAgent( 125 api_key="test-k...7890", 126 base_url="https://openrouter.ai/api/v1", 127 
quiet_mode=True, 128 skip_context_files=True, 129 skip_memory=True, 130 ) 131 AIAgent( 132 api_key="test-k...7890", 133 base_url="https://openrouter.ai/api/v1", 134 quiet_mode=True, 135 skip_context_files=True, 136 skip_memory=True, 137 ) 138 139 matching_handlers = [ 140 handler for handler in root_logger.handlers 141 if isinstance(handler, RotatingFileHandler) 142 and error_log_path == Path(handler.baseFilename).resolve() 143 ] 144 assert len(matching_handlers) == 1 145 finally: 146 for handler in list(root_logger.handlers): 147 root_logger.removeHandler(handler) 148 if handler not in original_handlers: 149 handler.close() 150 for handler in original_handlers: 151 root_logger.addHandler(handler) 152 153 154 class TestProviderModelNormalization: 155 def test_aiagent_strips_matching_native_provider_prefix(self): 156 with ( 157 patch( 158 "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search") 159 ), 160 patch("run_agent.check_toolset_requirements", return_value={}), 161 patch("run_agent.OpenAI"), 162 ): 163 agent = AIAgent( 164 model="zai/glm-5.1", 165 provider="zai", 166 base_url="https://api.z.ai/api/paas/v4", 167 api_key="test-key-1234567890", 168 quiet_mode=True, 169 skip_context_files=True, 170 skip_memory=True, 171 ) 172 173 assert agent.model == "glm-5.1" 174 175 def test_aiagent_keeps_aggregator_vendor_slug(self): 176 with ( 177 patch( 178 "run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search") 179 ), 180 patch("run_agent.check_toolset_requirements", return_value={}), 181 patch("run_agent.OpenAI"), 182 ): 183 agent = AIAgent( 184 model="anthropic/claude-sonnet-4.6", 185 provider="openrouter", 186 base_url="https://openrouter.ai/api/v1", 187 api_key="test-key-1234567890", 188 quiet_mode=True, 189 skip_context_files=True, 190 skip_memory=True, 191 ) 192 193 assert agent.model == "anthropic/claude-sonnet-4.6" 194 195 196 # --------------------------------------------------------------------------- 197 # Helper to build mock assistant messages (API response objects) 198 # --------------------------------------------------------------------------- 199 200 201 def _mock_assistant_msg( 202 content="Hello", 203 tool_calls=None, 204 reasoning=None, 205 reasoning_content=None, 206 reasoning_details=None, 207 ): 208 """Return a SimpleNamespace mimicking an OpenAI ChatCompletionMessage.""" 209 msg = SimpleNamespace(content=content, tool_calls=tool_calls) 210 if reasoning is not None: 211 msg.reasoning = reasoning 212 if reasoning_content is not None: 213 msg.reasoning_content = reasoning_content 214 if reasoning_details is not None: 215 msg.reasoning_details = reasoning_details 216 return msg 217 218 219 def _mock_tool_call(name="web_search", arguments="{}", call_id=None): 220 """Return a SimpleNamespace mimicking a tool call object.""" 221 return SimpleNamespace( 222 id=call_id or f"call_{uuid.uuid4().hex[:8]}", 223 type="function", 224 function=SimpleNamespace(name=name, arguments=arguments), 225 ) 226 227 228 def _mock_response( 229 content="Hello", 230 finish_reason="stop", 231 tool_calls=None, 232 reasoning=None, 233 reasoning_content=None, 234 reasoning_details=None, 235 usage=None, 236 ): 237 """Return a SimpleNamespace mimicking an OpenAI ChatCompletion response.""" 238 msg = _mock_assistant_msg( 239 content=content, 240 tool_calls=tool_calls, 241 reasoning=reasoning, 242 reasoning_content=reasoning_content, 243 reasoning_details=reasoning_details, 244 ) 245 choice = SimpleNamespace(message=msg, finish_reason=finish_reason) 246 resp = 
SimpleNamespace(choices=[choice], model="test/model") 247 if usage: 248 resp.usage = SimpleNamespace(**usage) 249 else: 250 resp.usage = None 251 return resp 252 253 254 # =================================================================== 255 # Group 1: Pure Functions 256 # =================================================================== 257 258 259 class TestHasContentAfterThinkBlock: 260 def test_none_returns_false(self, agent): 261 assert agent._has_content_after_think_block(None) is False 262 263 def test_empty_returns_false(self, agent): 264 assert agent._has_content_after_think_block("") is False 265 266 def test_only_think_block_returns_false(self, agent): 267 assert agent._has_content_after_think_block("<think>reasoning</think>") is False 268 269 def test_content_after_think_returns_true(self, agent): 270 assert ( 271 agent._has_content_after_think_block("<think>r</think> actual answer") 272 is True 273 ) 274 275 def test_no_think_block_returns_true(self, agent): 276 assert agent._has_content_after_think_block("just normal content") is True 277 278 279 class TestStripThinkBlocks: 280 def test_none_returns_empty(self, agent): 281 assert agent._strip_think_blocks(None) == "" 282 283 def test_no_blocks_unchanged(self, agent): 284 assert agent._strip_think_blocks("hello world") == "hello world" 285 286 def test_single_block_removed(self, agent): 287 result = agent._strip_think_blocks("<think>reasoning</think> answer") 288 assert "reasoning" not in result 289 assert "answer" in result 290 291 def test_multiline_block_removed(self, agent): 292 text = "<think>\nline1\nline2\n</think>\nvisible" 293 result = agent._strip_think_blocks(text) 294 assert "line1" not in result 295 assert "visible" in result 296 297 def test_orphaned_closing_think_tag(self, agent): 298 result = agent._strip_think_blocks("some reasoning</think>actual answer") 299 assert "</think>" not in result 300 assert "actual answer" in result 301 302 def test_orphaned_closing_thinking_tag(self, agent): 303 result = agent._strip_think_blocks("reasoning</thinking>answer") 304 assert "</thinking>" not in result 305 assert "answer" in result 306 307 def test_orphaned_opening_think_tag(self, agent): 308 result = agent._strip_think_blocks("<think>orphaned reasoning without close") 309 assert "<think>" not in result 310 311 def test_mixed_orphaned_and_paired_tags(self, agent): 312 text = "stray</think><think>paired reasoning</think> visible" 313 result = agent._strip_think_blocks(text) 314 assert "</think>" not in result 315 assert "<think>" not in result 316 assert "visible" in result 317 318 def test_thought_block_removed(self, agent): 319 """Gemma 4 uses <thought> tags for inline reasoning.""" 320 result = agent._strip_think_blocks("<thought>internal reasoning</thought> answer") 321 assert "internal reasoning" not in result 322 assert "<thought>" not in result 323 assert "answer" in result 324 325 def test_orphaned_thought_tag(self, agent): 326 result = agent._strip_think_blocks("<thought>orphaned reasoning without close") 327 assert "<thought>" not in result 328 329 # ─── Unterminated-block coverage (#8878, #9568, #10408) ────────────── 330 # Reasoning models served via NIM / MiniMax M2.7 frequently drop the 331 # closing tag, leaking raw reasoning into assistant content. The open 332 # tag appears at a block boundary (start of text or after a newline); 333 # everything from that tag to end-of-string is stripped. 
334 335 def test_unterminated_think_block_content_stripped(self, agent): 336 """Content after unterminated <think> is fully stripped.""" 337 result = agent._strip_think_blocks("<think>orphaned reasoning without close") 338 assert "orphaned reasoning" not in result 339 assert result.strip() == "" 340 341 def test_unterminated_thought_block_content_stripped(self, agent): 342 """Gemma-style <thought> with no close is fully stripped.""" 343 result = agent._strip_think_blocks("<thought>orphaned reasoning without close") 344 assert "orphaned reasoning" not in result 345 assert result.strip() == "" 346 347 def test_unterminated_multiline_block_stripped(self, agent): 348 """Multi-line unterminated blocks are stripped in full.""" 349 result = agent._strip_think_blocks( 350 "<think>\nmulti\nline\nreasoning\nthat never closes" 351 ) 352 assert "multi" not in result 353 assert "never closes" not in result 354 355 def test_unterminated_block_after_answer_preserves_prefix(self, agent): 356 """Visible answer before a line-starting unterminated tag is kept.""" 357 result = agent._strip_think_blocks( 358 "Answer is 42.\n<think>actually let me reconsider" 359 ) 360 assert "Answer is 42." in result 361 assert "reconsider" not in result 362 363 def test_inline_think_mention_in_prose_not_over_stripped(self, agent): 364 """Mid-line `<think>` mentioned in prose must not swallow the rest 365 of the content (the block-boundary check prevents this).""" 366 text = "Use the <think> tag like this in your prose." 367 result = agent._strip_think_blocks(text) 368 # Block-boundary check prevents unterminated-strip from firing 369 assert "prose" in result 370 assert "Use the" in result 371 372 def test_mixed_case_closed_pair_stripped(self, agent): 373 """Mixed-case variants <THINK>…</THINK>, <Thinking>…</Thinking> are 374 handled by case-insensitive closed-pair regex, so the trailing 375 content is preserved.""" 376 result = agent._strip_think_blocks("<THINK>upper</THINK>final") 377 assert "upper" not in result 378 assert "final" in result 379 result = agent._strip_think_blocks("<Thinking>mixed</Thinking>final") 380 assert "mixed" not in result 381 assert "final" in result 382 383 # ─── Tool-call XML block stripping (openclaw/openclaw#67318) ───────── 384 # Some open models (notably Gemma variants via OpenRouter) emit 385 # standalone tool-call XML inside assistant content instead of via the 386 # structured `tool_calls` field. Left unstripped, raw XML leaks to 387 # gateway users (Discord/Telegram/Matrix) and the CLI. 388 389 def test_tool_call_block_stripped(self, agent): 390 text = '<tool_call>{"name": "read_file", "arguments": {"path": "/tmp/x"}}</tool_call> done' 391 result = agent._strip_think_blocks(text) 392 assert "<tool_call>" not in result 393 assert "read_file" not in result 394 assert "done" in result 395 396 def test_function_calls_block_stripped(self, agent): 397 text = '<function_calls>[{"name":"x"}]</function_calls>after' 398 result = agent._strip_think_blocks(text) 399 assert "<function_calls>" not in result 400 assert "after" in result 401 402 def test_gemma_function_name_block_stripped(self, agent): 403 """Gemma-style: <function name="read"><parameter>...</parameter></function>.""" 404 text = ( 405 'Let me check the file.\n' 406 '<function name="read_file"><parameter name="path">/tmp/x.md</parameter></function>\n' 407 'Here is the result.' 
408 ) 409 result = agent._strip_think_blocks(text) 410 assert '<function name="read_file">' not in result 411 assert "/tmp/x.md" not in result 412 assert "Let me check the file." in result 413 assert "Here is the result." in result 414 415 def test_gemma_function_multiline_payload_stripped(self, agent): 416 text = ( 417 'Reading now.\n' 418 '<function name="read_file">\n' 419 ' <parameter name="path">/etc/passwd</parameter>\n' 420 '</function>\n' 421 'Done.' 422 ) 423 result = agent._strip_think_blocks(text) 424 assert "/etc/passwd" not in result 425 assert "Reading now." in result 426 assert "Done." in result 427 428 def test_function_mention_in_prose_preserved(self, agent): 429 """'Use <function> in JavaScript.' — no name attr, not at block boundary 430 in a way that suggests tool call. Must survive.""" 431 text = "In JS you can use <function> declarations for hoisting." 432 result = agent._strip_think_blocks(text) 433 # Prose mention has no name="..." attribute -> not stripped 434 assert "declarations for hoisting" in result 435 436 def test_function_with_attr_in_middle_of_sentence_preserved(self, agent): 437 """Docs example: 'Use <function name="x">...</function> in docs.' 438 The sentence-middle position without a preceding punctuation block 439 boundary means it is NOT stripped. Prose context remains.""" 440 text = 'You can write <function name="x">y</function> inline.' 441 result = agent._strip_think_blocks(text) 442 # Without a leading block boundary (no punctuation before), leaves intact 443 assert "You can write" in result 444 assert "inline" in result 445 446 def test_stray_function_close_tag_removed(self, agent): 447 text = "answer</function> trailing" 448 result = agent._strip_think_blocks(text) 449 assert "</function>" not in result 450 assert "answer" in result 451 assert "trailing" in result 452 453 def test_dangling_function_open_tag_preserved(self, agent): 454 """A streamed-but-truncated <function name="..."> block with no close 455 is intentionally NOT stripped (OpenClaw's asymmetry). 
The tail of a 456 streaming reply may still be valuable to the user.""" 457 text = 'Checking: <function name="read">' 458 result = agent._strip_think_blocks(text) 459 assert "Checking:" in result 460 461 def test_mixed_reasoning_and_tool_call_both_stripped(self, agent): 462 text = '<think>let me plan</think><tool_call>{"name":"x"}</tool_call>final answer' 463 result = agent._strip_think_blocks(text) 464 assert "let me plan" not in result 465 assert "<tool_call>" not in result 466 assert "final answer" in result 467 468 469 class TestExtractReasoning: 470 def test_reasoning_field(self, agent): 471 msg = _mock_assistant_msg(reasoning="thinking hard") 472 assert agent._extract_reasoning(msg) == "thinking hard" 473 474 def test_reasoning_content_field(self, agent): 475 msg = _mock_assistant_msg(reasoning_content="deep thought") 476 assert agent._extract_reasoning(msg) == "deep thought" 477 478 def test_reasoning_details_array(self, agent): 479 msg = _mock_assistant_msg( 480 reasoning_details=[{"summary": "step-by-step analysis"}], 481 ) 482 assert "step-by-step analysis" in agent._extract_reasoning(msg) 483 484 def test_no_reasoning_returns_none(self, agent): 485 msg = _mock_assistant_msg() 486 assert agent._extract_reasoning(msg) is None 487 488 def test_combined_reasoning(self, agent): 489 msg = _mock_assistant_msg( 490 reasoning="part1", 491 reasoning_content="part2", 492 ) 493 result = agent._extract_reasoning(msg) 494 assert "part1" in result 495 assert "part2" in result 496 497 def test_deduplication(self, agent): 498 msg = _mock_assistant_msg( 499 reasoning="same text", 500 reasoning_content="same text", 501 ) 502 result = agent._extract_reasoning(msg) 503 assert result == "same text" 504 505 @pytest.mark.parametrize( 506 ("content", "expected"), 507 [ 508 ("<think>thinking hard</think>", "thinking hard"), 509 ("<thinking>step by step</thinking>", "step by step"), 510 ( 511 "<REASONING_SCRATCHPAD>scratch analysis</REASONING_SCRATCHPAD>", 512 "scratch analysis", 513 ), 514 ], 515 ) 516 def test_inline_reasoning_blocks_fallback(self, agent, content, expected): 517 msg = _mock_assistant_msg(content=content) 518 assert agent._extract_reasoning(msg) == expected 519 520 521 class TestCleanSessionContent: 522 def test_none_passthrough(self): 523 assert AIAgent._clean_session_content(None) is None 524 525 def test_scratchpad_converted(self): 526 text = "<REASONING_SCRATCHPAD>think</REASONING_SCRATCHPAD> answer" 527 result = AIAgent._clean_session_content(text) 528 assert "<REASONING_SCRATCHPAD>" not in result 529 assert "<think>" in result 530 531 def test_extra_newlines_cleaned(self): 532 text = "\n\n\n<think>x</think>\n\n\nafter" 533 result = AIAgent._clean_session_content(text) 534 # Should not have excessive newlines around think block 535 assert "\n\n\n" not in result 536 # Content after think block must be preserved 537 assert "after" in result 538 539 540 class TestGetMessagesUpToLastAssistant: 541 def test_empty_list(self, agent): 542 assert agent._get_messages_up_to_last_assistant([]) == [] 543 544 def test_no_assistant_returns_copy(self, agent): 545 msgs = [{"role": "user", "content": "hi"}] 546 result = agent._get_messages_up_to_last_assistant(msgs) 547 assert result == msgs 548 assert result is not msgs # should be a copy 549 550 def test_single_assistant(self, agent): 551 msgs = [ 552 {"role": "user", "content": "hi"}, 553 {"role": "assistant", "content": "hello"}, 554 ] 555 result = agent._get_messages_up_to_last_assistant(msgs) 556 assert len(result) == 1 557 assert 
result[0]["role"] == "user" 558 559 def test_multiple_assistants_returns_up_to_last(self, agent): 560 msgs = [ 561 {"role": "user", "content": "q1"}, 562 {"role": "assistant", "content": "a1"}, 563 {"role": "user", "content": "q2"}, 564 {"role": "assistant", "content": "a2"}, 565 ] 566 result = agent._get_messages_up_to_last_assistant(msgs) 567 assert len(result) == 3 568 assert result[-1]["content"] == "q2" 569 570 def test_assistant_then_tool_messages(self, agent): 571 msgs = [ 572 {"role": "user", "content": "do something"}, 573 {"role": "assistant", "content": "ok", "tool_calls": [{"id": "1"}]}, 574 {"role": "tool", "content": "result", "tool_call_id": "1"}, 575 ] 576 # Last assistant is at index 1, so result = msgs[:1] 577 result = agent._get_messages_up_to_last_assistant(msgs) 578 assert len(result) == 1 579 assert result[0]["role"] == "user" 580 581 582 class TestMaskApiKey: 583 def test_none_returns_none(self, agent): 584 assert agent._mask_api_key_for_logs(None) is None 585 586 def test_short_key_returns_stars(self, agent): 587 assert agent._mask_api_key_for_logs("short") == "***" 588 589 def test_long_key_masked(self, agent): 590 key = "sk-or-v1-abcdefghijklmnop" 591 result = agent._mask_api_key_for_logs(key) 592 assert result.startswith("sk-or-v1") 593 assert result.endswith("mnop") 594 assert "..." in result 595 596 597 # =================================================================== 598 # Group 2: State / Structure Methods 599 # =================================================================== 600 601 602 class TestInit: 603 def test_anthropic_base_url_accepted(self): 604 """Anthropic base URLs should route to native Anthropic client.""" 605 with ( 606 patch("run_agent.get_tool_definitions", return_value=[]), 607 patch("run_agent.check_toolset_requirements", return_value={}), 608 patch("agent.anthropic_adapter._anthropic_sdk") as mock_anthropic, 609 ): 610 agent = AIAgent( 611 api_key="test-key-1234567890", 612 base_url="https://api.anthropic.com/v1/", 613 quiet_mode=True, 614 skip_context_files=True, 615 skip_memory=True, 616 ) 617 assert agent.api_mode == "anthropic_messages" 618 mock_anthropic.Anthropic.assert_called_once() 619 620 def test_prompt_caching_claude_openrouter(self): 621 """Claude model via OpenRouter should enable prompt caching.""" 622 with ( 623 patch("run_agent.get_tool_definitions", return_value=[]), 624 patch("run_agent.check_toolset_requirements", return_value={}), 625 patch("run_agent.OpenAI"), 626 ): 627 a = AIAgent( 628 api_key="test-k...7890", 629 model="anthropic/claude-sonnet-4-20250514", 630 base_url="https://openrouter.ai/api/v1", 631 quiet_mode=True, 632 skip_context_files=True, 633 skip_memory=True, 634 ) 635 assert a._use_prompt_caching is True 636 637 def test_prompt_caching_non_claude(self): 638 """Non-Claude model should disable prompt caching.""" 639 with ( 640 patch("run_agent.get_tool_definitions", return_value=[]), 641 patch("run_agent.check_toolset_requirements", return_value={}), 642 patch("run_agent.OpenAI"), 643 ): 644 a = AIAgent( 645 api_key="test-key-1234567890", 646 base_url="https://openrouter.ai/api/v1", 647 model="openai/gpt-4o", 648 quiet_mode=True, 649 skip_context_files=True, 650 skip_memory=True, 651 ) 652 assert a._use_prompt_caching is False 653 654 def test_prompt_caching_non_openrouter(self): 655 """Custom base_url (not OpenRouter) should disable prompt caching.""" 656 with ( 657 patch("run_agent.get_tool_definitions", return_value=[]), 658 patch("run_agent.check_toolset_requirements", return_value={}), 659 
patch("run_agent.OpenAI"), 660 ): 661 a = AIAgent( 662 api_key="test-key-1234567890", 663 model="anthropic/claude-sonnet-4-20250514", 664 base_url="http://localhost:8080/v1", 665 quiet_mode=True, 666 skip_context_files=True, 667 skip_memory=True, 668 ) 669 assert a._use_prompt_caching is False 670 671 def test_prompt_caching_native_anthropic(self): 672 """Native Anthropic provider should enable prompt caching.""" 673 with ( 674 patch("run_agent.get_tool_definitions", return_value=[]), 675 patch("run_agent.check_toolset_requirements", return_value={}), 676 patch("agent.anthropic_adapter._anthropic_sdk"), 677 ): 678 a = AIAgent( 679 api_key="test-key-1234567890", 680 base_url="https://api.anthropic.com/v1/", 681 quiet_mode=True, 682 skip_context_files=True, 683 skip_memory=True, 684 ) 685 assert a.api_mode == "anthropic_messages" 686 assert a._use_prompt_caching is True 687 688 def test_prompt_caching_cache_ttl_defaults_without_config(self): 689 """cache_ttl stays 5m when prompt_caching is absent from config.""" 690 with ( 691 patch("run_agent.get_tool_definitions", return_value=[]), 692 patch("run_agent.check_toolset_requirements", return_value={}), 693 patch("run_agent.OpenAI"), 694 patch("hermes_cli.config.load_config", return_value={}), 695 ): 696 a = AIAgent( 697 api_key="test-k...7890", 698 model="anthropic/claude-sonnet-4-20250514", 699 base_url="https://openrouter.ai/api/v1", 700 quiet_mode=True, 701 skip_context_files=True, 702 skip_memory=True, 703 ) 704 assert a._cache_ttl == "5m" 705 706 def test_prompt_caching_cache_ttl_custom_1h(self): 707 """prompt_caching.cache_ttl 1h is applied when present in config.""" 708 with ( 709 patch("run_agent.get_tool_definitions", return_value=[]), 710 patch("run_agent.check_toolset_requirements", return_value={}), 711 patch("run_agent.OpenAI"), 712 patch( 713 "hermes_cli.config.load_config", 714 return_value={"prompt_caching": {"cache_ttl": "1h"}}, 715 ), 716 ): 717 a = AIAgent( 718 api_key="test-k...7890", 719 model="anthropic/claude-sonnet-4-20250514", 720 base_url="https://openrouter.ai/api/v1", 721 quiet_mode=True, 722 skip_context_files=True, 723 skip_memory=True, 724 ) 725 assert a._cache_ttl == "1h" 726 727 def test_prompt_caching_cache_ttl_invalid_falls_back(self): 728 """Non-Anthropic TTL values keep default 5m without raising.""" 729 with ( 730 patch("run_agent.get_tool_definitions", return_value=[]), 731 patch("run_agent.check_toolset_requirements", return_value={}), 732 patch("run_agent.OpenAI"), 733 patch( 734 "hermes_cli.config.load_config", 735 return_value={"prompt_caching": {"cache_ttl": "30m"}}, 736 ), 737 ): 738 a = AIAgent( 739 api_key="test-k...7890", 740 model="anthropic/claude-sonnet-4-20250514", 741 base_url="https://openrouter.ai/api/v1", 742 quiet_mode=True, 743 skip_context_files=True, 744 skip_memory=True, 745 ) 746 assert a._cache_ttl == "5m" 747 748 def test_valid_tool_names_populated(self): 749 """valid_tool_names should contain names from loaded tools.""" 750 tools = _make_tool_defs("web_search", "terminal") 751 with ( 752 patch("run_agent.get_tool_definitions", return_value=tools), 753 patch("run_agent.check_toolset_requirements", return_value={}), 754 patch("run_agent.OpenAI"), 755 ): 756 a = AIAgent( 757 api_key="test-key-1234567890", 758 base_url="https://openrouter.ai/api/v1", 759 quiet_mode=True, 760 skip_context_files=True, 761 skip_memory=True, 762 ) 763 assert a.valid_tool_names == {"web_search", "terminal"} 764 765 def test_session_id_auto_generated(self): 766 """Session ID should be auto-generated in 
YYYYMMDD_HHMMSS_<hex6> format.""" 767 with ( 768 patch("run_agent.get_tool_definitions", return_value=[]), 769 patch("run_agent.check_toolset_requirements", return_value={}), 770 patch("run_agent.OpenAI"), 771 ): 772 a = AIAgent( 773 api_key="test-key-1234567890", 774 base_url="https://openrouter.ai/api/v1", 775 quiet_mode=True, 776 skip_context_files=True, 777 skip_memory=True, 778 ) 779 # Format: YYYYMMDD_HHMMSS_<6 hex chars> 780 assert re.match(r"^\d{8}_\d{6}_[0-9a-f]{6}$", a.session_id), ( 781 f"session_id doesn't match expected format: {a.session_id}" 782 ) 783 784 785 class TestInterrupt: 786 def test_interrupt_sets_flag(self, agent): 787 with patch("run_agent._set_interrupt"): 788 agent.interrupt() 789 assert agent._interrupt_requested is True 790 791 def test_interrupt_with_message(self, agent): 792 with patch("run_agent._set_interrupt"): 793 agent.interrupt("new question") 794 assert agent._interrupt_message == "new question" 795 796 def test_clear_interrupt(self, agent): 797 with patch("run_agent._set_interrupt"): 798 agent.interrupt("msg") 799 agent.clear_interrupt() 800 assert agent._interrupt_requested is False 801 assert agent._interrupt_message is None 802 803 def test_is_interrupted_property(self, agent): 804 assert agent.is_interrupted is False 805 with patch("run_agent._set_interrupt"): 806 agent.interrupt() 807 assert agent.is_interrupted is True 808 809 810 class TestHydrateTodoStore: 811 def test_no_todo_in_history(self, agent): 812 history = [ 813 {"role": "user", "content": "hello"}, 814 {"role": "assistant", "content": "hi"}, 815 ] 816 with patch("run_agent._set_interrupt"): 817 agent._hydrate_todo_store(history) 818 assert not agent._todo_store.has_items() 819 820 def test_recovers_from_history(self, agent): 821 todos = [{"id": "1", "content": "do thing", "status": "pending"}] 822 history = [ 823 {"role": "user", "content": "plan"}, 824 {"role": "assistant", "content": "ok"}, 825 { 826 "role": "tool", 827 "content": json.dumps({"todos": todos}), 828 "tool_call_id": "c1", 829 }, 830 ] 831 with patch("run_agent._set_interrupt"): 832 agent._hydrate_todo_store(history) 833 assert agent._todo_store.has_items() 834 835 def test_skips_non_todo_tools(self, agent): 836 history = [ 837 { 838 "role": "tool", 839 "content": '{"result": "search done"}', 840 "tool_call_id": "c1", 841 }, 842 ] 843 with patch("run_agent._set_interrupt"): 844 agent._hydrate_todo_store(history) 845 assert not agent._todo_store.has_items() 846 847 def test_invalid_json_skipped(self, agent): 848 history = [ 849 { 850 "role": "tool", 851 "content": 'not valid json "todos" oops', 852 "tool_call_id": "c1", 853 }, 854 ] 855 with patch("run_agent._set_interrupt"): 856 agent._hydrate_todo_store(history) 857 assert not agent._todo_store.has_items() 858 859 860 class TestBuildSystemPrompt: 861 def test_always_has_identity(self, agent): 862 prompt = agent._build_system_prompt() 863 assert DEFAULT_AGENT_IDENTITY in prompt 864 865 def test_can_use_soul_identity_even_when_context_files_are_skipped(self): 866 with ( 867 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("terminal")), 868 patch("run_agent.check_toolset_requirements", return_value={}), 869 patch("run_agent.OpenAI"), 870 patch("run_agent.load_soul_md", return_value="SOUL IDENTITY"), 871 ): 872 agent = AIAgent( 873 api_key="test-k...7890", 874 base_url="https://openrouter.ai/api/v1", 875 quiet_mode=True, 876 skip_context_files=True, 877 load_soul_identity=True, 878 skip_memory=True, 879 ) 880 prompt = agent._build_system_prompt() 881 
882 assert "SOUL IDENTITY" in prompt 883 assert DEFAULT_AGENT_IDENTITY not in prompt 884 885 def test_includes_system_message(self, agent): 886 prompt = agent._build_system_prompt(system_message="Custom instruction") 887 assert "Custom instruction" in prompt 888 889 def test_memory_guidance_when_memory_tool_loaded(self, agent_with_memory_tool): 890 from agent.prompt_builder import MEMORY_GUIDANCE 891 892 prompt = agent_with_memory_tool._build_system_prompt() 893 assert MEMORY_GUIDANCE in prompt 894 895 def test_no_memory_guidance_without_tool(self, agent): 896 from agent.prompt_builder import MEMORY_GUIDANCE 897 898 prompt = agent._build_system_prompt() 899 assert MEMORY_GUIDANCE not in prompt 900 901 def test_includes_datetime(self, agent): 902 prompt = agent._build_system_prompt() 903 # Should contain current date info like "Conversation started:" 904 assert "Conversation started:" in prompt 905 906 def test_includes_nous_subscription_prompt(self, agent, monkeypatch): 907 monkeypatch.setattr(run_agent, "build_nous_subscription_prompt", lambda tool_names: "NOUS SUBSCRIPTION BLOCK") 908 prompt = agent._build_system_prompt() 909 assert "NOUS SUBSCRIPTION BLOCK" in prompt 910 911 def test_skills_prompt_derives_available_toolsets_from_loaded_tools(self): 912 tools = _make_tool_defs("web_search", "skills_list", "skill_view", "skill_manage") 913 toolset_map = { 914 "web_search": "web", 915 "skills_list": "skills", 916 "skill_view": "skills", 917 "skill_manage": "skills", 918 } 919 920 with ( 921 patch("run_agent.get_tool_definitions", return_value=tools), 922 patch( 923 "run_agent.check_toolset_requirements", 924 side_effect=AssertionError("should not re-check toolset requirements"), 925 ), 926 patch("run_agent.get_toolset_for_tool", create=True, side_effect=toolset_map.get), 927 patch("run_agent.build_skills_system_prompt", return_value="SKILLS_PROMPT") as mock_skills, 928 patch("run_agent.OpenAI"), 929 ): 930 agent = AIAgent( 931 api_key="test-k...7890", 932 base_url="https://openrouter.ai/api/v1", 933 quiet_mode=True, 934 skip_context_files=True, 935 skip_memory=True, 936 ) 937 938 prompt = agent._build_system_prompt() 939 940 assert "SKILLS_PROMPT" in prompt 941 assert mock_skills.call_args.kwargs["available_tools"] == set(toolset_map) 942 assert mock_skills.call_args.kwargs["available_toolsets"] == {"web", "skills"} 943 944 945 class TestToolUseEnforcementConfig: 946 """Tests for the agent.tool_use_enforcement config option.""" 947 948 def _make_agent(self, model="openai/gpt-4.1", tool_use_enforcement="auto"): 949 """Create an agent with tools and a specific enforcement config.""" 950 with ( 951 patch( 952 "run_agent.get_tool_definitions", 953 return_value=_make_tool_defs("terminal", "web_search"), 954 ), 955 patch("run_agent.check_toolset_requirements", return_value={}), 956 patch("run_agent.OpenAI"), 957 patch( 958 "hermes_cli.config.load_config", 959 return_value={"agent": {"tool_use_enforcement": tool_use_enforcement}}, 960 ), 961 ): 962 a = AIAgent( 963 model=model, 964 api_key="test-key-1234567890", 965 base_url="https://openrouter.ai/api/v1", 966 quiet_mode=True, 967 skip_context_files=True, 968 skip_memory=True, 969 ) 970 a.client = MagicMock() 971 return a 972 973 def test_auto_injects_for_gpt(self): 974 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 975 agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="auto") 976 prompt = agent._build_system_prompt() 977 assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt 978 979 def 
test_auto_injects_for_codex(self): 980 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 981 agent = self._make_agent(model="openai/codex-mini", tool_use_enforcement="auto") 982 prompt = agent._build_system_prompt() 983 assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt 984 985 def test_auto_skips_for_claude(self): 986 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 987 agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="auto") 988 prompt = agent._build_system_prompt() 989 assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt 990 991 def test_true_forces_for_all_models(self): 992 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 993 agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement=True) 994 prompt = agent._build_system_prompt() 995 assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt 996 997 def test_string_true_forces_for_all_models(self): 998 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 999 agent = self._make_agent(model="anthropic/claude-sonnet-4", tool_use_enforcement="true") 1000 prompt = agent._build_system_prompt() 1001 assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt 1002 1003 def test_always_forces_for_all_models(self): 1004 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 1005 agent = self._make_agent(model="deepseek/deepseek-r1", tool_use_enforcement="always") 1006 prompt = agent._build_system_prompt() 1007 assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt 1008 1009 def test_false_disables_for_gpt(self): 1010 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 1011 agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement=False) 1012 prompt = agent._build_system_prompt() 1013 assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt 1014 1015 def test_string_false_disables(self): 1016 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 1017 agent = self._make_agent(model="openai/gpt-4.1", tool_use_enforcement="off") 1018 prompt = agent._build_system_prompt() 1019 assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt 1020 1021 def test_custom_list_matches(self): 1022 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 1023 agent = self._make_agent( 1024 model="deepseek/deepseek-r1", 1025 tool_use_enforcement=["deepseek", "gemini"], 1026 ) 1027 prompt = agent._build_system_prompt() 1028 assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt 1029 1030 def test_custom_list_no_match(self): 1031 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 1032 agent = self._make_agent( 1033 model="anthropic/claude-sonnet-4", 1034 tool_use_enforcement=["deepseek", "gemini"], 1035 ) 1036 prompt = agent._build_system_prompt() 1037 assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt 1038 1039 def test_custom_list_case_insensitive(self): 1040 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 1041 agent = self._make_agent( 1042 model="openai/GPT-4.1", 1043 tool_use_enforcement=["GPT", "Codex"], 1044 ) 1045 prompt = agent._build_system_prompt() 1046 assert TOOL_USE_ENFORCEMENT_GUIDANCE in prompt 1047 1048 def test_no_tools_never_injects(self): 1049 """Even with enforcement=true, no injection when agent has no tools.""" 1050 from agent.prompt_builder import TOOL_USE_ENFORCEMENT_GUIDANCE 1051 with ( 1052 patch("run_agent.get_tool_definitions", return_value=[]), 1053 patch("run_agent.check_toolset_requirements", return_value={}), 1054 patch("run_agent.OpenAI"), 1055 patch( 1056 "hermes_cli.config.load_config", 1057 return_value={"agent": 
{"tool_use_enforcement": True}}, 1058 ), 1059 ): 1060 a = AIAgent( 1061 api_key="test-key-1234567890", 1062 base_url="https://openrouter.ai/api/v1", 1063 quiet_mode=True, 1064 skip_context_files=True, 1065 skip_memory=True, 1066 enabled_toolsets=[], 1067 ) 1068 a.client = MagicMock() 1069 prompt = a._build_system_prompt() 1070 assert TOOL_USE_ENFORCEMENT_GUIDANCE not in prompt 1071 1072 1073 class TestInvalidateSystemPrompt: 1074 def test_clears_cache(self, agent): 1075 agent._cached_system_prompt = "cached value" 1076 agent._invalidate_system_prompt() 1077 assert agent._cached_system_prompt is None 1078 1079 def test_reloads_memory_store(self, agent): 1080 mock_store = MagicMock() 1081 agent._memory_store = mock_store 1082 agent._cached_system_prompt = "cached" 1083 agent._invalidate_system_prompt() 1084 mock_store.load_from_disk.assert_called_once() 1085 1086 1087 class TestBuildApiKwargs: 1088 def test_basic_kwargs(self, agent): 1089 messages = [{"role": "user", "content": "hi"}] 1090 kwargs = agent._build_api_kwargs(messages) 1091 assert kwargs["model"] == agent.model 1092 assert kwargs["messages"] is messages 1093 assert kwargs["timeout"] == 1800.0 1094 1095 def test_public_moonshot_kimi_k2_5_omits_temperature(self, agent): 1096 """Kimi models should NOT have client-side temperature overrides. 1097 1098 The Kimi gateway selects the correct temperature server-side. 1099 """ 1100 agent.base_url = "https://api.moonshot.ai/v1" 1101 agent._base_url_lower = agent.base_url.lower() 1102 agent.model = "kimi-k2.5" 1103 messages = [{"role": "user", "content": "hi"}] 1104 1105 kwargs = agent._build_api_kwargs(messages) 1106 1107 assert "temperature" not in kwargs 1108 1109 def test_public_moonshot_cn_kimi_k2_5_omits_temperature(self, agent): 1110 agent.base_url = "https://api.moonshot.cn/v1" 1111 agent._base_url_lower = agent.base_url.lower() 1112 agent.model = "kimi-k2.5" 1113 messages = [{"role": "user", "content": "hi"}] 1114 1115 kwargs = agent._build_api_kwargs(messages) 1116 1117 assert "temperature" not in kwargs 1118 1119 def test_kimi_coding_endpoint_omits_temperature(self, agent): 1120 agent.base_url = "https://api.kimi.com/coding/v1" 1121 agent._base_url_lower = agent.base_url.lower() 1122 agent.model = "kimi-k2.5" 1123 messages = [{"role": "user", "content": "hi"}] 1124 1125 kwargs = agent._build_api_kwargs(messages) 1126 1127 assert "temperature" not in kwargs 1128 1129 def test_kimi_coding_endpoint_sends_max_tokens_and_reasoning(self, agent): 1130 """Kimi endpoint should send max_tokens=32000 and reasoning_effort as 1131 top-level params, matching Kimi CLI's default behavior.""" 1132 agent.base_url = "https://api.kimi.com/coding/v1" 1133 agent._base_url_lower = agent.base_url.lower() 1134 agent.model = "kimi-for-coding" 1135 messages = [{"role": "user", "content": "hi"}] 1136 1137 kwargs = agent._build_api_kwargs(messages) 1138 1139 assert kwargs["max_tokens"] == 32000 1140 assert kwargs["reasoning_effort"] == "medium" 1141 1142 def test_kimi_coding_endpoint_respects_custom_effort(self, agent): 1143 """reasoning_effort should reflect reasoning_config.effort when set.""" 1144 agent.base_url = "https://api.kimi.com/coding/v1" 1145 agent._base_url_lower = agent.base_url.lower() 1146 agent.model = "kimi-for-coding" 1147 agent.reasoning_config = {"enabled": True, "effort": "high"} 1148 messages = [{"role": "user", "content": "hi"}] 1149 1150 kwargs = agent._build_api_kwargs(messages) 1151 1152 assert kwargs["reasoning_effort"] == "high" 1153 1154 def 
test_kimi_coding_endpoint_sends_thinking_extra_body(self, agent): 1155 """Kimi endpoint should send extra_body.thinking={"type":"enabled"} 1156 to activate reasoning mode, mirroring Kimi CLI's with_thinking().""" 1157 agent.base_url = "https://api.kimi.com/coding/v1" 1158 agent._base_url_lower = agent.base_url.lower() 1159 agent.model = "kimi-for-coding" 1160 messages = [{"role": "user", "content": "hi"}] 1161 1162 kwargs = agent._build_api_kwargs(messages) 1163 1164 assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} 1165 1166 def test_kimi_coding_endpoint_disables_thinking(self, agent): 1167 """When reasoning_config.enabled=False, thinking should be disabled 1168 and reasoning_effort should be omitted entirely — mirroring Kimi 1169 CLI's with_thinking("off") which maps to reasoning_effort=None.""" 1170 agent.base_url = "https://api.kimi.com/coding/v1" 1171 agent._base_url_lower = agent.base_url.lower() 1172 agent.model = "kimi-for-coding" 1173 agent.reasoning_config = {"enabled": False} 1174 messages = [{"role": "user", "content": "hi"}] 1175 1176 kwargs = agent._build_api_kwargs(messages) 1177 1178 assert kwargs["extra_body"]["thinking"] == {"type": "disabled"} 1179 assert "reasoning_effort" not in kwargs 1180 1181 def test_moonshot_endpoint_sends_max_tokens_and_reasoning(self, agent): 1182 """api.moonshot.ai should get the same Kimi-compatible params.""" 1183 agent.base_url = "https://api.moonshot.ai/v1" 1184 agent._base_url_lower = agent.base_url.lower() 1185 agent.model = "kimi-k2.5" 1186 messages = [{"role": "user", "content": "hi"}] 1187 1188 kwargs = agent._build_api_kwargs(messages) 1189 1190 assert kwargs["max_tokens"] == 32000 1191 assert kwargs["reasoning_effort"] == "medium" 1192 assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} 1193 1194 def test_moonshot_cn_endpoint_sends_max_tokens_and_reasoning(self, agent): 1195 """api.moonshot.cn (China endpoint) should get the same params.""" 1196 agent.base_url = "https://api.moonshot.cn/v1" 1197 agent._base_url_lower = agent.base_url.lower() 1198 agent.model = "kimi-k2.5" 1199 messages = [{"role": "user", "content": "hi"}] 1200 1201 kwargs = agent._build_api_kwargs(messages) 1202 1203 assert kwargs["max_tokens"] == 32000 1204 assert kwargs["reasoning_effort"] == "medium" 1205 assert kwargs["extra_body"]["thinking"] == {"type": "enabled"} 1206 1207 def test_provider_preferences_injected(self, agent): 1208 agent.base_url = "https://openrouter.ai/api/v1" 1209 agent.providers_allowed = ["Anthropic"] 1210 messages = [{"role": "user", "content": "hi"}] 1211 kwargs = agent._build_api_kwargs(messages) 1212 assert kwargs["extra_body"]["provider"]["only"] == ["Anthropic"] 1213 1214 def test_reasoning_config_default_openrouter(self, agent): 1215 """Default reasoning config for OpenRouter should be medium.""" 1216 agent.base_url = "https://openrouter.ai/api/v1" 1217 agent.model = "anthropic/claude-sonnet-4-20250514" 1218 messages = [{"role": "user", "content": "hi"}] 1219 kwargs = agent._build_api_kwargs(messages) 1220 reasoning = kwargs["extra_body"]["reasoning"] 1221 assert reasoning["enabled"] is True 1222 assert reasoning["effort"] == "medium" 1223 1224 def test_reasoning_config_custom(self, agent): 1225 agent.base_url = "https://openrouter.ai/api/v1" 1226 agent.model = "anthropic/claude-sonnet-4-20250514" 1227 agent.reasoning_config = {"enabled": False} 1228 messages = [{"role": "user", "content": "hi"}] 1229 kwargs = agent._build_api_kwargs(messages) 1230 assert kwargs["extra_body"]["reasoning"] == {"enabled": 
False} 1231 1232 def test_reasoning_not_sent_for_unsupported_openrouter_model(self, agent): 1233 agent.base_url = "https://openrouter.ai/api/v1" 1234 agent.model = "minimax/minimax-m2.5" 1235 messages = [{"role": "user", "content": "hi"}] 1236 kwargs = agent._build_api_kwargs(messages) 1237 assert "reasoning" not in kwargs.get("extra_body", {}) 1238 1239 def test_reasoning_sent_for_supported_openrouter_model(self, agent): 1240 agent.base_url = "https://openrouter.ai/api/v1" 1241 agent.model = "qwen/qwen3.5-plus-02-15" 1242 messages = [{"role": "user", "content": "hi"}] 1243 kwargs = agent._build_api_kwargs(messages) 1244 assert kwargs["extra_body"]["reasoning"]["effort"] == "medium" 1245 1246 def test_reasoning_sent_for_nous_route(self, agent): 1247 agent.base_url = "https://inference-api.nousresearch.com/v1" 1248 agent.model = "minimax/minimax-m2.5" 1249 messages = [{"role": "user", "content": "hi"}] 1250 kwargs = agent._build_api_kwargs(messages) 1251 assert kwargs["extra_body"]["reasoning"]["effort"] == "medium" 1252 1253 def test_reasoning_sent_for_copilot_gpt5(self, agent): 1254 agent.base_url = "https://api.githubcopilot.com" 1255 agent.model = "gpt-5.4" 1256 messages = [{"role": "user", "content": "hi"}] 1257 kwargs = agent._build_api_kwargs(messages) 1258 assert kwargs["extra_body"]["reasoning"] == {"effort": "medium"} 1259 1260 def test_reasoning_xhigh_normalized_for_copilot(self, agent): 1261 agent.base_url = "https://api.githubcopilot.com" 1262 agent.model = "gpt-5.4" 1263 agent.reasoning_config = {"enabled": True, "effort": "xhigh"} 1264 messages = [{"role": "user", "content": "hi"}] 1265 kwargs = agent._build_api_kwargs(messages) 1266 assert kwargs["extra_body"]["reasoning"] == {"effort": "high"} 1267 1268 def test_reasoning_omitted_for_non_reasoning_copilot_model(self, agent): 1269 agent.base_url = "https://api.githubcopilot.com" 1270 agent.model = "gpt-4.1" 1271 messages = [{"role": "user", "content": "hi"}] 1272 kwargs = agent._build_api_kwargs(messages) 1273 assert "reasoning" not in kwargs.get("extra_body", {}) 1274 1275 def test_max_tokens_injected(self, agent): 1276 agent.max_tokens = 4096 1277 messages = [{"role": "user", "content": "hi"}] 1278 kwargs = agent._build_api_kwargs(messages) 1279 assert kwargs["max_tokens"] == 4096 1280 1281 1282 def test_qwen_portal_formats_messages_and_metadata(self, agent): 1283 agent.base_url = "https://portal.qwen.ai/v1" 1284 agent._base_url_lower = agent.base_url.lower() 1285 agent.session_id = "sess-123" 1286 messages = [ 1287 {"role": "system", "content": "You are helpful"}, 1288 {"role": "assistant", "content": "Got it"}, 1289 {"role": "user", "content": "hi"}, 1290 ] 1291 kwargs = agent._build_api_kwargs(messages) 1292 assert kwargs["metadata"]["sessionId"] == "sess-123" 1293 assert kwargs["extra_body"]["vl_high_resolution_images"] is True 1294 assert isinstance(kwargs["messages"][0]["content"], list) 1295 assert kwargs["messages"][0]["content"][0]["cache_control"] == {"type": "ephemeral"} 1296 assert kwargs["messages"][2]["content"][0]["text"] == "hi" 1297 1298 def test_qwen_portal_normalizes_bare_string_content_parts(self, agent): 1299 agent.base_url = "https://portal.qwen.ai/v1" 1300 agent._base_url_lower = agent.base_url.lower() 1301 messages = [ 1302 {"role": "system", "content": [{"type": "text", "text": "system"}]}, 1303 {"role": "user", "content": ["hello", {"type": "text", "text": "world"}]}, 1304 ] 1305 kwargs = agent._build_api_kwargs(messages) 1306 user_content = kwargs["messages"][1]["content"] 1307 assert 
user_content[0] == {"type": "text", "text": "hello"} 1308 assert user_content[1] == {"type": "text", "text": "world"} 1309 1310 def test_qwen_portal_no_system_message(self, agent): 1311 agent.base_url = "https://portal.qwen.ai/v1" 1312 agent._base_url_lower = agent.base_url.lower() 1313 messages = [{"role": "user", "content": "hi"}] 1314 kwargs = agent._build_api_kwargs(messages) 1315 # Should not crash even without a system message 1316 assert kwargs["messages"][0]["content"][0]["text"] == "hi" 1317 assert "cache_control" not in kwargs["messages"][0]["content"][0] 1318 1319 def test_qwen_portal_sends_explicit_max_tokens(self, agent): 1320 """When the user explicitly sets max_tokens, it should be sent to Qwen Portal.""" 1321 agent.base_url = "https://portal.qwen.ai/v1" 1322 agent._base_url_lower = agent.base_url.lower() 1323 agent.max_tokens = 4096 1324 messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}] 1325 kwargs = agent._build_api_kwargs(messages) 1326 assert kwargs["max_tokens"] == 4096 1327 1328 def test_qwen_portal_default_max_tokens(self, agent): 1329 """When max_tokens is None, Qwen Portal gets a default of 65536 1330 to prevent reasoning models from exhausting their output budget.""" 1331 agent.base_url = "https://portal.qwen.ai/v1" 1332 agent._base_url_lower = agent.base_url.lower() 1333 agent.max_tokens = None 1334 messages = [{"role": "system", "content": "sys"}, {"role": "user", "content": "hi"}] 1335 kwargs = agent._build_api_kwargs(messages) 1336 assert kwargs["max_tokens"] == 65536 1337 1338 def test_ollama_think_false_on_effort_none(self, agent): 1339 """Custom (Ollama) provider with effort=none should inject think=false.""" 1340 agent.provider = "custom" 1341 agent.base_url = "http://localhost:11434/v1" 1342 agent._base_url_lower = agent.base_url.lower() 1343 agent.reasoning_config = {"effort": "none"} 1344 messages = [{"role": "user", "content": "hi"}] 1345 kwargs = agent._build_api_kwargs(messages) 1346 assert kwargs.get("extra_body", {}).get("think") is False 1347 1348 def test_ollama_think_false_on_enabled_false(self, agent): 1349 """Custom (Ollama) provider with enabled=false should inject think=false.""" 1350 agent.provider = "custom" 1351 agent.base_url = "http://localhost:11434/v1" 1352 agent._base_url_lower = agent.base_url.lower() 1353 agent.reasoning_config = {"enabled": False} 1354 messages = [{"role": "user", "content": "hi"}] 1355 kwargs = agent._build_api_kwargs(messages) 1356 assert kwargs.get("extra_body", {}).get("think") is False 1357 1358 def test_ollama_no_think_param_when_reasoning_enabled(self, agent): 1359 """Custom provider with reasoning enabled should NOT inject think=false.""" 1360 agent.provider = "custom" 1361 agent.base_url = "http://localhost:11434/v1" 1362 agent._base_url_lower = agent.base_url.lower() 1363 agent.reasoning_config = {"enabled": True, "effort": "medium"} 1364 messages = [{"role": "user", "content": "hi"}] 1365 kwargs = agent._build_api_kwargs(messages) 1366 assert kwargs.get("extra_body", {}).get("think") is None 1367 1368 def test_non_custom_provider_unaffected(self, agent): 1369 """OpenRouter provider with effort=none should NOT inject think=false.""" 1370 agent.provider = "openrouter" 1371 agent.model = "qwen/qwen3.5-plus-02-15" 1372 agent.reasoning_config = {"effort": "none"} 1373 messages = [{"role": "user", "content": "hi"}] 1374 kwargs = agent._build_api_kwargs(messages) 1375 assert kwargs.get("extra_body", {}).get("think") is None 1376 1377 1378 1379 class TestBuildAssistantMessage: 
1380 def test_basic_message(self, agent): 1381 msg = _mock_assistant_msg(content="Hello!") 1382 result = agent._build_assistant_message(msg, "stop") 1383 assert result["role"] == "assistant" 1384 assert result["content"] == "Hello!" 1385 assert result["finish_reason"] == "stop" 1386 1387 def test_with_reasoning(self, agent): 1388 msg = _mock_assistant_msg(content="answer", reasoning="thinking") 1389 result = agent._build_assistant_message(msg, "stop") 1390 assert result["reasoning"] == "thinking" 1391 1392 def test_reasoning_content_preserved_separately(self, agent): 1393 msg = _mock_assistant_msg( 1394 content="answer", 1395 reasoning="summary", 1396 reasoning_content="provider scratchpad", 1397 ) 1398 result = agent._build_assistant_message(msg, "stop") 1399 assert result["reasoning_content"] == "provider scratchpad" 1400 1401 def test_with_tool_calls(self, agent): 1402 tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1") 1403 msg = _mock_assistant_msg(content="", tool_calls=[tc]) 1404 result = agent._build_assistant_message(msg, "tool_calls") 1405 assert len(result["tool_calls"]) == 1 1406 assert result["tool_calls"][0]["function"]["name"] == "web_search" 1407 1408 def test_with_reasoning_details(self, agent): 1409 details = [{"type": "reasoning.summary", "text": "step1", "signature": "sig1"}] 1410 msg = _mock_assistant_msg(content="ans", reasoning_details=details) 1411 result = agent._build_assistant_message(msg, "stop") 1412 assert "reasoning_details" in result 1413 assert result["reasoning_details"][0]["text"] == "step1" 1414 1415 def test_empty_content(self, agent): 1416 msg = _mock_assistant_msg(content=None) 1417 result = agent._build_assistant_message(msg, "stop") 1418 assert result["content"] == "" 1419 1420 def test_streaming_only_reasoning_promoted_to_reasoning_content(self, agent): 1421 """Refs #16844 / #16884. Streaming-only providers (glm, MiniMax, 1422 gpt-5.x via aigw, Anthropic via openai-compat shims) accumulate 1423 reasoning through delta chunks but never expose 1424 ``reasoning_content`` as a top-level attribute on the finalized 1425 message — only ``reasoning`` (or the internal accumulator). 1426 1427 Without write-side promotion, the persisted message stores the 1428 chain-of-thought under the internal ``reasoning`` key and omits 1429 ``reasoning_content``. When the user later replays that history 1430 through a DeepSeek-v4 / Kimi thinking model, the missing field 1431 causes HTTP 400 ("The reasoning_content in the thinking mode 1432 must be passed back to the API."). 1433 1434 Fix: when ``reasoning_content`` wasn't written by an earlier 1435 branch AND we captured reasoning text from streaming deltas, 1436 promote it to ``reasoning_content`` at write time. 1437 """ 1438 # SDK-style object that exposes ``reasoning`` but NOT 1439 # ``reasoning_content`` — the streaming-only provider shape. 1440 msg = _mock_assistant_msg(content="answer", reasoning="hidden thinking") 1441 assert not hasattr(msg, "reasoning_content") 1442 1443 result = agent._build_assistant_message(msg, "stop") 1444 1445 assert result["reasoning"] == "hidden thinking" 1446 assert result["reasoning_content"] == "hidden thinking" 1447 1448 def test_sdk_reasoning_content_still_wins_over_fallback(self, agent): 1449 """Additive fallback must not override SDK-supplied reasoning_content. 1450 1451 When both ``reasoning`` and ``reasoning_content`` are present, the 1452 SDK's own ``reasoning_content`` is authoritative (may carry 1453 structured data the accumulator doesn't have). 
1454 """ 1455 msg = _mock_assistant_msg( 1456 content="answer", 1457 reasoning="summary only", 1458 reasoning_content="structured provider scratchpad", 1459 ) 1460 result = agent._build_assistant_message(msg, "stop") 1461 assert result["reasoning_content"] == "structured provider scratchpad" 1462 1463 def test_no_reasoning_text_leaves_field_absent(self, agent): 1464 """Non-thinking turns with no reasoning leave reasoning_content absent. 1465 1466 This preserves ``_copy_reasoning_content_for_api``'s downstream 1467 tiers at replay time — cross-provider leak guard (#15748), 1468 promote-from-``reasoning``, and DeepSeek/Kimi " "-pad — which 1469 would all be bypassed if we eagerly wrote ``reasoning_content=" "`` 1470 on every assistant turn regardless of provider. 1471 """ 1472 msg = _mock_assistant_msg(content="plain answer") 1473 result = agent._build_assistant_message(msg, "stop") 1474 assert "reasoning_content" not in result 1475 1476 def test_tool_call_extra_content_preserved(self, agent): 1477 """Gemini thinking models attach extra_content with thought_signature 1478 to tool calls. This must be preserved so subsequent API calls include it.""" 1479 tc = _mock_tool_call( 1480 name="get_weather", arguments='{"city":"NYC"}', call_id="c2" 1481 ) 1482 tc.extra_content = {"google": {"thought_signature": "abc123"}} 1483 msg = _mock_assistant_msg(content="", tool_calls=[tc]) 1484 result = agent._build_assistant_message(msg, "tool_calls") 1485 assert result["tool_calls"][0]["extra_content"] == { 1486 "google": {"thought_signature": "abc123"} 1487 } 1488 1489 def test_tool_call_without_extra_content(self, agent): 1490 """Standard tool calls (no thinking model) should not have extra_content.""" 1491 tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c3") 1492 msg = _mock_assistant_msg(content="", tool_calls=[tc]) 1493 result = agent._build_assistant_message(msg, "tool_calls") 1494 assert "extra_content" not in result["tool_calls"][0] 1495 1496 def test_think_blocks_stripped_from_content(self, agent): 1497 """Inline <think> blocks are stripped from stored content (#8878, #9568). 1498 1499 The reasoning is captured into ``msg['reasoning']`` via the inline 1500 fallback in ``_extract_reasoning``; the raw tags in ``content`` are 1501 redundant and leak to messaging platforms / pollute titles / 1502 inflate context if left in place. 1503 """ 1504 msg = _mock_assistant_msg( 1505 content="<think>internal reasoning</think>The actual answer." 1506 ) 1507 result = agent._build_assistant_message(msg, "stop") 1508 assert "<think>" not in result["content"] 1509 assert "internal reasoning" not in result["content"] 1510 assert "The actual answer." in result["content"] 1511 # Reasoning preserved separately via inline extraction fallback 1512 assert result["reasoning"] == "internal reasoning" 1513 1514 def test_think_blocks_stripped_preserves_normal_content(self, agent): 1515 """Content without reasoning tags passes through unchanged.""" 1516 msg = _mock_assistant_msg(content="No thinking here.") 1517 result = agent._build_assistant_message(msg, "stop") 1518 assert result["content"] == "No thinking here." 1519 1520 def test_memory_context_in_stored_content_is_preserved(self, agent): 1521 """`_build_assistant_message` must not silently mutate model output 1522 containing literal <memory-context> markers — that's legitimate text 1523 (e.g. documentation, code) that the model may emit. 
class TestFormatToolsForSystemMessage:
    def test_no_tools_returns_empty_array(self, agent):
        agent.tools = []
        assert agent._format_tools_for_system_message() == "[]"

    def test_formats_single_tool(self, agent):
        agent.tools = _make_tool_defs("web_search")
        result = agent._format_tools_for_system_message()
        parsed = json.loads(result)
        assert len(parsed) == 1
        assert parsed[0]["name"] == "web_search"

    def test_formats_multiple_tools(self, agent):
        agent.tools = _make_tool_defs("web_search", "terminal", "read_file")
        result = agent._format_tools_for_system_message()
        parsed = json.loads(result)
        assert len(parsed) == 3
        names = {t["name"] for t in parsed}
        assert names == {"web_search", "terminal", "read_file"}
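
# Sketch of the formatting contract exercised by
# TestFormatToolsForSystemMessage: collapse the OpenAI-style function wrapper
# into a compact JSON array of per-tool objects, with "[]" for an empty
# toolset. A hypothetical stand-in; which fields survive beyond "name" is an
# assumption here.
def _format_tools_sketch(tools):
    return json.dumps(
        [
            {
                "name": t["function"]["name"],
                "description": t["function"]["description"],
                "parameters": t["function"]["parameters"],
            }
            for t in tools
        ]
    )
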
# ===================================================================
# Group 3: Conversation Loop Pieces (OpenAI mock)
# ===================================================================


class TestExecuteToolCalls:
    def test_single_tool_executed(self, agent):
        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        with patch(
            "run_agent.handle_function_call", return_value="search result"
        ) as mock_hfc:
            agent._execute_tool_calls(mock_msg, messages, "task-1")
        # enabled_tools passes the agent's own valid_tool_names
        args, kwargs = mock_hfc.call_args
        assert args[:3] == ("web_search", {"q": "test"}, "task-1")
        assert set(kwargs.get("enabled_tools", [])) == agent.valid_tool_names
        assert len(messages) == 1
        assert messages[0]["role"] == "tool"
        assert "search result" in messages[0]["content"]

    def test_interrupt_skips_remaining(self, agent):
        tc1 = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments="{}", call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []

        with patch("run_agent._set_interrupt"):
            agent.interrupt()

        agent._execute_tool_calls(mock_msg, messages, "task-1")
        # Both calls should be skipped with cancellation messages
        assert len(messages) == 2
        assert (
            "cancelled" in messages[0]["content"].lower()
            or "interrupted" in messages[0]["content"].lower()
        )

    def test_invalid_json_args_defaults_empty(self, agent):
        tc = _mock_tool_call(
            name="web_search", arguments="not valid json", call_id="c1"
        )
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        with patch("run_agent.handle_function_call", return_value="ok") as mock_hfc:
            agent._execute_tool_calls(mock_msg, messages, "task-1")
        # Invalid JSON args should fall back to empty dict
        args, kwargs = mock_hfc.call_args
        assert args[:3] == ("web_search", {}, "task-1")
        assert set(kwargs.get("enabled_tools", [])) == agent.valid_tool_names
        assert len(messages) == 1
        assert messages[0]["role"] == "tool"
        assert messages[0]["tool_call_id"] == "c1"

    def test_result_truncation_over_100k(self, agent, tmp_path, monkeypatch):
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
        (tmp_path / ".hermes").mkdir()
        tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        big_result = "x" * 150_000
        with patch("run_agent.handle_function_call", return_value=big_result):
            agent._execute_tool_calls(mock_msg, messages, "task-1")
        # Content should be replaced with persisted-output or truncation
        assert len(messages[0]["content"]) < 150_000
        assert (
            "Truncated" in messages[0]["content"]
            or "<persisted-output>" in messages[0]["content"]
        )

    def test_quiet_tool_output_suppressed_when_progress_callback_present(self, agent):
        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        agent.tool_progress_callback = lambda *args, **kwargs: None

        with patch("run_agent.handle_function_call", return_value="search result"), \
             patch.object(agent, "_safe_print") as mock_print:
            agent._execute_tool_calls(mock_msg, messages, "task-1")

        mock_print.assert_not_called()
        assert len(messages) == 1
        assert messages[0]["role"] == "tool"

    def test_quiet_tool_output_prints_without_progress_callback(self, agent):
        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        agent.platform = "cli"
        agent.tool_progress_callback = None

        with patch("run_agent.handle_function_call", return_value="search result"), \
             patch.object(agent, "_safe_print") as mock_print:
            agent._execute_tool_calls(mock_msg, messages, "task-1")

        mock_print.assert_called_once()
        assert "search" in str(mock_print.call_args.args[0]).lower()
        assert len(messages) == 1
        assert messages[0]["role"] == "tool"

    def test_quiet_tool_output_suppressed_without_progress_callback_for_non_cli_agent(self, agent):
        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        agent.platform = None
        agent.tool_progress_callback = None

        with patch("run_agent.handle_function_call", return_value="search result"), \
             patch.object(agent, "_safe_print") as mock_print:
            agent._execute_tool_calls(mock_msg, messages, "task-1")

        mock_print.assert_not_called()
        assert len(messages) == 1
        assert messages[0]["role"] == "tool"

    def test_vprint_suppressed_in_parseable_quiet_mode(self, agent):
        agent.suppress_status_output = True

        with patch.object(agent, "_safe_print") as mock_print:
            agent._vprint("status line", force=True)
            agent._vprint("normal line")

        mock_print.assert_not_called()

    def test_run_conversation_suppresses_retry_noise_in_parseable_quiet_mode(self, agent):
        class _RateLimitError(Exception):
            status_code = 429

            def __str__(self):
                return "Error code: 429 - Rate limit exceeded."

        responses = [_RateLimitError(), _mock_response(content="Recovered")]

        def _fake_api_call(api_kwargs):
            result = responses.pop(0)
            if isinstance(result, Exception):
                raise result
            return result

        agent.suppress_status_output = True
        agent._interruptible_api_call = _fake_api_call
        agent._persist_session = lambda *args, **kwargs: None
        agent._save_trajectory = lambda *args, **kwargs: None
        agent._save_session_log = lambda *args, **kwargs: None

        captured = io.StringIO()
        agent._print_fn = lambda *args, **kw: print(*args, file=captured, **kw)

        with patch("run_agent.time.sleep", return_value=None):
            result = agent.run_conversation("hello")

        assert result["completed"] is True
        assert result["final_response"] == "Recovered"
        output = captured.getvalue()
        assert "API call failed" not in output
        assert "Rate limit reached" not in output
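
# The truncation behavior asserted in test_result_truncation_over_100k (and
# again on the concurrent path below) boils down to: oversized tool output is
# spilled to disk and the tool message carries a stub plus a short preview.
# A minimal sketch, assuming a 100_000-char cap, a tool_outputs/ directory
# under HERMES_HOME, and this stub wording; only the "Truncated" /
# "<persisted-output>" markers are actually pinned down by the tests.
def _persist_oversized_result_sketch(result: str, hermes_home: Path, cap: int = 100_000) -> str:
    if len(result) <= cap:
        return result
    out_dir = hermes_home / "tool_outputs"
    out_dir.mkdir(parents=True, exist_ok=True)
    out_file = out_dir / f"{uuid.uuid4().hex}.txt"
    out_file.write_text(result)
    # Keep a short head so the model still sees some of the output.
    return (
        f"<persisted-output>{out_file}</persisted-output>\n"
        f"[Truncated: {len(result)} chars total, first 2000 shown]\n" + result[:2000]
    )
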
class TestConcurrentToolExecution:
    """Tests for _execute_tool_calls_concurrent and dispatch logic."""

    def test_single_tool_uses_sequential_path(self, agent):
        """Single tool call should use sequential path, not concurrent."""
        tc = _mock_tool_call(name="web_search", arguments='{"q":"test"}', call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_seq.assert_called_once()
        mock_con.assert_not_called()

    def test_clarify_forces_sequential(self, agent):
        """Batch containing clarify should use sequential path."""
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="clarify", arguments='{"question":"ok?"}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_seq.assert_called_once()
        mock_con.assert_not_called()

    def test_multiple_tools_uses_concurrent_path(self, agent):
        """Multiple read-only tools should use concurrent path."""
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_con.assert_called_once()
        mock_seq.assert_not_called()

    def test_terminal_batch_forces_sequential(self, agent):
        """Stateful tools should not share the concurrent execution path."""
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="terminal", arguments='{"command":"pwd"}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_seq.assert_called_once()
        mock_con.assert_not_called()

    def test_write_batch_forces_sequential(self, agent):
        """File mutations should stay ordered within a turn."""
        tc1 = _mock_tool_call(name="read_file", arguments='{"path":"x.py"}', call_id="c1")
        tc2 = _mock_tool_call(name="write_file", arguments='{"path":"x.py","content":"print(1)"}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_seq.assert_called_once()
        mock_con.assert_not_called()

    def test_disjoint_write_batch_uses_concurrent_path(self, agent):
        """Independent file writes should still run concurrently."""
        tc1 = _mock_tool_call(
            name="write_file",
            arguments='{"path":"src/a.py","content":"print(1)"}',
            call_id="c1",
        )
        tc2 = _mock_tool_call(
            name="write_file",
            arguments='{"path":"src/b.py","content":"print(2)"}',
            call_id="c2",
        )
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_con.assert_called_once()
        mock_seq.assert_not_called()

    def test_overlapping_write_batch_forces_sequential(self, agent):
        """Writes to the same file must stay ordered."""
        tc1 = _mock_tool_call(
            name="write_file",
            arguments='{"path":"src/a.py","content":"print(1)"}',
            call_id="c1",
        )
        tc2 = _mock_tool_call(
            name="patch",
            arguments='{"path":"src/a.py","old_string":"1","new_string":"2"}',
            call_id="c2",
        )
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_seq.assert_called_once()
        mock_con.assert_not_called()

    def test_malformed_json_args_forces_sequential(self, agent):
        """Unparseable tool arguments should fall back to sequential."""
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments="NOT JSON {{{", call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_seq.assert_called_once()
        mock_con.assert_not_called()

    def test_non_dict_args_forces_sequential(self, agent):
        """Tool arguments that parse to a non-dict type should fall back to sequential."""
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments='"just a string"', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        with patch.object(agent, "_execute_tool_calls_sequential") as mock_seq:
            with patch.object(agent, "_execute_tool_calls_concurrent") as mock_con:
                agent._execute_tool_calls(mock_msg, messages, "task-1")
        mock_seq.assert_called_once()
        mock_con.assert_not_called()

    def test_concurrent_executes_all_tools(self, agent):
        """Concurrent path should execute all tools and append results in order."""
        tc1 = _mock_tool_call(name="web_search", arguments='{"q":"alpha"}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments='{"q":"beta"}', call_id="c2")
        tc3 = _mock_tool_call(name="web_search", arguments='{"q":"gamma"}', call_id="c3")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2, tc3])
        messages = []

        call_log = []

        def fake_handle(name, args, task_id, **kwargs):
            call_log.append(name)
            return json.dumps({"result": args.get("q", "")})

        with patch("run_agent.handle_function_call", side_effect=fake_handle):
            agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")

        assert len(messages) == 3
        # Results must be in original order
        assert messages[0]["tool_call_id"] == "c1"
        assert messages[1]["tool_call_id"] == "c2"
        assert messages[2]["tool_call_id"] == "c3"
        # All should be tool messages
        assert all(m["role"] == "tool" for m in messages)
        # Content should contain the query results
        assert "alpha" in messages[0]["content"]
        assert "beta" in messages[1]["content"]
        assert "gamma" in messages[2]["content"]

    def test_concurrent_preserves_order_despite_timing(self, agent):
        """Even if tools finish in different order, messages should be in original order."""
        import time as _time

        tc1 = _mock_tool_call(name="web_search", arguments='{"q":"slow"}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments='{"q":"fast"}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []

        def fake_handle(name, args, task_id, **kwargs):
            q = args.get("q", "")
            if q == "slow":
                _time.sleep(0.1)  # Slow tool
            return f"result_{q}"

        with patch("run_agent.handle_function_call", side_effect=fake_handle):
            agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")

        assert messages[0]["tool_call_id"] == "c1"
        assert "result_slow" in messages[0]["content"]
        assert messages[1]["tool_call_id"] == "c2"
        assert "result_fast" in messages[1]["content"]

    def test_concurrent_handles_tool_error(self, agent):
        """If one tool raises, others should still complete."""
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments='{}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []

        call_count = [0]

        def fake_handle(name, args, task_id, **kwargs):
            call_count[0] += 1
            if call_count[0] == 1:
                raise RuntimeError("boom")
            return "success"

        with patch("run_agent.handle_function_call", side_effect=fake_handle):
            agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")

        assert len(messages) == 2
        # First tool should have error
        assert "Error" in messages[0]["content"] or "boom" in messages[0]["content"]
        # Second tool should succeed
        assert "success" in messages[1]["content"]

    def test_concurrent_interrupt_before_start(self, agent):
        """If interrupt is requested before concurrent execution, all tools are skipped."""
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="read_file", arguments='{}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []

        with patch("run_agent._set_interrupt"):
            agent.interrupt()

        agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")
        assert len(messages) == 2
        assert "cancelled" in messages[0]["content"].lower() or "skipped" in messages[0]["content"].lower()
        assert "cancelled" in messages[1]["content"].lower() or "skipped" in messages[1]["content"].lower()

    def test_concurrent_truncates_large_results(self, agent, tmp_path, monkeypatch):
        """Concurrent path should save oversized results to file."""
        monkeypatch.setenv("HERMES_HOME", str(tmp_path / ".hermes"))
        (tmp_path / ".hermes").mkdir()
        tc1 = _mock_tool_call(name="web_search", arguments='{}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments='{}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        big_result = "x" * 150_000

        with patch("run_agent.handle_function_call", return_value=big_result):
            agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")

        assert len(messages) == 2
        for m in messages:
            assert len(m["content"]) < 150_000
            assert "Truncated" in m["content"] or "<persisted-output>" in m["content"]

    def test_invoke_tool_dispatches_to_handle_function_call(self, agent):
        """_invoke_tool should route regular tools through handle_function_call."""
        with patch("run_agent.handle_function_call", return_value="result") as mock_hfc:
            result = agent._invoke_tool("web_search", {"q": "test"}, "task-1")
        mock_hfc.assert_called_once_with(
            "web_search", {"q": "test"}, "task-1",
            tool_call_id=None,
            session_id=agent.session_id,
            enabled_tools=list(agent.valid_tool_names),
            skip_pre_tool_call_hook=True,
        )
        assert result == "result"

    def test_sequential_tool_callbacks_fire_in_order(self, agent):
        tool_call = _mock_tool_call(name="web_search", arguments='{"query":"hello"}', call_id="c1")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call])
        messages = []
        starts = []
        completes = []
        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append(
            (tool_call_id, function_name, function_args)
        )
        agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append(
            (tool_call_id, function_name, function_args, function_result)
        )

        with patch("run_agent.handle_function_call", return_value='{"success": true}'):
            agent._execute_tool_calls_sequential(mock_msg, messages, "task-1")

        assert starts == [("c1", "web_search", {"query": "hello"})]
        assert completes == [("c1", "web_search", {"query": "hello"}, '{"success": true}')]

    def test_concurrent_tool_callbacks_fire_for_each_tool(self, agent):
        tc1 = _mock_tool_call(name="web_search", arguments='{"query":"one"}', call_id="c1")
        tc2 = _mock_tool_call(name="web_search", arguments='{"query":"two"}', call_id="c2")
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tc1, tc2])
        messages = []
        starts = []
        completes = []
        agent.tool_start_callback = lambda tool_call_id, function_name, function_args: starts.append(
            (tool_call_id, function_name, function_args)
        )
        agent.tool_complete_callback = lambda tool_call_id, function_name, function_args, function_result: completes.append(
            (tool_call_id, function_name, function_args, function_result)
        )

        with patch("run_agent.handle_function_call", side_effect=['{"id":1}', '{"id":2}']):
            agent._execute_tool_calls_concurrent(mock_msg, messages, "task-1")

        assert starts == [
            ("c1", "web_search", {"query": "one"}),
            ("c2", "web_search", {"query": "two"}),
        ]
        assert len(completes) == 2
        assert {entry[0] for entry in completes} == {"c1", "c2"}
        assert {entry[3] for entry in completes} == {'{"id":1}', '{"id":2}'}

    def test_invoke_tool_handles_agent_level_tools(self, agent):
        """_invoke_tool should handle todo tool directly."""
        with patch("tools.todo_tool.todo_tool", return_value='{"ok":true}') as mock_todo:
            result = agent._invoke_tool("todo", {"todos": []}, "task-1")
        mock_todo.assert_called_once()
        assert "ok" in result

    def test_invoke_tool_blocked_returns_error_and_skips_execution(self, agent, monkeypatch):
        """_invoke_tool should return error JSON when a plugin blocks the tool."""
        monkeypatch.setattr(
            "hermes_cli.plugins.get_pre_tool_call_block_message",
            lambda *args, **kwargs: "Blocked by test policy",
        )
        with patch("tools.todo_tool.todo_tool", side_effect=AssertionError("should not run")) as mock_todo:
            result = agent._invoke_tool("todo", {"todos": []}, "task-1")

        assert json.loads(result) == {"error": "Blocked by test policy"}
        mock_todo.assert_not_called()

    def test_invoke_tool_blocked_skips_handle_function_call(self, agent, monkeypatch):
        """Blocked registry tools should not reach handle_function_call."""
        monkeypatch.setattr(
            "hermes_cli.plugins.get_pre_tool_call_block_message",
            lambda *args, **kwargs: "Blocked",
        )
        with patch("run_agent.handle_function_call", side_effect=AssertionError("should not run")):
            result = agent._invoke_tool("web_search", {"q": "test"}, "task-1")

        assert json.loads(result) == {"error": "Blocked"}

    def test_sequential_blocked_tool_skips_checkpoints_and_callbacks(self, agent, monkeypatch):
        """Sequential path: blocked tool should not trigger checkpoints or start callbacks."""
        tool_call = _mock_tool_call(
            name="write_file",
            arguments='{"path":"test.txt","content":"hello"}',
            call_id="c1",
        )
        mock_msg = _mock_assistant_msg(content="", tool_calls=[tool_call])
        messages = []

        monkeypatch.setattr(
            "hermes_cli.plugins.get_pre_tool_call_block_message",
            lambda *args, **kwargs: "Blocked by policy",
        )
        agent._checkpoint_mgr.enabled = True
        agent._checkpoint_mgr.ensure_checkpoint = MagicMock(
            side_effect=AssertionError("checkpoint should not run")
        )

        starts = []
        agent.tool_start_callback = lambda *a: starts.append(a)

        with patch("run_agent.handle_function_call", side_effect=AssertionError("should not run")):
            agent._execute_tool_calls_sequential(mock_msg, messages, "task-1")

        agent._checkpoint_mgr.ensure_checkpoint.assert_not_called()
        assert starts == []
        assert len(messages) == 1
        assert messages[0]["role"] == "tool"
        assert json.loads(messages[0]["content"]) == {"error": "Blocked by policy"}

    def test_blocked_memory_tool_does_not_reset_counter(self, agent, monkeypatch):
        """Blocked memory tool should not reset the nudge counter."""
        agent._turns_since_memory = 5
        monkeypatch.setattr(
            "hermes_cli.plugins.get_pre_tool_call_block_message",
            lambda *args, **kwargs: "Blocked",
        )
        with patch("tools.memory_tool.memory_tool", side_effect=AssertionError("should not run")):
            result = agent._invoke_tool(
                "memory", {"action": "add", "target": "memory", "content": "x"}, "task-1",
            )

        assert json.loads(result) == {"error": "Blocked"}
        assert agent._turns_since_memory == 5
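
# Ordering contract sketch for the concurrent path tested above: run the
# batch on a thread pool, but write results back by original position so
# tool messages match tool_calls order even when a slow call finishes last,
# and capture per-call exceptions as error strings so one failure cannot
# sink the batch. `invoke` is a hypothetical stand-in for the real per-tool
# dispatch; this is not the run_agent implementation.
def _run_batch_in_order_sketch(tool_calls, invoke):
    from concurrent.futures import ThreadPoolExecutor

    def _one(tc):
        try:
            return invoke(tc)
        except Exception as exc:
            return f"Error: {exc}"

    with ThreadPoolExecutor(max_workers=len(tool_calls)) as pool:
        results = list(pool.map(_one, tool_calls))  # map() preserves input order

    return [
        {"role": "tool", "tool_call_id": tc.id, "content": str(result)}
        for tc, result in zip(tool_calls, results)
    ]
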
class TestPathsOverlap:
    """Unit tests for the _paths_overlap helper."""

    def test_same_path_overlaps(self):
        from run_agent import _paths_overlap
        assert _paths_overlap(Path("src/a.py"), Path("src/a.py"))

    def test_siblings_do_not_overlap(self):
        from run_agent import _paths_overlap
        assert not _paths_overlap(Path("src/a.py"), Path("src/b.py"))

    def test_parent_child_overlap(self):
        from run_agent import _paths_overlap
        assert _paths_overlap(Path("src"), Path("src/sub/a.py"))

    def test_different_roots_do_not_overlap(self):
        from run_agent import _paths_overlap
        assert not _paths_overlap(Path("src/a.py"), Path("other/a.py"))

    def test_nested_vs_flat_do_not_overlap(self):
        from run_agent import _paths_overlap
        assert not _paths_overlap(Path("src/sub/a.py"), Path("src/a.py"))

    def test_empty_paths_do_not_overlap(self):
        from run_agent import _paths_overlap
        assert not _paths_overlap(Path(""), Path(""))

    def test_one_empty_path_does_not_overlap(self):
        from run_agent import _paths_overlap
        assert not _paths_overlap(Path(""), Path("src/a.py"))
        assert not _paths_overlap(Path("src/a.py"), Path(""))
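
# The semantics pinned down by TestPathsOverlap, reimplemented as a sketch
# for reference (run_agent._paths_overlap is the real helper): two paths
# overlap when one equals or is an ancestor of the other, and an empty path
# never overlaps anything.
def _paths_overlap_sketch(a: Path, b: Path) -> bool:
    if not a.parts or not b.parts:
        return False
    shorter, longer = sorted((a.parts, b.parts), key=len)
    return longer[: len(shorter)] == shorter
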
_mock_tool_call(name="write_file", arguments=f'{{"path":"{tmp_path / "notes.txt"}","content":"two"}}', call_id="c2") 2146 2147 assert not _should_parallelize_tool_batch([tc1, tc2]) 2148 2149 2150 class TestHandleMaxIterations: 2151 def test_returns_summary(self, agent): 2152 resp = _mock_response(content="Here is a summary of what I did.") 2153 agent.client.chat.completions.create.return_value = resp 2154 agent._cached_system_prompt = "You are helpful." 2155 messages = [{"role": "user", "content": "do stuff"}] 2156 result = agent._handle_max_iterations(messages, 60) 2157 assert isinstance(result, str) 2158 assert len(result) > 0 2159 assert "summary" in result.lower() 2160 2161 def test_api_failure_returns_error(self, agent): 2162 agent.client.chat.completions.create.side_effect = Exception("API down") 2163 agent._cached_system_prompt = "You are helpful." 2164 messages = [{"role": "user", "content": "do stuff"}] 2165 result = agent._handle_max_iterations(messages, 60) 2166 assert isinstance(result, str) 2167 assert "error" in result.lower() 2168 assert "API down" in result 2169 2170 def test_summary_skips_reasoning_for_unsupported_openrouter_model(self, agent): 2171 agent.base_url = "https://openrouter.ai/api/v1" 2172 agent.model = "minimax/minimax-m2.5" 2173 resp = _mock_response(content="Summary") 2174 agent.client.chat.completions.create.return_value = resp 2175 agent._cached_system_prompt = "You are helpful." 2176 messages = [{"role": "user", "content": "do stuff"}] 2177 2178 result = agent._handle_max_iterations(messages, 60) 2179 2180 assert result == "Summary" 2181 kwargs = agent.client.chat.completions.create.call_args.kwargs 2182 assert "reasoning" not in kwargs.get("extra_body", {}) 2183 2184 def test_summary_request_removes_orphan_tool_result(self, agent): 2185 """Regression: max-iterations summary request must NOT contain 2186 orphan tool results (tool_call_id with no matching assistant tool_call).""" 2187 resp = _mock_response(content="Summary of work done.") 2188 agent.client.chat.completions.create.return_value = resp 2189 agent._cached_system_prompt = "You are helpful." 2190 messages = [ 2191 {"role": "user", "content": "Analyze finance-data-router"}, 2192 {"role": "assistant", "content": "[Session Arc Summary] ..."}, 2193 {"role": "tool", "tool_call_id": "call_cfedFhJjGmu1RvRc1OUC38j8", "content": "file content here"}, 2194 {"role": "assistant", "tool_calls": [{"id": "call_8fXBXsT592Vpvm7wnW4obPEu", "function": {"name": "patch", "arguments": "{}"}}]}, 2195 {"role": "tool", "tool_call_id": "call_8fXBXsT592Vpvm7wnW4obPEu", "content": "patch result"}, 2196 {"role": "assistant", "content": "Done."}, 2197 ] 2198 2199 result = agent._handle_max_iterations(messages, 120) 2200 2201 assert result == "Summary of work done." 2202 kwargs = agent.client.chat.completions.create.call_args.kwargs 2203 sent_msgs = kwargs.get("messages", []) 2204 orphan_ids = [ 2205 m.get("tool_call_id") for m in sent_msgs 2206 if m.get("role") == "tool" and m.get("tool_call_id") == "call_cfedFhJjGmu1RvRc1OUC38j8" 2207 ] 2208 assert len(orphan_ids) == 0, f"Orphan tool result still present: {orphan_ids}" 2209 2210 def test_summary_request_inserts_stub_for_missing_tool_result(self, agent): 2211 """If an assistant tool_call has no matching tool result in the 2212 summary request, a stub must be inserted to satisfy the API contract.""" 2213 resp = _mock_response(content="Summary") 2214 agent.client.chat.completions.create.return_value = resp 2215 agent._cached_system_prompt = "You are helpful." 
class TestHandleMaxIterations:
    def test_returns_summary(self, agent):
        resp = _mock_response(content="Here is a summary of what I did.")
        agent.client.chat.completions.create.return_value = resp
        agent._cached_system_prompt = "You are helpful."
        messages = [{"role": "user", "content": "do stuff"}]
        result = agent._handle_max_iterations(messages, 60)
        assert isinstance(result, str)
        assert len(result) > 0
        assert "summary" in result.lower()

    def test_api_failure_returns_error(self, agent):
        agent.client.chat.completions.create.side_effect = Exception("API down")
        agent._cached_system_prompt = "You are helpful."
        messages = [{"role": "user", "content": "do stuff"}]
        result = agent._handle_max_iterations(messages, 60)
        assert isinstance(result, str)
        assert "error" in result.lower()
        assert "API down" in result

    def test_summary_skips_reasoning_for_unsupported_openrouter_model(self, agent):
        agent.base_url = "https://openrouter.ai/api/v1"
        agent.model = "minimax/minimax-m2.5"
        resp = _mock_response(content="Summary")
        agent.client.chat.completions.create.return_value = resp
        agent._cached_system_prompt = "You are helpful."
        messages = [{"role": "user", "content": "do stuff"}]

        result = agent._handle_max_iterations(messages, 60)

        assert result == "Summary"
        kwargs = agent.client.chat.completions.create.call_args.kwargs
        assert "reasoning" not in kwargs.get("extra_body", {})

    def test_summary_request_removes_orphan_tool_result(self, agent):
        """Regression: max-iterations summary request must NOT contain
        orphan tool results (tool_call_id with no matching assistant tool_call)."""
        resp = _mock_response(content="Summary of work done.")
        agent.client.chat.completions.create.return_value = resp
        agent._cached_system_prompt = "You are helpful."
        messages = [
            {"role": "user", "content": "Analyze finance-data-router"},
            {"role": "assistant", "content": "[Session Arc Summary] ..."},
            {"role": "tool", "tool_call_id": "call_cfedFhJjGmu1RvRc1OUC38j8", "content": "file content here"},
            {"role": "assistant", "tool_calls": [{"id": "call_8fXBXsT592Vpvm7wnW4obPEu", "function": {"name": "patch", "arguments": "{}"}}]},
            {"role": "tool", "tool_call_id": "call_8fXBXsT592Vpvm7wnW4obPEu", "content": "patch result"},
            {"role": "assistant", "content": "Done."},
        ]

        result = agent._handle_max_iterations(messages, 120)

        assert result == "Summary of work done."
        kwargs = agent.client.chat.completions.create.call_args.kwargs
        sent_msgs = kwargs.get("messages", [])
        orphan_ids = [
            m.get("tool_call_id") for m in sent_msgs
            if m.get("role") == "tool" and m.get("tool_call_id") == "call_cfedFhJjGmu1RvRc1OUC38j8"
        ]
        assert len(orphan_ids) == 0, f"Orphan tool result still present: {orphan_ids}"

    def test_summary_request_inserts_stub_for_missing_tool_result(self, agent):
        """If an assistant tool_call has no matching tool result in the
        summary request, a stub must be inserted to satisfy the API contract."""
        resp = _mock_response(content="Summary")
        agent.client.chat.completions.create.return_value = resp
        agent._cached_system_prompt = "You are helpful."
        messages = [
            {"role": "user", "content": "do stuff"},
            {"role": "assistant", "tool_calls": [{"id": "call_no_result", "function": {"name": "terminal", "arguments": "{}"}}]},
            {"role": "assistant", "content": "Continuing..."},
        ]

        result = agent._handle_max_iterations(messages, 60)

        assert result == "Summary"
        kwargs = agent.client.chat.completions.create.call_args.kwargs
        sent_msgs = kwargs.get("messages", [])
        stub_ids = [
            m.get("tool_call_id") for m in sent_msgs
            if m.get("role") == "tool" and m.get("tool_call_id") == "call_no_result"
        ]
        assert len(stub_ids) >= 1, f"No stub result for assistant tool_call: {stub_ids}"

    def test_summary_omits_provider_preferences_for_non_openrouter(self, agent):
        agent.base_url = "https://api.openai.com/v1"
        agent._base_url_lower = agent.base_url.lower()
        agent.provider = "openai"
        agent.providers_allowed = ["Anthropic"]
        agent.client.chat.completions.create.return_value = _mock_response(content="Summary")
        agent._cached_system_prompt = "You are helpful."

        result = agent._handle_max_iterations([{"role": "user", "content": "do stuff"}], 60)

        assert result == "Summary"
        kwargs = agent.client.chat.completions.create.call_args.kwargs
        assert "provider" not in kwargs.get("extra_body", {})

    def test_summary_keeps_provider_preferences_for_openrouter(self, agent):
        agent.base_url = "https://openrouter.ai/api/v1"
        agent._base_url_lower = agent.base_url.lower()
        agent.provider = "openrouter"
        agent.providers_allowed = ["Anthropic"]
        agent.client.chat.completions.create.return_value = _mock_response(content="Summary")
        agent._cached_system_prompt = "You are helpful."

        result = agent._handle_max_iterations([{"role": "user", "content": "do stuff"}], 60)

        assert result == "Summary"
        kwargs = agent.client.chat.completions.create.call_args.kwargs
        assert kwargs["extra_body"]["provider"]["only"] == ["Anthropic"]

    def test_codex_summary_sanitizes_orphan_tool_results(self, agent):
        agent.api_mode = "codex_responses"
        agent.provider = "openai-codex"
        agent.base_url = "https://chatgpt.com/backend-api/codex"
        agent._base_url_lower = agent.base_url.lower()
        agent._base_url_hostname = "chatgpt.com"
        agent.model = "gpt-5.5"
        agent._cached_system_prompt = "You are helpful."
        captured = {}

        def fake_run_codex_stream(kwargs):
            captured.update(kwargs)
            return SimpleNamespace(
                status="completed",
                output=[
                    SimpleNamespace(
                        type="message",
                        status="completed",
                        content=[SimpleNamespace(type="output_text", text="Summary")],
                    )
                ],
            )

        messages = [
            {"role": "user", "content": "do stuff"},
            {
                "role": "tool",
                "tool_call_id": "call_orphan",
                "content": "orphaned result from compressed history",
            },
        ]

        with patch.object(agent, "_run_codex_stream", side_effect=fake_run_codex_stream):
            result = agent._handle_max_iterations(messages, 90)

        assert result == "Summary"
        input_items = captured["input"]
        assert not any(
            item.get("type") == "function_call_output"
            and item.get("call_id") == "call_orphan"
            for item in input_items
        )

    def test_api_sanitizer_matches_responses_call_id_when_id_differs(self, agent):
        messages = [
            {
                "role": "assistant",
                "content": "",
                "tool_calls": [
                    {
                        "id": "fc_123",
                        "call_id": "call_123",
                        "response_item_id": "fc_123",
                        "type": "function",
                        "function": {"name": "web_search", "arguments": "{}"},
                    }
                ],
            },
            {"role": "tool", "tool_call_id": "call_123", "content": "result"},
        ]

        sanitized = agent._sanitize_api_messages(messages)

        assert [m.get("tool_call_id") for m in sanitized if m.get("role") == "tool"] == [
            "call_123"
        ]
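
# Sketch of the pairing repair the summary tests above require before any
# request leaves the agent: drop tool results whose tool_call_id no assistant
# turn ever issued, and stub a result for any assistant tool_call that never
# got one, so the chat-completions contract (every tool message answers a
# preceding tool_call, and vice versa) holds. A simplified illustration, not
# the real _sanitize_api_messages; the stub wording is an assumption.
def _repair_tool_pairing_sketch(messages: list) -> list:
    issued = {
        tc.get("call_id") or tc.get("id")
        for m in messages
        if m.get("role") == "assistant"
        for tc in m.get("tool_calls", [])
    }
    answered = {m.get("tool_call_id") for m in messages if m.get("role") == "tool"}
    repaired = []
    for m in messages:
        if m.get("role") == "tool" and m.get("tool_call_id") not in issued:
            continue  # orphan result (e.g. left behind by history compression)
        repaired.append(m)
        if m.get("role") == "assistant":
            for tc in m.get("tool_calls", []):
                call_id = tc.get("call_id") or tc.get("id")
                if call_id not in answered:
                    repaired.append(
                        {
                            "role": "tool",
                            "tool_call_id": call_id,
                            "content": "[stub: tool result unavailable]",
                        }
                    )
    return repaired
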
class TestRunConversation:
    """Tests for the main run_conversation method.

    Each test mocks client.chat.completions.create to return controlled
    responses, exercising different code paths without real API calls.
    """

    def _setup_agent(self, agent):
        """Common setup for run_conversation tests."""
        agent._cached_system_prompt = "You are helpful."
        agent._use_prompt_caching = False
        agent.tool_delay = 0
        agent.compression_enabled = False
        agent.save_trajectories = False

    def test_stop_finish_reason_returns_response(self, agent):
        self._setup_agent(agent)
        resp = _mock_response(content="Final answer", finish_reason="stop")
        agent.client.chat.completions.create.return_value = resp
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("hello")
        assert result["final_response"] == "Final answer"
        assert result["completed"] is True

    def test_tool_calls_then_stop(self, agent):
        self._setup_agent(agent)
        tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
        resp1 = _mock_response(content="", finish_reason="tool_calls", tool_calls=[tc])
        resp2 = _mock_response(content="Done searching", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp1, resp2]
        with (
            patch("run_agent.handle_function_call", return_value="search result") as mock_handle_function_call,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("search something")
        assert result["final_response"] == "Done searching"
        assert result["api_calls"] == 2
        assert mock_handle_function_call.call_args.kwargs["tool_call_id"] == "c1"
        assert mock_handle_function_call.call_args.kwargs["session_id"] == agent.session_id

    def test_request_scoped_api_hooks_fire_for_each_api_call(self, agent):
        self._setup_agent(agent)
        tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
        resp1 = _mock_response(content="", finish_reason="tool_calls", tool_calls=[tc])
        resp2 = _mock_response(content="Done searching", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp1, resp2]

        hook_calls = []

        def _record_hook(name, **kwargs):
            hook_calls.append((name, kwargs))
            return []

        with (
            patch("run_agent.handle_function_call", return_value="search result"),
            patch("hermes_cli.plugins.invoke_hook", side_effect=_record_hook),
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("search something")

        assert result["final_response"] == "Done searching"
        pre_request_calls = [kw for name, kw in hook_calls if name == "pre_api_request"]
        post_request_calls = [kw for name, kw in hook_calls if name == "post_api_request"]
        assert len(pre_request_calls) == 2
        assert len(post_request_calls) == 2
        assert [call["api_call_count"] for call in pre_request_calls] == [1, 2]
        assert [call["api_call_count"] for call in post_request_calls] == [1, 2]
        assert all(call["session_id"] == agent.session_id for call in pre_request_calls)
        assert all("message_count" in c and "messages" not in c for c in pre_request_calls)
        assert all("usage" in c and "response" not in c for c in post_request_calls)

    def test_content_with_tool_calls_stays_silent_for_non_cli_quiet_mode(self, agent):
        self._setup_agent(agent)
        agent.platform = None
        tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
        resp1 = _mock_response(
            content="I'll search for that.",
            finish_reason="tool_calls",
            tool_calls=[tc],
        )
        resp2 = _mock_response(content="Done searching", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp1, resp2]

        with (
            patch("run_agent.handle_function_call", return_value="search result"),
            patch.object(agent, "_safe_print") as mock_print,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("search something")

        assert result["final_response"] == "Done searching"
        mock_print.assert_not_called()

    def test_interrupt_breaks_loop(self, agent):
        self._setup_agent(agent)

        def interrupt_side_effect(api_kwargs):
            agent._interrupt_requested = True
            raise InterruptedError("Agent interrupted during API call")

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
            patch("run_agent._set_interrupt"),
            patch.object(
                agent, "_interruptible_api_call", side_effect=interrupt_side_effect
            ),
        ):
            result = agent.run_conversation("hello")
        assert result["interrupted"] is True

    def test_invalid_tool_name_retry(self, agent):
        """Model hallucinates an invalid tool name, agent retries and succeeds."""
        self._setup_agent(agent)
        bad_tc = _mock_tool_call(name="nonexistent_tool", arguments="{}", call_id="c1")
        resp_bad = _mock_response(
            content="", finish_reason="tool_calls", tool_calls=[bad_tc]
        )
        resp_good = _mock_response(content="Got it", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp_bad, resp_good]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("do something")
        assert result["final_response"] == "Got it"
        assert result["completed"] is True
        assert result["api_calls"] == 2

    def test_reasoning_only_local_resumed_no_compression_triggered(self, agent):
        """Reasoning-only responses no longer trigger compression — prefill then accepted."""
        self._setup_agent(agent)
        agent.base_url = "http://127.0.0.1:1234/v1"
        agent.compression_enabled = True
        empty_resp = _mock_response(
            content=None,
            finish_reason="stop",
            reasoning_content="reasoning only",
        )
        prefill = [
            {"role": "user", "content": "old question"},
            {"role": "assistant", "content": "old answer"},
        ]

        # 6 responses: original + 2 prefill + 3 retries after prefill exhaustion
        with (
            patch.object(agent, "_interruptible_api_call", side_effect=[empty_resp] * 6),
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_not_called()  # no compression triggered
        assert result["completed"] is True
        assert result["final_response"] == "(empty)"
        assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries

    def test_reasoning_only_response_prefill_then_empty(self, agent):
        """Structured reasoning-only triggers prefill (2), then retries (3), then (empty)."""
        self._setup_agent(agent)
        empty_resp = _mock_response(
            content=None,
            finish_reason="stop",
            reasoning_content="structured reasoning answer",
        )
        # 6 responses: 1 original + 2 prefill + 3 retries after prefill exhaustion
        agent.client.chat.completions.create.side_effect = [empty_resp] * 6
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
        assert result["final_response"] == "(empty)"
        assert result["api_calls"] == 6  # 1 original + 2 prefill + 3 retries

    def test_reasoning_only_prefill_succeeds_on_continuation(self, agent):
        """When prefill continuation produces content, it becomes the final response."""
        self._setup_agent(agent)
        empty_resp = _mock_response(
            content=None,
            finish_reason="stop",
            reasoning_content="structured reasoning answer",
        )
        content_resp = _mock_response(
            content="Here is the actual answer.",
            finish_reason="stop",
        )
        agent.client.chat.completions.create.side_effect = [empty_resp, content_resp]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
        assert result["final_response"] == "Here is the actual answer."
        assert result["api_calls"] == 2  # 1 original + 1 prefill continuation
        # Prefill message should be cleaned up — no consecutive assistant messages
        roles = [m.get("role") for m in result["messages"]]
        for i in range(len(roles) - 1):
            if roles[i] == "assistant" and roles[i + 1] == "assistant":
                raise AssertionError("Consecutive assistant messages found in history")

    def test_truly_empty_response_retries_3_times_then_empty(self, agent):
        """Truly empty response (no content, no reasoning) retries 3 times then falls through to (empty)."""
        self._setup_agent(agent)
        agent.base_url = "http://127.0.0.1:1234/v1"
        empty_resp = _mock_response(content=None, finish_reason="stop")
        # 4 responses: 1 original + 3 nudge retries, all empty
        agent.client.chat.completions.create.side_effect = [
            empty_resp, empty_resp, empty_resp, empty_resp,
        ]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
        assert result["final_response"] == "(empty)"
        assert result["api_calls"] == 4  # 1 original + 3 retries

    def test_truly_empty_response_succeeds_on_nudge(self, agent):
        """Model produces content after being nudged for empty response."""
        self._setup_agent(agent)
        agent.base_url = "http://127.0.0.1:1234/v1"
        empty_resp = _mock_response(content=None, finish_reason="stop")
        content_resp = _mock_response(
            content="Here is the actual answer.",
            finish_reason="stop",
        )
        # 1 empty response, then model produces content on nudge
        agent.client.chat.completions.create.side_effect = [empty_resp, content_resp]
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
        assert result["final_response"] == "Here is the actual answer."
        assert result["api_calls"] == 2  # 1 original + 1 nudge retry

    def test_empty_response_triggers_fallback_provider(self, agent):
        """After 3 empty retries, fallback provider is activated and produces content."""
        self._setup_agent(agent)
        agent.base_url = "http://127.0.0.1:1234/v1"
        # Configure a fallback chain
        agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}]
        agent._fallback_index = 0
        agent._fallback_activated = False

        empty_resp = _mock_response(content=None, finish_reason="stop")
        content_resp = _mock_response(content="Fallback answer.", finish_reason="stop")
        # 4 empty (1 orig + 3 retries), then fallback model answers
        agent.client.chat.completions.create.side_effect = [
            empty_resp, empty_resp, empty_resp, empty_resp, content_resp,
        ]

        fallback_called = {"called": False}

        def _mock_fallback():
            fallback_called["called"] = True
            # Simulate what _try_activate_fallback does: just advance the
            # index and set the flag (the client is already mocked).
            agent._fallback_index = 1
            agent._fallback_activated = True
            agent.model = "anthropic/claude-sonnet-4"
            agent.provider = "openrouter"
            return True

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
            patch.object(agent, "_try_activate_fallback", side_effect=_mock_fallback),
        ):
            result = agent.run_conversation("answer me")
        assert fallback_called["called"], "Fallback should have been triggered"
        assert result["completed"] is True
        assert result["final_response"] == "Fallback answer."

    def test_empty_response_fallback_also_empty_returns_empty(self, agent):
        """If fallback also returns empty, final response is (empty)."""
        self._setup_agent(agent)
        agent.base_url = "http://127.0.0.1:1234/v1"
        agent._fallback_chain = [{"provider": "openrouter", "model": "anthropic/claude-sonnet-4"}]
        agent._fallback_index = 0
        agent._fallback_activated = False

        empty_resp = _mock_response(content=None, finish_reason="stop")
        # 4 empty from primary (1 + 3 retries), fallback activated,
        # then 4 more empty from fallback (1 + 3 retries), no more fallbacks
        agent.client.chat.completions.create.side_effect = [
            empty_resp, empty_resp, empty_resp, empty_resp,  # primary exhausted
            empty_resp, empty_resp, empty_resp, empty_resp,  # fallback exhausted
        ]

        def _mock_fallback():
            if agent._fallback_index >= len(agent._fallback_chain):
                return False
            agent._fallback_index += 1
            agent._fallback_activated = True
            agent.model = "anthropic/claude-sonnet-4"
            agent.provider = "openrouter"
            return True

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
            patch.object(agent, "_try_activate_fallback", side_effect=_mock_fallback),
        ):
            result = agent.run_conversation("answer me")
        assert result["completed"] is True
        assert result["final_response"] == "(empty)"

    def test_empty_response_emits_status_for_gateway(self, agent):
        """_emit_status is called during empty retries so gateway users see feedback."""
        self._setup_agent(agent)
        agent.base_url = "http://127.0.0.1:1234/v1"

        empty_resp = _mock_response(content=None, finish_reason="stop")
        # 4 empty: 1 original + 3 retries, all empty, no fallback
        agent.client.chat.completions.create.side_effect = [
            empty_resp, empty_resp, empty_resp, empty_resp,
        ]

        status_messages = []

        def _capture_status(msg):
            status_messages.append(msg)

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
            patch.object(agent, "_emit_status", side_effect=_capture_status),
        ):
            result = agent.run_conversation("answer me")

        assert result["final_response"] == "(empty)"
        # Should have emitted retry statuses (3 retries) + final failure
        retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
        assert len(retry_msgs) == 3, f"Expected 3 retry status messages, got {len(retry_msgs)}: {status_messages}"
        failure_msgs = [m for m in status_messages if "no content" in m.lower() or "no fallback" in m.lower()]
        assert len(failure_msgs) >= 1, f"Expected at least 1 failure status, got: {status_messages}"

    def test_partial_stream_recovery_uses_streamed_content(self, agent):
        """When streaming fails after partial delivery, recovered partial content becomes final response."""
        self._setup_agent(agent)
        # Simulate a partial-stream-stub response: content recovered from streaming
        partial_resp = _mock_response(
            content="Here is the partial answer that was stream",
            finish_reason="stop",
        )
        agent.client.chat.completions.create.return_value = partial_resp
        # Simulate that streaming had already delivered this text
        agent._current_streamed_assistant_text = "Here is the partial answer that was stream"
        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("explain something")
        # The partial content should be used as-is (not empty, not retried)
        assert result["completed"] is True
        assert result["final_response"] == "Here is the partial answer that was stream"
        assert result["api_calls"] == 1  # No retries

    def test_partial_stream_recovery_on_empty_stub(self, agent):
        """When stub response has no content but text was streamed, use streamed text."""
        self._setup_agent(agent)
        # Stub response with no content (old behavior before fix)
        empty_stub = _mock_response(content=None, finish_reason="stop")

        def _fake_api_call(api_kwargs):
            # Simulate what streaming does: accumulate text before returning
            # a stub with no content (connection died mid-stream)
            agent._current_streamed_assistant_text = "The answer to your question is that"
            return empty_stub

        status_messages = []

        def _capture_status(msg):
            status_messages.append(msg)

        with (
            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
            patch.object(agent, "_emit_status", side_effect=_capture_status),
        ):
            result = agent.run_conversation("ask me")
        # Should recover partial streamed content, not fall through to (empty)
        assert result["completed"] is True
        assert result["final_response"] == "The answer to your question is that"
        assert result["api_calls"] == 1  # No wasted retries
        # Should emit the stream-interrupted status, NOT the empty-retry status
        recovery_msgs = [m for m in status_messages if "stream interrupted" in m.lower()]
        assert len(recovery_msgs) >= 1, f"Expected stream recovery status, got: {status_messages}"
        # Should NOT have retry statuses
        retry_msgs = [m for m in status_messages if "retrying" in m.lower()]
        assert len(retry_msgs) == 0, f"Should not retry when stream content exists: {status_messages}"

    def test_partial_stream_recovery_preempts_prior_turn_fallback(self, agent):
        """Partial streamed content takes priority over _last_content_with_tools fallback."""
        self._setup_agent(agent)
        # Set up the prior-turn fallback content (from a previous turn with tool calls)
        agent._last_content_with_tools = "Old content from prior turn with tools"
        # Stub response with no content
        empty_stub = _mock_response(content=None, finish_reason="stop")

        def _fake_api_call(api_kwargs):
            # Simulate partial streaming before connection death
            agent._current_streamed_assistant_text = "Fresh partial content from this turn"
            return empty_stub

        with (
            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("question")
        # Should use the streamed content, not the old prior-turn fallback
        assert result["final_response"] == "Fresh partial content from this turn"
        assert result["api_calls"] == 1

    def test_nous_401_refreshes_after_remint_and_retries(self, agent):
        self._setup_agent(agent)
        agent.provider = "nous"
        agent.api_mode = "chat_completions"

        calls = {"api": 0, "refresh": 0}

        class _UnauthorizedError(RuntimeError):
            def __init__(self):
                super().__init__("Error code: 401 - unauthorized")
                self.status_code = 401

        def _fake_api_call(api_kwargs):
            calls["api"] += 1
            if calls["api"] == 1:
                raise _UnauthorizedError()
            return _mock_response(
                content="Recovered after remint", finish_reason="stop"
            )

        def _fake_refresh(*, force=True):
            calls["refresh"] += 1
            assert force is True
            return True

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
            patch.object(agent, "_interruptible_api_call", side_effect=_fake_api_call),
            patch.object(
                agent, "_try_refresh_nous_client_credentials", side_effect=_fake_refresh
            ),
        ):
            result = agent.run_conversation("hello")

        assert calls["api"] == 2
        assert calls["refresh"] == 1
        assert result["completed"] is True
        assert result["final_response"] == "Recovered after remint"

    def test_context_compression_triggered(self, agent):
        """When compressor says should_compress, compression runs."""
        self._setup_agent(agent)
        agent.compression_enabled = True

        tc = _mock_tool_call(name="web_search", arguments="{}", call_id="c1")
        resp1 = _mock_response(content="", finish_reason="tool_calls", tool_calls=[tc])
        resp2 = _mock_response(content="All done", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [resp1, resp2]

        with (
            patch("run_agent.handle_function_call", return_value="result"),
            patch.object(
                agent.context_compressor, "should_compress", return_value=True
            ),
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            # _compress_context should return (messages, system_prompt)
            mock_compress.return_value = (
                [{"role": "user", "content": "search something"}],
                "compressed system prompt",
            )
            result = agent.run_conversation("search something")
        mock_compress.assert_called_once()
        assert result["final_response"] == "All done"
        assert result["completed"] is True

    def test_glm_prompt_exceeds_max_length_triggers_compression(self, agent):
        """GLM/Z.AI uses 'Prompt exceeds max length' for context overflow."""
        self._setup_agent(agent)
        err_400 = Exception(
            "Error code: 400 - {'error': {'code': '1261', 'message': 'Prompt exceeds max length'}}"
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed system prompt",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert result["final_response"] == "Recovered after compression"
        assert result["completed"] is True

    def test_minimax_delta_overflow_keeps_known_context_length(self, agent):
        """MiniMax reports overflow deltas like 'limit (2013)' without the real window.

        Keep the known 204,800-token window and compress instead of probing down
        to the generic 128K fallback tier.
        """
        self._setup_agent(agent)
        agent.provider = "minimax"
        agent.model = "MiniMax-M2.7-highspeed"
        agent.base_url = "https://api.minimax.io/anthropic"
        agent.context_compressor.context_length = 204_800
        agent.context_compressor.threshold_tokens = int(
            agent.context_compressor.context_length * agent.context_compressor.threshold_percent
        )

        err_400 = Exception(
            "HTTP 400: invalid params, context window exceeds limit (2013)"
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed system prompt",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert agent.context_compressor.context_length == 204_800
        assert agent.context_compressor._context_probed is False
        assert result["final_response"] == "Recovered after compression"
        assert result["completed"] is True

    def test_non_minimax_delta_overflow_still_probes_down(self, agent):
        """Non-MiniMax providers should keep the generic probe-down behavior."""
        self._setup_agent(agent)
        agent.provider = "openrouter"
        agent.model = "some/unknown-model"
        agent.base_url = "https://openrouter.ai/api/v1"
        agent.context_compressor.context_length = 200_000
        agent.context_compressor.threshold_tokens = int(
            agent.context_compressor.context_length * agent.context_compressor.threshold_percent
        )

        err_400 = Exception(
            "HTTP 400: invalid params, context window exceeds limit (2013)"
        )
        err_400.status_code = 400
        ok_resp = _mock_response(content="Recovered after compression", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [err_400, ok_resp]
        prefill = [
            {"role": "user", "content": "previous question"},
            {"role": "assistant", "content": "previous answer"},
        ]

        with (
            patch.object(agent, "_compress_context") as mock_compress,
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            mock_compress.return_value = (
                [{"role": "user", "content": "hello"}],
                "compressed system prompt",
            )
            result = agent.run_conversation("hello", conversation_history=prefill)

        mock_compress.assert_called_once()
        assert agent.context_compressor.context_length == 128_000
        assert result["final_response"] == "Recovered after compression"
        assert result["completed"] is True

    def test_length_finish_reason_requests_continuation(self, agent):
        """Normal truncation (partial real content) triggers continuation."""
        self._setup_agent(agent)
        first = _mock_response(content="Part 1 ", finish_reason="length")
        second = _mock_response(content="Part 2", finish_reason="stop")
        agent.client.chat.completions.create.side_effect = [first, second]

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("hello")

        assert result["completed"] is True
        assert result["api_calls"] == 2
        assert result["final_response"] == "Part 1 Part 2"

        second_call_messages = agent.client.chat.completions.create.call_args_list[1].kwargs["messages"]
        assert second_call_messages[-1]["role"] == "user"
        assert "truncated by the output length limit" in second_call_messages[-1]["content"]

    def test_ollama_glm_stop_after_tools_without_terminal_boundary_requests_continuation(self, agent):
        """Ollama-hosted GLM responses can misreport truncated output as stop."""
        self._setup_agent(agent)
        agent.base_url = "http://localhost:11434/v1"
        agent._base_url_lower = agent.base_url.lower()
        agent.model = "glm-5.1:cloud"

        tool_turn = _mock_response(
            content="",
            finish_reason="tool_calls",
            tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")],
        )
        misreported_stop = _mock_response(
            content="Based on the search results, the best next",
            finish_reason="stop",
        )
        continued = _mock_response(
            content=" step is to update the config.",
            finish_reason="stop",
        )
        agent.client.chat.completions.create.side_effect = [
            tool_turn,
            misreported_stop,
            continued,
        ]

        with (
            patch("run_agent.handle_function_call", return_value="search result"),
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation("hello")

        assert result["completed"] is True
        assert result["api_calls"] == 3
        assert (
            result["final_response"]
            == "Based on the search results, the best next step is to update the config."
3020 ) 3021 3022 third_call_messages = agent.client.chat.completions.create.call_args_list[2].kwargs["messages"] 3023 assert third_call_messages[-1]["role"] == "user" 3024 assert "truncated by the output length limit" in third_call_messages[-1]["content"] 3025 3026 def test_ollama_glm_stop_with_terminal_boundary_does_not_continue(self, agent): 3027 """Complete Ollama/GLM responses should not be reclassified as truncated.""" 3028 self._setup_agent(agent) 3029 agent.base_url = "http://localhost:11434/v1" 3030 agent._base_url_lower = agent.base_url.lower() 3031 agent.model = "glm-5.1:cloud" 3032 3033 tool_turn = _mock_response( 3034 content="", 3035 finish_reason="tool_calls", 3036 tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")], 3037 ) 3038 complete_stop = _mock_response( 3039 content="Based on the search results, the best next step is to update the config.", 3040 finish_reason="stop", 3041 ) 3042 agent.client.chat.completions.create.side_effect = [tool_turn, complete_stop] 3043 3044 with ( 3045 patch("run_agent.handle_function_call", return_value="search result"), 3046 patch.object(agent, "_persist_session"), 3047 patch.object(agent, "_save_trajectory"), 3048 patch.object(agent, "_cleanup_task_resources"), 3049 ): 3050 result = agent.run_conversation("hello") 3051 3052 assert result["completed"] is True 3053 assert result["api_calls"] == 2 3054 assert ( 3055 result["final_response"] 3056 == "Based on the search results, the best next step is to update the config." 3057 ) 3058 3059 def test_non_ollama_stop_without_terminal_boundary_does_not_continue(self, agent): 3060 """The stop->length workaround should stay scoped to Ollama/GLM backends.""" 3061 self._setup_agent(agent) 3062 agent.base_url = "https://api.openai.com/v1" 3063 agent._base_url_lower = agent.base_url.lower() 3064 agent.model = "gpt-4o-mini" 3065 3066 tool_turn = _mock_response( 3067 content="", 3068 finish_reason="tool_calls", 3069 tool_calls=[_mock_tool_call(name="web_search", arguments="{}", call_id="c1")], 3070 ) 3071 normal_stop = _mock_response( 3072 content="Based on the search results, the best next", 3073 finish_reason="stop", 3074 ) 3075 agent.client.chat.completions.create.side_effect = [tool_turn, normal_stop] 3076 3077 with ( 3078 patch("run_agent.handle_function_call", return_value="search result"), 3079 patch.object(agent, "_persist_session"), 3080 patch.object(agent, "_save_trajectory"), 3081 patch.object(agent, "_cleanup_task_resources"), 3082 ): 3083 result = agent.run_conversation("hello") 3084 3085 assert result["completed"] is True 3086 assert result["api_calls"] == 2 3087 assert result["final_response"] == "Based on the search results, the best next" 3088 3089 def test_length_thinking_exhausted_skips_continuation(self, agent): 3090 """When finish_reason='length' but content is only thinking, skip retries.""" 3091 self._setup_agent(agent) 3092 resp = _mock_response( 3093 content="<think>internal reasoning</think>", 3094 finish_reason="length", 3095 ) 3096 agent.client.chat.completions.create.return_value = resp 3097 3098 with ( 3099 patch.object(agent, "_persist_session"), 3100 patch.object(agent, "_save_trajectory"), 3101 patch.object(agent, "_cleanup_task_resources"), 3102 ): 3103 result = agent.run_conversation("hello") 3104 3105 # Should return immediately — no continuation, only 1 API call 3106 assert result["completed"] is False 3107 assert result["api_calls"] == 1 3108 assert "reasoning" in result["error"].lower() 3109 assert "output tokens" in result["error"].lower() 
3110 # Should have a user-friendly response (not None) 3111 assert result["final_response"] is not None 3112 assert "Thinking Budget Exhausted" in result["final_response"] 3113 assert "/thinkon" in result["final_response"] 3114 3115 def test_length_empty_content_without_think_tags_retries_normally(self, agent): 3116 """When finish_reason='length' and content is None but no think tags, 3117 fall through to normal continuation retry (not thinking-exhaustion).""" 3118 self._setup_agent(agent) 3119 resp = _mock_response(content=None, finish_reason="length") 3120 agent.client.chat.completions.create.return_value = resp 3121 3122 with ( 3123 patch.object(agent, "_persist_session"), 3124 patch.object(agent, "_save_trajectory"), 3125 patch.object(agent, "_cleanup_task_resources"), 3126 ): 3127 result = agent.run_conversation("hello") 3128 3129 # Without think tags, the agent should attempt continuation retries 3130 # (up to 3), not immediately fire thinking-exhaustion. 3131 assert result["api_calls"] == 3 3132 assert result["completed"] is False 3133 3134 def test_length_with_tool_calls_returns_partial_without_executing_tools(self, agent): 3135 self._setup_agent(agent) 3136 bad_tc = _mock_tool_call( 3137 name="write_file", 3138 arguments='{"path":"report.md","content":"partial', 3139 call_id="c1", 3140 ) 3141 resp = _mock_response(content="", finish_reason="length", tool_calls=[bad_tc]) 3142 agent.client.chat.completions.create.return_value = resp 3143 3144 with ( 3145 patch("run_agent.handle_function_call") as mock_handle_function_call, 3146 patch.object(agent, "_persist_session"), 3147 patch.object(agent, "_save_trajectory"), 3148 patch.object(agent, "_cleanup_task_resources"), 3149 ): 3150 result = agent.run_conversation("write the report") 3151 3152 assert result["completed"] is False 3153 assert result["partial"] is True 3154 assert "truncated due to output length limit" in result["error"] 3155 mock_handle_function_call.assert_not_called() 3156 3157 def test_truncated_tool_call_retries_once_before_refusing(self, agent): 3158 """When tool call args are truncated, the agent retries the API call 3159 once. If the retry succeeds (valid JSON args), tool execution proceeds.""" 3160 self._setup_agent(agent) 3161 agent.valid_tool_names.add("write_file") 3162 bad_tc = _mock_tool_call( 3163 name="write_file", 3164 arguments='{"path":"report.md","content":"partial', 3165 call_id="c1", 3166 ) 3167 truncated_resp = _mock_response( 3168 content="", finish_reason="length", tool_calls=[bad_tc], 3169 ) 3170 good_tc = _mock_tool_call( 3171 name="write_file", 3172 arguments='{"path":"report.md","content":"full content"}', 3173 call_id="c2", 3174 ) 3175 good_resp = _mock_response( 3176 content="", finish_reason="stop", tool_calls=[good_tc], 3177 ) 3178 with ( 3179 patch("run_agent.handle_function_call", return_value='{"success":true}') as mock_hfc, 3180 patch.object(agent, "_persist_session"), 3181 patch.object(agent, "_save_trajectory"), 3182 patch.object(agent, "_cleanup_task_resources"), 3183 ): 3184 # First call: truncated → retry. Second: valid → execute tool. 3185 # Third: final text response. 3186 final_resp = _mock_response(content="Done!", finish_reason="stop") 3187 agent.client.chat.completions.create.side_effect = [ 3188 truncated_resp, good_resp, final_resp, 3189 ] 3190 result = agent.run_conversation("write the report") 3191 3192 # Tool was executed on the retry (good_resp) 3193 mock_hfc.assert_called_once() 3194 assert result["final_response"] == "Done!" 
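# Illustrative sketch (standalone; the helper name below is hypothetical,
# not part of run_agent): the truncation checks exercised in the tests
# above and below reduce to "the streamed tool-call arguments are no
# longer valid JSON", which stdlib json can decide directly.
def _demo_args_look_truncated(arguments: str) -> bool:
    """Return True when tool-call arguments fail to parse as JSON."""
    import json
    try:
        json.loads(arguments)
    except (TypeError, ValueError):
        return True
    return False
# With the payloads used in these tests:
#   _demo_args_look_truncated('{"path":"report.md","content":"partial')        -> True
#   _demo_args_look_truncated('{"path":"report.md","content":"full content"}') -> False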
3195 3196 def test_truncated_tool_args_detected_when_finish_reason_not_length(self, agent): 3197 """When a router rewrites finish_reason from 'length' to 'tool_calls', 3198 truncated JSON arguments should still be detected and refused rather 3199 than wasting 3 retry attempts.""" 3200 self._setup_agent(agent) 3201 agent.valid_tool_names.add("write_file") 3202 bad_tc = _mock_tool_call( 3203 name="write_file", 3204 arguments='{"path":"report.md","content":"partial', 3205 call_id="c1", 3206 ) 3207 resp = _mock_response( 3208 content="", finish_reason="tool_calls", tool_calls=[bad_tc], 3209 ) 3210 agent.client.chat.completions.create.return_value = resp 3211 3212 with ( 3213 patch("run_agent.handle_function_call") as mock_handle_function_call, 3214 patch.object(agent, "_persist_session"), 3215 patch.object(agent, "_save_trajectory"), 3216 patch.object(agent, "_cleanup_task_resources"), 3217 ): 3218 result = agent.run_conversation("write the report") 3219 3220 assert result["completed"] is False 3221 assert result["partial"] is True 3222 assert "truncated due to output length limit" in result["error"] 3223 mock_handle_function_call.assert_not_called() 3224 3225 3226 class TestRetryExhaustion: 3227 """Regression: retry_count > max_retries was dead code (off-by-one). 3228 3229 When retries were exhausted the condition never triggered, causing 3230 the loop to exit and fall through to response.choices[0] on an 3231 invalid response, raising IndexError. 3232 """ 3233 3234 def _setup_agent(self, agent): 3235 agent._cached_system_prompt = "You are helpful." 3236 agent._use_prompt_caching = False 3237 agent.tool_delay = 0 3238 agent.compression_enabled = False 3239 agent.save_trajectories = False 3240 3241 @staticmethod 3242 def _make_fast_time_mock(): 3243 """Return a mock time module where sleep loops exit instantly.""" 3244 mock_time = MagicMock() 3245 _t = [1000.0] 3246 3247 def _advancing_time(): 3248 _t[0] += 500.0 # jump 500s per call so sleep_end is always in the past 3249 return _t[0] 3250 3251 mock_time.time.side_effect = _advancing_time 3252 mock_time.sleep = MagicMock() # no-op 3253 mock_time.monotonic.return_value = 12345.0 3254 return mock_time 3255 3256 def test_invalid_response_returns_error_not_crash(self, agent): 3257 """Exhausted retries on invalid (empty choices) response must not IndexError.""" 3258 self._setup_agent(agent) 3259 # Return response with empty choices every time 3260 bad_resp = SimpleNamespace( 3261 choices=[], 3262 model="test/model", 3263 usage=None, 3264 ) 3265 agent.client.chat.completions.create.return_value = bad_resp 3266 with ( 3267 patch.object(agent, "_persist_session"), 3268 patch.object(agent, "_save_trajectory"), 3269 patch.object(agent, "_cleanup_task_resources"), 3270 patch("run_agent.time", self._make_fast_time_mock()), 3271 ): 3272 result = agent.run_conversation("hello") 3273 assert result.get("completed") is False, ( 3274 f"Expected completed=False, got: {result}" 3275 ) 3276 assert result.get("failed") is True 3277 assert "error" in result 3278 assert "Invalid API response" in result["error"] 3279 3280 def test_api_error_returns_gracefully_after_retries(self, agent): 3281 """Exhausted retries on API errors must return error result, not crash.""" 3282 self._setup_agent(agent) 3283 agent.client.chat.completions.create.side_effect = RuntimeError("rate limited") 3284 with ( 3285 patch.object(agent, "_persist_session"), 3286 patch.object(agent, "_save_trajectory"), 3287 patch.object(agent, "_cleanup_task_resources"), 3288 patch("run_agent.time", 
self._make_fast_time_mock()),
3289 ):
3290 result = agent.run_conversation("hello")
3291 assert result.get("completed") is False
3292 assert result.get("failed") is True
3293 assert "error" in result
3294 assert "rate limited" in result["error"]
3295
3296 def test_build_api_kwargs_error_no_unbound_local(self, agent):
3297 """When _build_api_kwargs raises, except handler must not crash with UnboundLocalError.
3298
3299 Regression: _dump_api_request_debug(api_kwargs, ...) in the except block
3300 referenced api_kwargs before it was assigned when _build_api_kwargs threw.
3301 """
3302 self._setup_agent(agent)
3303 with (
3304 patch.object(agent, "_build_api_kwargs", side_effect=ValueError("bad messages")),
3305 patch.object(agent, "_persist_session"),
3306 patch.object(agent, "_save_trajectory"),
3307 patch.object(agent, "_cleanup_task_resources"),
3308 patch("run_agent.time", self._make_fast_time_mock()),
3309 ):
3310 result = agent.run_conversation("hello")
3311 # Must surface the real error, not UnboundLocalError
3312 assert result.get("completed") is False
3313 assert result.get("failed") is True
3314 assert "error" in result
3315 assert "UnboundLocalError" not in result.get("error", "")
3316 assert "bad messages" in result["error"]
3317
3318
3319 # ---------------------------------------------------------------------------
3320 # Conversation history mutation
3321 # ---------------------------------------------------------------------------
3322
3323
3324 class TestConversationHistoryNotMutated:
3325 """run_conversation must not mutate the caller's conversation_history list."""
3326
3327 def test_caller_list_unchanged_after_run(self, agent):
3328 """Passing conversation_history should not modify the original list."""
3329 history = [
3330 {"role": "user", "content": "previous question"},
3331 {"role": "assistant", "content": "previous answer"},
3332 ]
3333 original_len = len(history)
3334
3335 resp = _mock_response(content="new answer", finish_reason="stop")
3336 agent.client.chat.completions.create.return_value = resp
3337
3338 with (
3339 patch.object(agent, "_persist_session"),
3340 patch.object(agent, "_save_trajectory"),
3341 patch.object(agent, "_cleanup_task_resources"),
3342 ):
3343 result = agent.run_conversation(
3344 "new question", conversation_history=history
3345 )
3346
3347 # Caller's list must be untouched
3348 assert len(history) == original_len, (
3349 f"conversation_history was mutated: expected {original_len} items, got {len(history)}"
3350 )
3351 # Result should have more messages than the original history
3352 assert len(result["messages"]) > original_len
3353
3354
3355 # ---------------------------------------------------------------------------
3356 # Nous credential refresh and credential pool recovery
3357 # ---------------------------------------------------------------------------
3358
3359
3360 class TestNousCredentialRefresh:
3361 """Verify Nous credential refresh rebuilds the runtime client."""
3362
3363 def test_try_refresh_nous_client_credentials_rebuilds_client(
3364 self, agent, monkeypatch
3365 ):
3366 agent.provider = "nous"
3367 agent.api_mode = "chat_completions"
3368
3369 closed = {"value": False}
3370 rebuilt = {"kwargs": None}
3371 captured = {}
3372
3373 class _ExistingClient:
3374 def close(self):
3375 closed["value"] = True
3376
3377 class _RebuiltClient:
3378 pass
3379
3380 def _fake_resolve(**kwargs):
3381 captured.update(kwargs)
3382 return {
3383 "api_key": "new-nous-key",
3384 "base_url": "https://inference-api.nousresearch.com/v1",
3385 }
3386
3387 def
_fake_openai(**kwargs): 3388 rebuilt["kwargs"] = kwargs 3389 return _RebuiltClient() 3390 3391 monkeypatch.setattr( 3392 "hermes_cli.auth.resolve_nous_runtime_credentials", _fake_resolve 3393 ) 3394 3395 agent.client = _ExistingClient() 3396 with patch("run_agent.OpenAI", side_effect=_fake_openai): 3397 ok = agent._try_refresh_nous_client_credentials(force=True) 3398 3399 assert ok is True 3400 assert closed["value"] is True 3401 assert captured["force_mint"] is True 3402 assert rebuilt["kwargs"]["api_key"] == "new-nous-key" 3403 assert ( 3404 rebuilt["kwargs"]["base_url"] == "https://inference-api.nousresearch.com/v1" 3405 ) 3406 assert "default_headers" not in rebuilt["kwargs"] 3407 assert isinstance(agent.client, _RebuiltClient) 3408 3409 3410 class TestCredentialPoolRecovery: 3411 def test_recover_with_pool_rotates_on_402(self, agent): 3412 current = SimpleNamespace(label="primary") 3413 next_entry = SimpleNamespace(label="secondary") 3414 3415 class _Pool: 3416 def current(self): 3417 return current 3418 3419 def mark_exhausted_and_rotate(self, *, status_code, error_context=None): 3420 assert status_code == 402 3421 assert error_context is None 3422 return next_entry 3423 3424 agent._credential_pool = _Pool() 3425 agent._swap_credential = MagicMock() 3426 3427 recovered, retry_same = agent._recover_with_credential_pool( 3428 status_code=402, 3429 has_retried_429=False, 3430 ) 3431 3432 assert recovered is True 3433 assert retry_same is False 3434 agent._swap_credential.assert_called_once_with(next_entry) 3435 3436 def test_recover_with_pool_rotates_on_billing_reason_even_with_http_400(self, agent): 3437 next_entry = SimpleNamespace(label="secondary") 3438 3439 class _Pool: 3440 def mark_exhausted_and_rotate(self, *, status_code, error_context=None): 3441 assert status_code == 400 3442 assert error_context == {"reason": "out_of_extra_usage"} 3443 return next_entry 3444 3445 agent._credential_pool = _Pool() 3446 agent._swap_credential = MagicMock() 3447 3448 recovered, retry_same = agent._recover_with_credential_pool( 3449 status_code=400, 3450 has_retried_429=False, 3451 classified_reason=FailoverReason.billing, 3452 error_context={"reason": "out_of_extra_usage"}, 3453 ) 3454 3455 assert recovered is True 3456 assert retry_same is False 3457 agent._swap_credential.assert_called_once_with(next_entry) 3458 3459 def test_recover_with_pool_retries_first_429_then_rotates(self, agent): 3460 next_entry = SimpleNamespace(label="secondary") 3461 3462 class _Pool: 3463 def current(self): 3464 return SimpleNamespace(label="primary") 3465 3466 def mark_exhausted_and_rotate(self, *, status_code, error_context=None): 3467 assert status_code == 429 3468 assert error_context is None 3469 return next_entry 3470 3471 agent._credential_pool = _Pool() 3472 agent._swap_credential = MagicMock() 3473 3474 recovered, retry_same = agent._recover_with_credential_pool( 3475 status_code=429, 3476 has_retried_429=False, 3477 ) 3478 assert recovered is False 3479 assert retry_same is True 3480 agent._swap_credential.assert_not_called() 3481 3482 recovered, retry_same = agent._recover_with_credential_pool( 3483 status_code=429, 3484 has_retried_429=True, 3485 ) 3486 assert recovered is True 3487 assert retry_same is False 3488 agent._swap_credential.assert_called_once_with(next_entry) 3489 3490 3491 def test_recover_with_pool_refreshes_on_401(self, agent): 3492 """401 with successful refresh should swap to refreshed credential.""" 3493 refreshed_entry = SimpleNamespace(label="refreshed-primary", id="abc") 3494 3495 
class _Pool: 3496 def try_refresh_current(self): 3497 return refreshed_entry 3498 3499 agent._credential_pool = _Pool() 3500 agent._swap_credential = MagicMock() 3501 3502 recovered, retry_same = agent._recover_with_credential_pool( 3503 status_code=401, 3504 has_retried_429=False, 3505 ) 3506 3507 assert recovered is True 3508 agent._swap_credential.assert_called_once_with(refreshed_entry) 3509 3510 def test_recover_with_pool_rotates_on_401_when_refresh_fails(self, agent): 3511 """401 with failed refresh should rotate to next credential.""" 3512 next_entry = SimpleNamespace(label="secondary", id="def") 3513 3514 class _Pool: 3515 def try_refresh_current(self): 3516 return None # refresh failed 3517 3518 def mark_exhausted_and_rotate(self, *, status_code, error_context=None): 3519 assert status_code == 401 3520 assert error_context is None 3521 return next_entry 3522 3523 agent._credential_pool = _Pool() 3524 agent._swap_credential = MagicMock() 3525 3526 recovered, retry_same = agent._recover_with_credential_pool( 3527 status_code=401, 3528 has_retried_429=False, 3529 ) 3530 3531 assert recovered is True 3532 assert retry_same is False 3533 agent._swap_credential.assert_called_once_with(next_entry) 3534 3535 def test_recover_with_pool_401_refresh_fails_no_more_credentials(self, agent): 3536 """401 with failed refresh and no other credentials returns not recovered.""" 3537 3538 class _Pool: 3539 def try_refresh_current(self): 3540 return None 3541 3542 def mark_exhausted_and_rotate(self, *, status_code, error_context=None): 3543 assert error_context is None 3544 return None # no more credentials 3545 3546 agent._credential_pool = _Pool() 3547 agent._swap_credential = MagicMock() 3548 3549 recovered, retry_same = agent._recover_with_credential_pool( 3550 status_code=401, 3551 has_retried_429=False, 3552 ) 3553 3554 assert recovered is False 3555 agent._swap_credential.assert_not_called() 3556 3557 def test_extract_api_error_context_uses_reset_timestamp_and_reason(self, agent): 3558 response = SimpleNamespace(headers={}) 3559 error = SimpleNamespace( 3560 body={ 3561 "error": { 3562 "code": "device_code_exhausted", 3563 "message": "Weekly credits exhausted.", 3564 "resets_at": "2026-04-12T10:30:00Z", 3565 } 3566 }, 3567 response=response, 3568 ) 3569 3570 context = agent._extract_api_error_context(error) 3571 3572 assert context["reason"] == "device_code_exhausted" 3573 assert context["message"] == "Weekly credits exhausted." 
3574 assert context["reset_at"] == "2026-04-12T10:30:00Z" 3575 3576 def test_recover_with_pool_passes_error_context_on_rotated_429(self, agent): 3577 next_entry = SimpleNamespace(label="secondary") 3578 captured = {} 3579 3580 class _Pool: 3581 def current(self): 3582 return SimpleNamespace(label="primary") 3583 3584 def mark_exhausted_and_rotate(self, *, status_code, error_context=None): 3585 captured["status_code"] = status_code 3586 captured["error_context"] = error_context 3587 return next_entry 3588 3589 agent._credential_pool = _Pool() 3590 agent._swap_credential = MagicMock() 3591 3592 recovered, retry_same = agent._recover_with_credential_pool( 3593 status_code=429, 3594 has_retried_429=True, 3595 error_context={"reason": "device_code_exhausted", "reset_at": "2026-04-12T10:30:00Z"}, 3596 ) 3597 3598 assert recovered is True 3599 assert retry_same is False 3600 assert captured["status_code"] == 429 3601 assert captured["error_context"]["reason"] == "device_code_exhausted" 3602 3603 3604 class TestMaxTokensParam: 3605 """Verify _max_tokens_param returns the correct key for each provider.""" 3606 3607 def test_returns_max_completion_tokens_for_direct_openai(self, agent): 3608 agent.base_url = "https://api.openai.com/v1" 3609 result = agent._max_tokens_param(4096) 3610 assert result == {"max_completion_tokens": 4096} 3611 3612 def test_returns_max_tokens_for_openrouter(self, agent): 3613 agent.base_url = "https://openrouter.ai/api/v1" 3614 result = agent._max_tokens_param(4096) 3615 assert result == {"max_tokens": 4096} 3616 3617 def test_returns_max_tokens_for_local(self, agent): 3618 agent.base_url = "http://localhost:11434/v1" 3619 result = agent._max_tokens_param(4096) 3620 assert result == {"max_tokens": 4096} 3621 3622 def test_not_tricked_by_openai_in_openrouter_url(self, agent): 3623 agent.base_url = "https://openrouter.ai/api/v1/api.openai.com" 3624 result = agent._max_tokens_param(4096) 3625 assert result == {"max_tokens": 4096} 3626 3627 def test_returns_max_completion_tokens_for_azure(self, agent): 3628 """Azure OpenAI requires max_completion_tokens for gpt-5.x models.""" 3629 agent.base_url = "https://my-resource.openai.azure.com/openai/v1" 3630 result = agent._max_tokens_param(4096) 3631 assert result == {"max_completion_tokens": 4096} 3632 3633 3634 class TestAzureOpenAIRouting: 3635 """Verify Azure OpenAI endpoints stay on chat_completions for gpt-5.x.""" 3636 3637 def test_azure_gpt5_stays_on_chat_completions(self, agent): 3638 """Azure serves gpt-5.x on /chat/completions — must not upgrade to codex_responses.""" 3639 agent.base_url = "https://my-resource.openai.azure.com/openai/v1" 3640 agent.api_mode = "chat_completions" 3641 agent.model = "gpt-5.4-mini" 3642 # Mirror the routing logic from __init__ 3643 if ( 3644 agent.api_mode == "chat_completions" 3645 and not agent._is_azure_openai_url() 3646 and ( 3647 agent._is_direct_openai_url() 3648 or agent._provider_model_requires_responses_api( 3649 agent.model, provider=agent.provider, 3650 ) 3651 ) 3652 ): 3653 agent.api_mode = "codex_responses" 3654 assert agent.api_mode == "chat_completions" 3655 3656 def test_non_azure_gpt5_upgrades_to_codex_responses(self, agent): 3657 """On api.openai.com, gpt-5.x must still upgrade to codex_responses.""" 3658 agent.base_url = "https://api.openai.com/v1" 3659 agent.api_mode = "chat_completions" 3660 agent.model = "gpt-5.4-mini" 3661 if ( 3662 agent.api_mode == "chat_completions" 3663 and not agent._is_azure_openai_url() 3664 and ( 3665 agent._is_direct_openai_url() 3666 or 
agent._provider_model_requires_responses_api( 3667 agent.model, provider=agent.provider, 3668 ) 3669 ) 3670 ): 3671 agent.api_mode = "codex_responses" 3672 assert agent.api_mode == "codex_responses" 3673 3674 def test_is_azure_openai_url_detection(self, agent): 3675 assert agent._is_azure_openai_url("https://foo.openai.azure.com/openai/v1") is True 3676 assert agent._is_azure_openai_url("https://api.openai.com/v1") is False 3677 assert agent._is_azure_openai_url("https://openrouter.ai/api/v1") is False 3678 # Path-embedded azure string should still detect — we're ~substring matching 3679 agent.base_url = "https://my-resource.openai.azure.com/openai/v1" 3680 assert agent._is_azure_openai_url() is True 3681 3682 3683 # --------------------------------------------------------------------------- 3684 # System prompt stability for prompt caching 3685 # --------------------------------------------------------------------------- 3686 3687 class TestSystemPromptStability: 3688 """Verify that the system prompt stays stable across turns for cache hits.""" 3689 3690 def test_stored_prompt_reused_for_continuing_session(self, agent): 3691 """When conversation_history is non-empty and session DB has a stored 3692 prompt, it should be reused instead of rebuilding from disk.""" 3693 stored = "You are helpful. [stored from turn 1]" 3694 mock_db = MagicMock() 3695 mock_db.get_session.return_value = {"system_prompt": stored} 3696 agent._session_db = mock_db 3697 3698 # Simulate a continuing session with history 3699 history = [ 3700 {"role": "user", "content": "hello"}, 3701 {"role": "assistant", "content": "hi"}, 3702 ] 3703 3704 # First call — _cached_system_prompt is None, history is non-empty 3705 agent._cached_system_prompt = None 3706 3707 # Patch run_conversation internals to just test the system prompt logic. 3708 # We'll call the prompt caching block directly by simulating what 3709 # run_conversation does. 
3710 conversation_history = history 3711 3712 # The block under test (from run_conversation): 3713 if agent._cached_system_prompt is None: 3714 stored_prompt = None 3715 if conversation_history and agent._session_db: 3716 try: 3717 session_row = agent._session_db.get_session(agent.session_id) 3718 if session_row: 3719 stored_prompt = session_row.get("system_prompt") or None 3720 except Exception: 3721 pass 3722 3723 if stored_prompt: 3724 agent._cached_system_prompt = stored_prompt 3725 3726 assert agent._cached_system_prompt == stored 3727 mock_db.get_session.assert_called_once_with(agent.session_id) 3728 3729 def test_fresh_build_when_no_history(self, agent): 3730 """On the first turn (no history), system prompt should be built fresh.""" 3731 mock_db = MagicMock() 3732 agent._session_db = mock_db 3733 3734 agent._cached_system_prompt = None 3735 conversation_history = [] 3736 3737 # The block under test: 3738 if agent._cached_system_prompt is None: 3739 stored_prompt = None 3740 if conversation_history and agent._session_db: 3741 session_row = agent._session_db.get_session(agent.session_id) 3742 if session_row: 3743 stored_prompt = session_row.get("system_prompt") or None 3744 3745 if stored_prompt: 3746 agent._cached_system_prompt = stored_prompt 3747 else: 3748 agent._cached_system_prompt = agent._build_system_prompt() 3749 3750 # Should have built fresh, not queried the DB 3751 mock_db.get_session.assert_not_called() 3752 assert agent._cached_system_prompt is not None 3753 assert "Hermes Agent" in agent._cached_system_prompt 3754 3755 def test_fresh_build_when_db_has_no_prompt(self, agent): 3756 """If the session DB has no stored prompt, build fresh even with history.""" 3757 mock_db = MagicMock() 3758 mock_db.get_session.return_value = {"system_prompt": ""} 3759 agent._session_db = mock_db 3760 3761 agent._cached_system_prompt = None 3762 conversation_history = [{"role": "user", "content": "hi"}] 3763 3764 if agent._cached_system_prompt is None: 3765 stored_prompt = None 3766 if conversation_history and agent._session_db: 3767 try: 3768 session_row = agent._session_db.get_session(agent.session_id) 3769 if session_row: 3770 stored_prompt = session_row.get("system_prompt") or None 3771 except Exception: 3772 pass 3773 3774 if stored_prompt: 3775 agent._cached_system_prompt = stored_prompt 3776 else: 3777 agent._cached_system_prompt = agent._build_system_prompt() 3778 3779 # Empty string is falsy, so should fall through to fresh build 3780 assert "Hermes Agent" in agent._cached_system_prompt 3781 3782 class TestBudgetPressure: 3783 """Budget exhaustion grace call system.""" 3784 3785 def test_grace_call_flags_initialized(self, agent): 3786 """Agent should have budget grace call flags.""" 3787 assert agent._budget_exhausted_injected is False 3788 assert agent._budget_grace_call is False 3789 3790 3791 class TestSafeWriter: 3792 """Verify _SafeWriter guards stdout against OSError (broken pipes).""" 3793 3794 def test_write_delegates_normally(self): 3795 """When stdout is healthy, _SafeWriter is transparent.""" 3796 from run_agent import _SafeWriter 3797 from io import StringIO 3798 inner = StringIO() 3799 writer = _SafeWriter(inner) 3800 writer.write("hello") 3801 assert inner.getvalue() == "hello" 3802 3803 def test_write_catches_oserror(self): 3804 """OSError on write is silently caught, returns len(data).""" 3805 from run_agent import _SafeWriter 3806 from unittest.mock import MagicMock 3807 inner = MagicMock() 3808 inner.write.side_effect = OSError(5, "Input/output error") 3809 writer 
= _SafeWriter(inner) 3810 result = writer.write("hello") 3811 assert result == 5 # len("hello") 3812 3813 def test_flush_catches_oserror(self): 3814 """OSError on flush is silently caught.""" 3815 from run_agent import _SafeWriter 3816 from unittest.mock import MagicMock 3817 inner = MagicMock() 3818 inner.flush.side_effect = OSError(5, "Input/output error") 3819 writer = _SafeWriter(inner) 3820 writer.flush() # should not raise 3821 3822 def test_print_survives_broken_stdout(self, monkeypatch): 3823 """print() through _SafeWriter doesn't crash on broken pipe.""" 3824 import sys 3825 from run_agent import _SafeWriter 3826 from unittest.mock import MagicMock 3827 broken = MagicMock() 3828 broken.write.side_effect = OSError(5, "Input/output error") 3829 original = sys.stdout 3830 sys.stdout = _SafeWriter(broken) 3831 try: 3832 print("this should not crash") # would raise without _SafeWriter 3833 finally: 3834 sys.stdout = original 3835 3836 def test_installed_in_run_conversation(self, agent): 3837 """run_conversation installs _SafeWriter on stdio.""" 3838 import sys 3839 from run_agent import _SafeWriter 3840 resp = _mock_response(content="Done", finish_reason="stop") 3841 agent.client.chat.completions.create.return_value = resp 3842 original_stdout = sys.stdout 3843 original_stderr = sys.stderr 3844 try: 3845 with ( 3846 patch.object(agent, "_persist_session"), 3847 patch.object(agent, "_save_trajectory"), 3848 patch.object(agent, "_cleanup_task_resources"), 3849 ): 3850 agent.run_conversation("test") 3851 assert isinstance(sys.stdout, _SafeWriter) 3852 assert isinstance(sys.stderr, _SafeWriter) 3853 finally: 3854 sys.stdout = original_stdout 3855 sys.stderr = original_stderr 3856 3857 # test_installed_before_init_time_honcho_error_prints removed — 3858 # Honcho integration extracted to plugin (PR #4154). 
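# For orientation, a standalone sketch of the contract these tests pin down
# (an illustration, not run_agent's _SafeWriter implementation): delegate
# writes normally, swallow OSError, and report the full length as written
# so print() keeps working on a broken pipe.
class _SafeWriterSketch:
    def __init__(self, inner):
        self._inner = inner

    def write(self, data: str) -> int:
        try:
            return self._inner.write(data)
        except OSError:
            return len(data)  # pretend success; callers keep printing

    def flush(self) -> None:
        try:
            self._inner.flush()
        except OSError:
            pass  # a broken pipe on flush is also non-fatal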
3859
3860 def test_double_wrap_prevented(self):
3861 """Wrapping an already-wrapped stream doesn't add layers."""
3862 from run_agent import _SafeWriter
3863 from io import StringIO
3864 inner = StringIO()
3865 wrapped = _SafeWriter(inner)
3866 # isinstance check should prevent double-wrapping
3867 assert isinstance(wrapped, _SafeWriter)
3868 # The guard in run_conversation checks isinstance before wrapping
3869 if not isinstance(wrapped, _SafeWriter):
3870 wrapped = _SafeWriter(wrapped)
3871 # Still just one layer
3872 wrapped.write("test")
3873 assert inner.getvalue() == "test"
3874
3875
3876 class TestSaveSessionLogAtomicWrite:
3877 def test_uses_shared_atomic_json_helper(self, agent, tmp_path):
3878 agent.session_log_file = tmp_path / "session.json"
3879 messages = [{"role": "user", "content": "hello"}]
3880
3881 with patch("run_agent.atomic_json_write", create=True) as mock_atomic_write:
3882 agent._save_session_log(messages)
3883
3884 mock_atomic_write.assert_called_once()
3885 call_args = mock_atomic_write.call_args
3886 assert call_args.args[0] == agent.session_log_file
3887 payload = call_args.args[1]
3888 assert payload["session_id"] == agent.session_id
3889 assert payload["messages"] == messages
3890 assert call_args.kwargs["indent"] == 2
3891 assert call_args.kwargs["default"] is str
3892
3893
3894 # ===================================================================
3895 # Anthropic adapter integration fixes
3896 # ===================================================================
3897
3898
3899 class TestBuildApiKwargsAnthropicMaxTokens:
3900 """Bug fix: max_tokens was always None for Anthropic mode, ignoring user config."""
3901
3902 def test_max_tokens_passed_to_anthropic(self, agent):
3903 agent.api_mode = "anthropic_messages"
3904 agent.max_tokens = 4096
3905 agent.reasoning_config = None
3906
3907 with patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build:
3908 mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096}
3909 agent._build_api_kwargs([{"role": "user", "content": "test"}])
3910 _, kwargs = mock_build.call_args
3911 if not kwargs:
3912 kwargs = dict(zip(
3913 ["model", "messages", "tools", "max_tokens", "reasoning_config"],
3914 mock_build.call_args[0],
3915 ))
3916 assert kwargs.get("max_tokens") == 4096  # covers both keyword and positional call shapes
3917
3918 def test_max_tokens_none_when_unset(self, agent):
3919 agent.api_mode = "anthropic_messages"
3920 agent.max_tokens = None
3921 agent.reasoning_config = None
3922
3923 with patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build:
3924 mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 16384}
3925 agent._build_api_kwargs([{"role": "user", "content": "test"}])
3926 call_args = mock_build.call_args
3927 # max_tokens should be None (let adapter use its default)
3928 if call_args[1]:
3929 assert call_args[1].get("max_tokens") is None
3930 else:
3931 assert call_args[0][3] is None
3932
3933
3934 class TestAnthropicImageFallback:
3935 def test_build_api_kwargs_converts_multimodal_user_image_to_text(self, agent):
3936 agent.api_mode = "anthropic_messages"
3937 agent.reasoning_config = None
3938
3939 api_messages = [{
3940 "role": "user",
3941 "content": [
3942 {"type": "text", "text": "Can you see this now?"},
3943 {"type": "image_url", "image_url": {"url": "https://example.com/cat.png"}},
3944 ],
3945 }]
3946
3947 with (
3948 patch("tools.vision_tools.vision_analyze_tool",
new=AsyncMock(return_value=json.dumps({"success": True, "analysis": "A cat sitting on a chair."}))), 3949 patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build, 3950 ): 3951 mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096} 3952 agent._build_api_kwargs(api_messages) 3953 3954 kwargs = mock_build.call_args.kwargs or dict(zip( 3955 ["model", "messages", "tools", "max_tokens", "reasoning_config"], 3956 mock_build.call_args.args, 3957 )) 3958 transformed = kwargs["messages"] 3959 assert isinstance(transformed[0]["content"], str) 3960 assert "A cat sitting on a chair." in transformed[0]["content"] 3961 assert "Can you see this now?" in transformed[0]["content"] 3962 assert "vision_analyze with image_url: https://example.com/cat.png" in transformed[0]["content"] 3963 3964 def test_build_api_kwargs_reuses_cached_image_analysis_for_duplicate_images(self, agent): 3965 agent.api_mode = "anthropic_messages" 3966 agent.reasoning_config = None 3967 data_url = "data:image/png;base64,QUFBQQ==" 3968 3969 api_messages = [ 3970 { 3971 "role": "user", 3972 "content": [ 3973 {"type": "text", "text": "first"}, 3974 {"type": "input_image", "image_url": data_url}, 3975 ], 3976 }, 3977 { 3978 "role": "user", 3979 "content": [ 3980 {"type": "text", "text": "second"}, 3981 {"type": "input_image", "image_url": data_url}, 3982 ], 3983 }, 3984 ] 3985 3986 mock_vision = AsyncMock(return_value=json.dumps({"success": True, "analysis": "A small test image."})) 3987 with ( 3988 patch("tools.vision_tools.vision_analyze_tool", new=mock_vision), 3989 patch("agent.anthropic_adapter.build_anthropic_kwargs") as mock_build, 3990 ): 3991 mock_build.return_value = {"model": "claude-sonnet-4-20250514", "messages": [], "max_tokens": 4096} 3992 agent._build_api_kwargs(api_messages) 3993 3994 assert mock_vision.await_count == 1 3995 3996 3997 class TestFallbackAnthropicProvider: 3998 """Bug fix: _try_activate_fallback had no case for anthropic provider.""" 3999 4000 def test_fallback_to_anthropic_sets_api_mode(self, agent): 4001 agent._fallback_activated = False 4002 agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-20250514"} 4003 agent._fallback_chain = [agent._fallback_model] 4004 agent._fallback_index = 0 4005 4006 mock_client = MagicMock() 4007 mock_client.base_url = "https://api.anthropic.com/v1" 4008 mock_client.api_key = "sk-ant-api03-test" 4009 4010 with ( 4011 patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)), 4012 patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, 4013 patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None), 4014 ): 4015 mock_build.return_value = MagicMock() 4016 result = agent._try_activate_fallback() 4017 4018 assert result is True 4019 assert agent.api_mode == "anthropic_messages" 4020 assert agent._anthropic_client is not None 4021 assert agent.client is None 4022 4023 def test_fallback_to_anthropic_enables_prompt_caching(self, agent): 4024 agent._fallback_activated = False 4025 agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-20250514"} 4026 agent._fallback_chain = [agent._fallback_model] 4027 agent._fallback_index = 0 4028 4029 mock_client = MagicMock() 4030 mock_client.base_url = "https://api.anthropic.com/v1" 4031 mock_client.api_key = "sk-ant-api03-test" 4032 4033 with ( 4034 patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)), 4035 
patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), 4036 patch("agent.anthropic_adapter.resolve_anthropic_token", return_value=None), 4037 ): 4038 agent._try_activate_fallback() 4039 4040 assert agent._use_prompt_caching is True 4041 4042 def test_fallback_to_openrouter_uses_openai_client(self, agent): 4043 agent._fallback_activated = False 4044 agent._fallback_model = {"provider": "openrouter", "model": "anthropic/claude-sonnet-4"} 4045 agent._fallback_chain = [agent._fallback_model] 4046 agent._fallback_index = 0 4047 4048 mock_client = MagicMock() 4049 mock_client.base_url = "https://openrouter.ai/api/v1" 4050 mock_client.api_key = "sk-or-test" 4051 4052 with patch("agent.auxiliary_client.resolve_provider_client", return_value=(mock_client, None)): 4053 result = agent._try_activate_fallback() 4054 4055 assert result is True 4056 assert agent.api_mode == "chat_completions" 4057 assert agent.client is mock_client 4058 4059 4060 def test_aiagent_uses_copilot_acp_client(): 4061 with ( 4062 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4063 patch("run_agent.check_toolset_requirements", return_value={}), 4064 patch("run_agent.OpenAI") as mock_openai, 4065 patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client, 4066 ): 4067 acp_client = MagicMock() 4068 mock_acp_client.return_value = acp_client 4069 4070 agent = AIAgent( 4071 api_key="copilot-acp", 4072 base_url="acp://copilot", 4073 provider="copilot-acp", 4074 acp_command="/usr/local/bin/copilot", 4075 acp_args=["--acp", "--stdio"], 4076 quiet_mode=True, 4077 skip_context_files=True, 4078 skip_memory=True, 4079 ) 4080 4081 assert agent.client is acp_client 4082 mock_openai.assert_not_called() 4083 mock_acp_client.assert_called_once() 4084 assert mock_acp_client.call_args.kwargs["base_url"] == "acp://copilot" 4085 assert mock_acp_client.call_args.kwargs["api_key"] == "copilot-acp" 4086 assert mock_acp_client.call_args.kwargs["command"] == "/usr/local/bin/copilot" 4087 assert mock_acp_client.call_args.kwargs["args"] == ["--acp", "--stdio"] 4088 4089 4090 def test_aiagent_uses_acp_client_for_opencode_acp_provider(): 4091 with ( 4092 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4093 patch("run_agent.check_toolset_requirements", return_value={}), 4094 patch("run_agent.OpenAI") as mock_openai, 4095 patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client, 4096 ): 4097 acp_client = MagicMock() 4098 mock_acp_client.return_value = acp_client 4099 4100 agent = AIAgent( 4101 api_key="***", 4102 base_url="acp://opencode", 4103 provider="opencode-kimi-oauth", 4104 acp_command="/home/user/.local/bin/opencode", 4105 acp_args=["acp"], 4106 quiet_mode=True, 4107 skip_context_files=True, 4108 skip_memory=True, 4109 ) 4110 4111 assert agent.client is acp_client 4112 mock_openai.assert_not_called() 4113 mock_acp_client.assert_called_once() 4114 assert mock_acp_client.call_args.kwargs["base_url"] == "acp://opencode" 4115 assert mock_acp_client.call_args.kwargs["api_key"] == "***" 4116 assert mock_acp_client.call_args.kwargs["command"] == "/home/user/.local/bin/opencode" 4117 assert mock_acp_client.call_args.kwargs["args"] == ["acp"] 4118 4119 4120 def test_aiagent_acp_omits_unset_command_args_to_use_client_defaults(): 4121 with ( 4122 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4123 patch("run_agent.check_toolset_requirements", return_value={}), 4124 patch("run_agent.OpenAI") as 
mock_openai, 4125 patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client, 4126 ): 4127 acp_client = MagicMock() 4128 mock_acp_client.return_value = acp_client 4129 4130 agent = AIAgent( 4131 api_key="copilot-acp", 4132 base_url="acp://copilot", 4133 provider="copilot-acp", 4134 quiet_mode=True, 4135 skip_context_files=True, 4136 skip_memory=True, 4137 ) 4138 4139 assert agent.client is acp_client 4140 mock_openai.assert_not_called() 4141 assert mock_acp_client.call_args.kwargs["base_url"] == "acp://copilot" 4142 assert "command" not in mock_acp_client.call_args.kwargs 4143 assert "args" not in mock_acp_client.call_args.kwargs 4144 4145 4146 def test_aiagent_acp_preserves_explicit_empty_args_for_custom_marker(): 4147 with ( 4148 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4149 patch("run_agent.check_toolset_requirements", return_value={}), 4150 patch("run_agent.OpenAI"), 4151 patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client, 4152 ): 4153 mock_acp_client.return_value = MagicMock() 4154 4155 AIAgent( 4156 api_key="***", 4157 base_url="acp://unknown", 4158 provider="custom", 4159 acp_command="custom-acp", 4160 acp_args=[], 4161 quiet_mode=True, 4162 skip_context_files=True, 4163 skip_memory=True, 4164 ) 4165 4166 assert mock_acp_client.call_args.kwargs["command"] == "custom-acp" 4167 assert mock_acp_client.call_args.kwargs["args"] == [] 4168 4169 4170 def test_aiagent_switch_model_preserves_opencode_acp_command_args(): 4171 with ( 4172 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4173 patch("run_agent.check_toolset_requirements", return_value={}), 4174 patch("run_agent.OpenAI"), 4175 patch("agent.copilot_acp_client.CopilotACPClient") as mock_acp_client, 4176 ): 4177 acp_client = MagicMock() 4178 mock_acp_client.return_value = acp_client 4179 agent = AIAgent( 4180 api_key="sk-or-test", 4181 base_url="https://openrouter.ai/api/v1", 4182 provider="openrouter", 4183 model="openai/gpt-5.4", 4184 quiet_mode=True, 4185 skip_context_files=True, 4186 skip_memory=True, 4187 ) 4188 mock_acp_client.reset_mock() 4189 4190 agent.switch_model( 4191 new_model="kimi-for-coding", 4192 new_provider="opencode-kimi-oauth", 4193 api_key="***", 4194 base_url="acp://opencode", 4195 api_mode="chat_completions", 4196 acp_command="/home/user/.local/bin/opencode", 4197 acp_args=["acp"], 4198 ) 4199 4200 assert agent.client is acp_client 4201 assert agent.acp_command == "/home/user/.local/bin/opencode" 4202 assert agent.acp_args == ["acp"] 4203 assert mock_acp_client.call_args.kwargs["base_url"] == "acp://opencode" 4204 assert mock_acp_client.call_args.kwargs["command"] == "/home/user/.local/bin/opencode" 4205 assert mock_acp_client.call_args.kwargs["args"] == ["acp"] 4206 assert agent._primary_runtime["acp_command"] == "/home/user/.local/bin/opencode" 4207 assert agent._primary_runtime["acp_args"] == ["acp"] 4208 4209 4210 def test_quiet_spinner_allowed_with_explicit_print_fn(agent): 4211 agent._print_fn = lambda *_a, **_kw: None 4212 with patch.object(run_agent.sys.stdout, "isatty", return_value=False): 4213 assert agent._should_start_quiet_spinner() is True 4214 4215 4216 def test_quiet_spinner_allowed_on_real_tty(agent): 4217 agent._print_fn = None 4218 with patch.object(run_agent.sys.stdout, "isatty", return_value=True): 4219 assert agent._should_start_quiet_spinner() is True 4220 4221 4222 def test_quiet_spinner_suppressed_on_non_tty_without_print_fn(agent): 4223 agent._print_fn = None 4224 with 
patch.object(run_agent.sys.stdout, "isatty", return_value=False): 4225 assert agent._should_start_quiet_spinner() is False 4226 4227 4228 def test_is_openai_client_closed_honors_custom_client_flag(): 4229 assert AIAgent._is_openai_client_closed(SimpleNamespace(is_closed=True)) is True 4230 assert AIAgent._is_openai_client_closed(SimpleNamespace(is_closed=False)) is False 4231 4232 4233 def test_is_openai_client_closed_handles_method_form(): 4234 """Fix for issue #4377: is_closed as method (openai SDK) vs property (httpx). 4235 4236 The openai SDK's is_closed is a method, not a property. Prior to this fix, 4237 getattr(client, "is_closed", False) returned the bound method object, which 4238 is always truthy, causing the function to incorrectly report all clients as 4239 closed and triggering unnecessary client recreation on every API call. 4240 """ 4241 4242 class MethodFormClient: 4243 """Mimics openai.OpenAI where is_closed() is a method.""" 4244 4245 def __init__(self, closed: bool): 4246 self._closed = closed 4247 4248 def is_closed(self) -> bool: 4249 return self._closed 4250 4251 # Method returning False - client is open 4252 open_client = MethodFormClient(closed=False) 4253 assert AIAgent._is_openai_client_closed(open_client) is False 4254 4255 # Method returning True - client is closed 4256 closed_client = MethodFormClient(closed=True) 4257 assert AIAgent._is_openai_client_closed(closed_client) is True 4258 4259 4260 def test_is_openai_client_closed_falls_back_to_http_client(): 4261 """Verify fallback to _client.is_closed when top-level is_closed is None.""" 4262 4263 class ClientWithHttpClient: 4264 is_closed = None # No top-level is_closed 4265 4266 def __init__(self, http_closed: bool): 4267 self._client = SimpleNamespace(is_closed=http_closed) 4268 4269 assert AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=False)) is False 4270 assert AIAgent._is_openai_client_closed(ClientWithHttpClient(http_closed=True)) is True 4271 4272 4273 class TestAnthropicBaseUrlPassthrough: 4274 """Bug fix: base_url was filtered with 'anthropic in base_url', blocking proxies.""" 4275 4276 def test_custom_proxy_base_url_passed_through(self): 4277 with ( 4278 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4279 patch("run_agent.check_toolset_requirements", return_value={}), 4280 patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, 4281 ): 4282 mock_build.return_value = MagicMock() 4283 AIAgent( 4284 api_key="sk-ant-api03-test1234567890", 4285 base_url="https://llm-proxy.company.com/v1", 4286 api_mode="anthropic_messages", 4287 quiet_mode=True, 4288 skip_context_files=True, 4289 skip_memory=True, 4290 ) 4291 call_args = mock_build.call_args 4292 # base_url should be passed through, not filtered out 4293 assert call_args[0][1] == "https://llm-proxy.company.com/v1" 4294 4295 def test_none_base_url_passed_as_none(self): 4296 with ( 4297 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4298 patch("run_agent.check_toolset_requirements", return_value={}), 4299 patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, 4300 ): 4301 mock_build.return_value = MagicMock() 4302 AIAgent( 4303 api_key="sk-ant...7890", 4304 api_mode="anthropic_messages", 4305 quiet_mode=True, 4306 skip_context_files=True, 4307 skip_memory=True, 4308 ) 4309 call_args = mock_build.call_args 4310 # No base_url provided, should be default empty string or None 4311 passed_url = call_args[0][1] 4312 assert not passed_url 
or passed_url is None 4313 4314 4315 class TestAnthropicCredentialRefresh: 4316 def test_try_refresh_anthropic_client_credentials_rebuilds_client(self): 4317 with ( 4318 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4319 patch("run_agent.check_toolset_requirements", return_value={}), 4320 patch("agent.anthropic_adapter.build_anthropic_client") as mock_build, 4321 ): 4322 old_client = MagicMock() 4323 new_client = MagicMock() 4324 mock_build.side_effect = [old_client, new_client] 4325 agent = AIAgent( 4326 api_key="sk-ant-oat01-stale-token", 4327 base_url="https://openrouter.ai/api/v1", 4328 api_mode="anthropic_messages", 4329 quiet_mode=True, 4330 skip_context_files=True, 4331 skip_memory=True, 4332 ) 4333 4334 agent._anthropic_client = old_client 4335 agent._anthropic_api_key = "sk-ant-oat01-stale-token" 4336 agent._anthropic_base_url = "https://api.anthropic.com" 4337 agent.provider = "anthropic" 4338 4339 with ( 4340 patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-fresh-token"), 4341 patch("agent.anthropic_adapter.build_anthropic_client", return_value=new_client) as rebuild, 4342 ): 4343 assert agent._try_refresh_anthropic_client_credentials() is True 4344 4345 old_client.close.assert_called_once() 4346 rebuild.assert_called_once_with( 4347 "sk-ant-oat01-fresh-token", "https://api.anthropic.com", timeout=None, 4348 ) 4349 assert agent._anthropic_client is new_client 4350 assert agent._anthropic_api_key == "sk-ant-oat01-fresh-token" 4351 4352 def test_try_refresh_anthropic_client_credentials_returns_false_when_token_unchanged(self): 4353 with ( 4354 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4355 patch("run_agent.check_toolset_requirements", return_value={}), 4356 patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), 4357 ): 4358 agent = AIAgent( 4359 api_key="sk-ant-oat01-same-token", 4360 base_url="https://openrouter.ai/api/v1", 4361 api_mode="anthropic_messages", 4362 quiet_mode=True, 4363 skip_context_files=True, 4364 skip_memory=True, 4365 ) 4366 4367 old_client = MagicMock() 4368 agent._anthropic_client = old_client 4369 agent._anthropic_api_key = "sk-ant-oat01-same-token" 4370 4371 with ( 4372 patch("agent.anthropic_adapter.resolve_anthropic_token", return_value="sk-ant-oat01-same-token"), 4373 patch("agent.anthropic_adapter.build_anthropic_client") as rebuild, 4374 ): 4375 assert agent._try_refresh_anthropic_client_credentials() is False 4376 4377 old_client.close.assert_not_called() 4378 rebuild.assert_not_called() 4379 4380 def test_anthropic_messages_create_preflights_refresh(self): 4381 with ( 4382 patch("run_agent.get_tool_definitions", return_value=_make_tool_defs("web_search")), 4383 patch("run_agent.check_toolset_requirements", return_value={}), 4384 patch("agent.anthropic_adapter.build_anthropic_client", return_value=MagicMock()), 4385 ): 4386 agent = AIAgent( 4387 api_key="sk-ant-oat01-current-token", 4388 base_url="https://openrouter.ai/api/v1", 4389 api_mode="anthropic_messages", 4390 quiet_mode=True, 4391 skip_context_files=True, 4392 skip_memory=True, 4393 ) 4394 4395 response = SimpleNamespace(content=[]) 4396 agent._anthropic_client = MagicMock() 4397 agent._anthropic_client.messages.create.return_value = response 4398 4399 with patch.object(agent, "_try_refresh_anthropic_client_credentials", return_value=True) as refresh: 4400 result = agent._anthropic_messages_create({"model": "claude-sonnet-4-20250514"}) 4401 4402 


# ===================================================================
# _streaming_api_call tests
# ===================================================================


def _make_chunk(content=None, tool_calls=None, finish_reason=None, model="test/model"):
    """Build a SimpleNamespace mimicking an OpenAI streaming chunk."""
    delta = SimpleNamespace(content=content, tool_calls=tool_calls)
    choice = SimpleNamespace(delta=delta, finish_reason=finish_reason)
    return SimpleNamespace(model=model, choices=[choice])


def _make_tc_delta(index=0, tc_id=None, name=None, arguments=None):
    """Build a SimpleNamespace mimicking a streaming tool_call delta."""
    func = SimpleNamespace(name=name, arguments=arguments)
    return SimpleNamespace(index=index, id=tc_id, function=func)
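

# Reference sketch of the delta-merging rule the TestStreamingApiCall cases
# below exercise. The real accumulator lives inside
# _interruptible_streaming_api_call; this hypothetical helper ignores `index`
# for brevity and assumes each tool call opens with an id-bearing delta. It
# shows the three rules the tests depend on: a fresh id opens a new slot even
# at a reused index (Ollama), names are assigned rather than concatenated
# (MiniMax/NIM resend the full name), and only `arguments` accumulates.
def _sketch_merge_tc_delta(slots: list, delta) -> None:
    if delta.id and (not slots or slots[-1]["id"] != delta.id):
        slots.append({"id": delta.id, "name": "", "arguments": ""})
    current = slots[-1]
    if delta.function.name:
        current["name"] = delta.function.name  # assign, never +=
    if delta.function.arguments:
        current["arguments"] += delta.function.arguments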


class TestStreamingApiCall:
    """Tests for _streaming_api_call — voice TTS streaming pipeline."""

    def test_content_assembly(self, agent):
        chunks = [
            _make_chunk(content="Hel"),
            _make_chunk(content="lo "),
            _make_chunk(content="World"),
            _make_chunk(finish_reason="stop"),
        ]
        agent.client.chat.completions.create.return_value = iter(chunks)
        callback = MagicMock()
        agent.stream_delta_callback = callback

        resp = agent._interruptible_streaming_api_call({"messages": []})

        assert resp.choices[0].message.content == "Hello World"
        assert resp.choices[0].finish_reason == "stop"
        assert callback.call_count == 3
        callback.assert_any_call("Hel")
        callback.assert_any_call("lo ")
        callback.assert_any_call("World")

    def test_tool_call_accumulation(self, agent):
        # Per OpenAI streaming spec, function names are delivered atomically
        # in the first chunk; only `arguments` is fragmented across chunks.
        # The accumulator uses assignment for names (immune to MiniMax/NIM
        # resends of the full name) and `+=` for arguments.
        chunks = [
            _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "web_search", '{"q":')]),
            _make_chunk(tool_calls=[_make_tc_delta(0, None, None, '"test"}')]),
            _make_chunk(finish_reason="tool_calls"),
        ]
        agent.client.chat.completions.create.return_value = iter(chunks)

        resp = agent._interruptible_streaming_api_call({"messages": []})

        tc = resp.choices[0].message.tool_calls
        assert len(tc) == 1
        assert tc[0].function.name == "web_search"
        assert tc[0].function.arguments == '{"q":"test"}'
        assert tc[0].id == "call_1"

    def test_multiple_tool_calls(self, agent):
        chunks = [
            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{}')]),
            _make_chunk(tool_calls=[_make_tc_delta(1, "call_b", "read", '{}')]),
            _make_chunk(finish_reason="tool_calls"),
        ]
        agent.client.chat.completions.create.return_value = iter(chunks)

        resp = agent._interruptible_streaming_api_call({"messages": []})

        tc = resp.choices[0].message.tool_calls
        assert len(tc) == 2
        assert tc[0].function.name == "search"
        assert tc[1].function.name == "read"

    def test_truncated_tool_call_args_upgrade_finish_reason_to_length(self, agent):
        chunks = [
            _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "write_file", '{"path":"x.txt","content":"hel')]),
        ]
        agent.client.chat.completions.create.return_value = iter(chunks)

        resp = agent._interruptible_streaming_api_call({"messages": []})

        tc = resp.choices[0].message.tool_calls
        assert len(tc) == 1
        assert tc[0].function.name == "write_file"
        assert tc[0].function.arguments == '{"path":"x.txt","content":"hel'
        assert resp.choices[0].finish_reason == "length"

    def test_ollama_reused_index_separate_tool_calls(self, agent):
        """Ollama sends every tool call at index 0 with different ids.

        Without the fix, names and arguments get concatenated into one slot.
        """
        chunks = [
            _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]),
            # Second tool call at the SAME index 0, but different id
            _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]),
            _make_chunk(finish_reason="tool_calls"),
        ]
        agent.client.chat.completions.create.return_value = iter(chunks)

        resp = agent._interruptible_streaming_api_call({"messages": []})

        tc = resp.choices[0].message.tool_calls
        assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}"
        assert tc[0].function.name == "search"
        assert tc[0].function.arguments == '{"q":"hello"}'
        assert tc[0].id == "call_a"
        assert tc[1].function.name == "read_file"
        assert tc[1].function.arguments == '{"path":"x.py"}'
        assert tc[1].id == "call_b"
4500 """ 4501 chunks = [ 4502 _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":"hello"}')]), 4503 # Second tool call at the SAME index 0, but different id 4504 _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read_file", '{"path":"x.py"}')]), 4505 _make_chunk(finish_reason="tool_calls"), 4506 ] 4507 agent.client.chat.completions.create.return_value = iter(chunks) 4508 4509 resp = agent._interruptible_streaming_api_call({"messages": []}) 4510 4511 tc = resp.choices[0].message.tool_calls 4512 assert len(tc) == 2, f"Expected 2 tool calls, got {len(tc)}: {[t.function.name for t in tc]}" 4513 assert tc[0].function.name == "search" 4514 assert tc[0].function.arguments == '{"q":"hello"}' 4515 assert tc[0].id == "call_a" 4516 assert tc[1].function.name == "read_file" 4517 assert tc[1].function.arguments == '{"path":"x.py"}' 4518 assert tc[1].id == "call_b" 4519 4520 def test_ollama_reused_index_streamed_args(self, agent): 4521 """Ollama with streamed arguments across multiple chunks at same index.""" 4522 chunks = [ 4523 _make_chunk(tool_calls=[_make_tc_delta(0, "call_a", "search", '{"q":')]), 4524 _make_chunk(tool_calls=[_make_tc_delta(0, None, None, '"hello"}')]), 4525 # New tool call, same index 0 4526 _make_chunk(tool_calls=[_make_tc_delta(0, "call_b", "read", '{}')]), 4527 _make_chunk(finish_reason="tool_calls"), 4528 ] 4529 agent.client.chat.completions.create.return_value = iter(chunks) 4530 4531 resp = agent._interruptible_streaming_api_call({"messages": []}) 4532 4533 tc = resp.choices[0].message.tool_calls 4534 assert len(tc) == 2 4535 assert tc[0].function.name == "search" 4536 assert tc[0].function.arguments == '{"q":"hello"}' 4537 assert tc[1].function.name == "read" 4538 assert tc[1].function.arguments == '{}' 4539 4540 def test_content_and_tool_calls_together(self, agent): 4541 chunks = [ 4542 _make_chunk(content="I'll search"), 4543 _make_chunk(tool_calls=[_make_tc_delta(0, "call_1", "search", '{}')]), 4544 _make_chunk(finish_reason="tool_calls"), 4545 ] 4546 agent.client.chat.completions.create.return_value = iter(chunks) 4547 4548 resp = agent._interruptible_streaming_api_call({"messages": []}) 4549 4550 assert resp.choices[0].message.content == "I'll search" 4551 assert len(resp.choices[0].message.tool_calls) == 1 4552 4553 def test_empty_content_returns_none(self, agent): 4554 chunks = [_make_chunk(finish_reason="stop")] 4555 agent.client.chat.completions.create.return_value = iter(chunks) 4556 4557 resp = agent._interruptible_streaming_api_call({"messages": []}) 4558 4559 assert resp.choices[0].message.content is None 4560 assert resp.choices[0].message.tool_calls is None 4561 4562 def test_callback_exception_swallowed(self, agent): 4563 chunks = [ 4564 _make_chunk(content="Hello"), 4565 _make_chunk(content=" World"), 4566 _make_chunk(finish_reason="stop"), 4567 ] 4568 agent.client.chat.completions.create.return_value = iter(chunks) 4569 agent.stream_delta_callback = MagicMock(side_effect=ValueError("boom")) 4570 4571 resp = agent._interruptible_streaming_api_call({"messages": []}) 4572 4573 assert resp.choices[0].message.content == "Hello World" 4574 4575 def test_model_name_captured(self, agent): 4576 chunks = [ 4577 _make_chunk(content="Hi", model="gpt-4o"), 4578 _make_chunk(finish_reason="stop", model="gpt-4o"), 4579 ] 4580 agent.client.chat.completions.create.return_value = iter(chunks) 4581 4582 resp = agent._interruptible_streaming_api_call({"messages": []}) 4583 4584 assert resp.model == "gpt-4o" 4585 4586 def test_stream_kwarg_injected(self, 


# ===================================================================
# Interrupt _vprint force=True verification
# ===================================================================


class TestInterruptVprintForceTrue:
    """All interrupt _vprint calls must use force=True so they are always visible."""

    def test_all_interrupt_vprint_have_force_true(self):
        """Scan source for _vprint calls containing 'Interrupt' — each must have force=True."""
        import inspect
        source = inspect.getsource(AIAgent)
        lines = source.split("\n")
        violations = []
        for i, line in enumerate(lines, 1):
            stripped = line.strip()
            if "_vprint(" in stripped and "Interrupt" in stripped:
                if "force=True" not in stripped:
                    violations.append(f"line {i}: {stripped}")
        assert not violations, (
            "Interrupt _vprint calls missing force=True:\n"
            + "\n".join(violations)
        )


# ===================================================================
# Anthropic interrupt handler in _interruptible_api_call
# ===================================================================


class TestAnthropicInterruptHandler:
    """_interruptible_api_call must handle Anthropic mode when interrupted."""

    def test_interruptible_has_anthropic_branch(self):
        """The interrupt handler must check api_mode == 'anthropic_messages'."""
        import inspect
        source = inspect.getsource(AIAgent._interruptible_api_call)
        assert "anthropic_messages" in source, \
            "_interruptible_api_call must handle Anthropic interrupt (api_mode check)"

    def test_interruptible_rebuilds_anthropic_client(self):
        """After interrupting, the Anthropic client should be rebuilt."""
        import inspect
        source = inspect.getsource(AIAgent._interruptible_api_call)
        assert "build_anthropic_client" in source, \
            "_interruptible_api_call must rebuild Anthropic client after interrupt"

    def test_streaming_has_anthropic_branch(self):
        """_streaming_api_call must also handle Anthropic interrupt."""
        import inspect
        source = inspect.getsource(AIAgent._interruptible_streaming_api_call)
        assert "anthropic_messages" in source, \
            "_streaming_api_call must handle Anthropic interrupt"


# ---------------------------------------------------------------------------
# Bugfix: stream_callback forwarding for non-streaming providers
# ---------------------------------------------------------------------------


class TestStreamCallbackNonStreamingProvider:
    """When api_mode != chat_completions, stream_callback must still receive
    the response content so TTS works (batch delivery)."""

    def test_callback_receives_chat_completions_response(self, agent):
        """For chat_completions-shaped responses, callback gets content.

        Mirrors the forwarding logic inline (see the sketch after this class):
        the anthropic branch forwards nothing for a chat_completions-shaped
        response, while the generic branch delivers the content in one batch.
        """
        mock_response = SimpleNamespace(
            choices=[SimpleNamespace(
                message=SimpleNamespace(content="Hello", tool_calls=None, reasoning_content=None),
                finish_reason="stop", index=0,
            )],
            usage=None, model="test", id="test-id",
        )
        agent._interruptible_api_call = MagicMock(return_value=mock_response)

        # Anthropic branch: a chat_completions-shaped response has no
        # `content` blocks, so nothing reaches the callback.
        agent.api_mode = "anthropic_messages"
        received = []
        agent._stream_callback = received.append
        _cb = getattr(agent, "_stream_callback", None)
        response = agent._interruptible_api_call({})
        if _cb is not None and response:
            try:
                if agent.api_mode == "anthropic_messages":
                    text_parts = [
                        block.text for block in getattr(response, "content", [])
                        if getattr(block, "type", None) == "text" and getattr(block, "text", None)
                    ]
                    content = " ".join(text_parts) if text_parts else None
                else:
                    content = response.choices[0].message.content
                if content:
                    _cb(content)
            except Exception:
                pass
        assert received == []

        # Generic branch: any other api_mode reads choices[0].message.content.
        agent.api_mode = "some_other_mode"
        received2 = []
        agent._stream_callback = received2.append
        _cb2 = agent._stream_callback
        if _cb2 is not None and mock_response:
            try:
                content = mock_response.choices[0].message.content
                if content:
                    _cb2(content)
            except Exception:
                pass
        assert received2 == ["Hello"]

    def test_callback_receives_anthropic_content(self, agent):
        """For Anthropic responses, text blocks are extracted and forwarded."""
        agent.api_mode = "anthropic_messages"
        mock_response = SimpleNamespace(
            content=[SimpleNamespace(type="text", text="Hello from Claude")],
            stop_reason="end_turn",
        )

        received = []
        agent._stream_callback = received.append
        _cb = agent._stream_callback

        if _cb is not None and mock_response:
            try:
                if agent.api_mode == "anthropic_messages":
                    text_parts = [
                        block.text for block in getattr(mock_response, "content", [])
                        if getattr(block, "type", None) == "text" and getattr(block, "text", None)
                    ]
                    content = " ".join(text_parts) if text_parts else None
                else:
                    content = mock_response.choices[0].message.content
                if content:
                    _cb(content)
            except Exception:
                pass

        assert received == ["Hello from Claude"]
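

# The extraction rule both tests above inline, collected into one
# hypothetical helper for readability: pull text out of whichever response
# shape the provider returned, then hand it to the TTS callback as a single
# batch. Forwarding failures are deliberately swallowed.
def _sketch_forward_batch_content(api_mode, response, callback):
    if callback is None or not response:
        return
    try:
        if api_mode == "anthropic_messages":
            text_parts = [
                block.text for block in getattr(response, "content", [])
                if getattr(block, "type", None) == "text" and getattr(block, "text", None)
            ]
            content = " ".join(text_parts) if text_parts else None
        else:
            content = response.choices[0].message.content
        if content:
            callback(content)
    except Exception:
        pass  # TTS forwarding must never break the turn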


# ---------------------------------------------------------------------------
# Bugfix: API-only user message prefixes must not persist
# ---------------------------------------------------------------------------


class TestPersistUserMessageOverride:
    """Synthetic API-only user prefixes should never leak into transcripts."""

    def test_persist_session_rewrites_current_turn_user_message(self, agent):
        agent._session_db = MagicMock()
        agent.session_id = "session-123"
        agent._last_flushed_db_idx = 0
        agent._persist_user_message_idx = 0
        agent._persist_user_message_override = "Hello there"
        messages = [
            {
                "role": "user",
                "content": (
                    "[Voice input — respond concisely and conversationally, "
                    "2-3 sentences max. No code blocks or markdown.] Hello there"
                ),
            },
            {"role": "assistant", "content": "Hi!"},
        ]

        with patch.object(agent, "_save_session_log") as mock_save:
            agent._persist_session(messages, [])

        assert messages[0]["content"] == "Hello there"
        saved_messages = mock_save.call_args.args[0]
        assert saved_messages[0]["content"] == "Hello there"
        first_db_write = agent._session_db.append_message.call_args_list[0].kwargs
        assert first_db_write["content"] == "Hello there"
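

# Sketch of the rewrite the test above pins down (hypothetical helper; the
# real logic lives in _persist_session): when a synthetic API-only prefix was
# prepended to the current user turn, the persisted copy is swapped back to
# the text the user actually typed or spoke.
def _sketch_restore_persisted_user_message(agent, messages: list) -> None:
    override = getattr(agent, "_persist_user_message_override", None)
    idx = getattr(agent, "_persist_user_message_idx", None)
    if override is None or idx is None or idx >= len(messages):
        return
    if messages[idx].get("role") == "user":
        messages[idx]["content"] = override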


class TestReasoningReplayForStrictProviders:
    """Assistant replay must preserve provider-native reasoning fields."""

    def _setup_agent(self, agent):
        agent._cached_system_prompt = "You are helpful."
        agent._use_prompt_caching = False
        agent.tool_delay = 0
        agent.compression_enabled = False
        agent.save_trajectories = False

    def test_kimi_tool_replay_includes_space_reasoning_content(self, agent):
        self._setup_agent(agent)
        agent.base_url = "https://api.kimi.com/coding/v1"
        agent._base_url_lower = agent.base_url.lower()
        agent.provider = "kimi-coding"

        prior_assistant = {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "c1",
                    "type": "function",
                    "function": {"name": "terminal", "arguments": "{\"command\":\"date\"}"},
                }
            ],
        }
        tool_result = {"role": "tool", "tool_call_id": "c1", "content": "Tue Apr 21"}
        final_resp = _mock_response(content="done", finish_reason="stop")
        agent.client.chat.completions.create.return_value = final_resp

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation(
                "next step",
                conversation_history=[prior_assistant, tool_result],
            )

        assert result["completed"] is True
        sent_messages = agent.client.chat.completions.create.call_args.kwargs["messages"]
        replayed_assistant = next(msg for msg in sent_messages if msg.get("role") == "assistant")
        assert replayed_assistant["role"] == "assistant"
        assert replayed_assistant["tool_calls"][0]["function"]["name"] == "terminal"
        assert "reasoning_content" in replayed_assistant
        assert replayed_assistant["reasoning_content"] == " "

    def test_explicit_reasoning_content_beats_normalized_reasoning_on_replay(self, agent):
        self._setup_agent(agent)
        prior_assistant = {
            "role": "assistant",
            "content": "",
            "tool_calls": [
                {
                    "id": "c1",
                    "type": "function",
                    "function": {"name": "web_search", "arguments": "{\"q\":\"test\"}"},
                }
            ],
            "reasoning": "summary reasoning",
            "reasoning_content": "provider-native scratchpad",
        }
        tool_result = {"role": "tool", "tool_call_id": "c1", "content": "ok"}
        final_resp = _mock_response(content="done", finish_reason="stop")
        agent.client.chat.completions.create.return_value = final_resp

        with (
            patch.object(agent, "_persist_session"),
            patch.object(agent, "_save_trajectory"),
            patch.object(agent, "_cleanup_task_resources"),
        ):
            result = agent.run_conversation(
                "next step",
                conversation_history=[prior_assistant, tool_result],
            )

        assert result["completed"] is True
        sent_messages = agent.client.chat.completions.create.call_args.kwargs["messages"]
        replayed_assistant = next(msg for msg in sent_messages if msg.get("role") == "assistant")
        assert replayed_assistant["reasoning_content"] == "provider-native scratchpad"
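

# Replay rule the two tests above pin down, sketched with one assumption:
# the fallback from the normalized `reasoning` summary is inferred from the
# test names, not confirmed by them. Provider-native `reasoning_content`
# always wins, and strict endpoints such as the Kimi coding API get a
# single-space placeholder on tool-calling turns that carried no reasoning.
def _sketch_replay_reasoning_content(prior: dict, is_kimi_coding: bool):
    if prior.get("reasoning_content"):
        return prior["reasoning_content"]  # provider-native scratchpad wins
    if prior.get("reasoning"):
        return prior["reasoning"]  # assumed fallback to the normalized field
    if is_kimi_coding and prior.get("tool_calls"):
        return " "  # placeholder the strict endpoint accepts
    return None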


# ---------------------------------------------------------------------------
# Bugfix: _vprint force=True on error messages during TTS
# ---------------------------------------------------------------------------


class TestVprintForceOnErrors:
    """Error/warning messages must be visible during streaming TTS."""

    def test_forced_message_shown_during_tts(self, agent):
        agent._stream_callback = lambda x: None
        printed = []
        with patch("builtins.print", side_effect=lambda *a, **kw: printed.append(a)):
            agent._vprint("error msg", force=True)
        assert len(printed) == 1

    def test_non_forced_suppressed_during_tts(self, agent):
        agent._stream_callback = lambda x: None
        printed = []
        with patch("builtins.print", side_effect=lambda *a, **kw: printed.append(a)):
            agent._vprint("debug info")
        assert len(printed) == 0

    def test_all_shown_without_tts(self, agent):
        agent._stream_callback = None
        printed = []
        with patch("builtins.print", side_effect=lambda *a, **kw: printed.append(a)):
            agent._vprint("debug")
            agent._vprint("error", force=True)
        assert len(printed) == 2


class TestNormalizeCodexDictArguments:
    """_normalize_codex_response must produce valid JSON strings for tool
    call arguments, even when the Responses API returns them as dicts."""

    def _make_codex_response(self, item_type, arguments, item_status="completed"):
        """Build a minimal Responses API response with a single tool call."""
        item = SimpleNamespace(
            type=item_type,
            status=item_status,
        )
        if item_type == "function_call":
            item.name = "web_search"
            item.arguments = arguments
            item.call_id = "call_abc123"
            item.id = "fc_abc123"
        elif item_type == "custom_tool_call":
            item.name = "web_search"
            item.input = arguments
            item.call_id = "call_abc123"
            item.id = "fc_abc123"
        return SimpleNamespace(
            output=[item],
            status="completed",
        )

    def test_function_call_dict_arguments_produce_valid_json(self, agent):
        """dict arguments from function_call must be serialised with
        json.dumps, not str(), so downstream json.loads() succeeds."""
        args_dict = {"query": "weather in NYC", "units": "celsius"}
        response = self._make_codex_response("function_call", args_dict)
        msg, _ = _normalize_codex_response(response)
        tc = msg.tool_calls[0]
        parsed = json.loads(tc.function.arguments)
        assert parsed == args_dict

    def test_custom_tool_call_dict_arguments_produce_valid_json(self, agent):
        """dict arguments from custom_tool_call must also use json.dumps."""
        args_dict = {"path": "/tmp/test.txt", "content": "hello"}
        response = self._make_codex_response("custom_tool_call", args_dict)
        msg, _ = _normalize_codex_response(response)
        tc = msg.tool_calls[0]
        parsed = json.loads(tc.function.arguments)
        assert parsed == args_dict

    def test_string_arguments_unchanged(self, agent):
        """String arguments must pass through without modification."""
        args_str = '{"query": "test"}'
        response = self._make_codex_response("function_call", args_str)
        msg, _ = _normalize_codex_response(response)
        tc = msg.tool_calls[0]
        assert tc.function.arguments == args_str
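

# The serialisation rule under test, sketched: dict arguments from the
# Responses API are JSON-encoded, strings pass through untouched. Using
# str() on a dict would emit single-quoted pseudo-JSON that the downstream
# json.loads() rejects.
def _sketch_coerce_tool_arguments(arguments) -> str:
    if isinstance(arguments, str):
        return arguments
    return json.dumps(arguments)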
return_value="sk-ant-setup-oauth-token"), 4995 patch("agent.anthropic_adapter.build_anthropic_client", 4996 return_value=MagicMock()), 4997 ): 4998 result = agent._try_refresh_anthropic_client_credentials() 4999 5000 assert result is True 5001 assert agent._is_anthropic_oauth is True 5002 5003 def test_oauth_flag_updates_oauth_to_api_key(self, agent): 5004 """Refreshing from OAuth to API key must set flag to False.""" 5005 agent.api_mode = "anthropic_messages" 5006 agent.provider = "anthropic" 5007 agent._anthropic_api_key = "sk-ant-setup-old" 5008 agent._anthropic_client = MagicMock() 5009 agent._is_anthropic_oauth = True 5010 5011 with ( 5012 patch("agent.anthropic_adapter.resolve_anthropic_token", 5013 return_value="sk-ant-api03-new-key"), 5014 patch("agent.anthropic_adapter.build_anthropic_client", 5015 return_value=MagicMock()), 5016 ): 5017 result = agent._try_refresh_anthropic_client_credentials() 5018 5019 assert result is True 5020 assert agent._is_anthropic_oauth is False 5021 5022 5023 class TestFallbackSetsOAuthFlag: 5024 """_try_activate_fallback must set _is_anthropic_oauth for Anthropic fallbacks.""" 5025 5026 def test_fallback_to_anthropic_oauth_sets_flag(self, agent): 5027 agent._fallback_activated = False 5028 agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"} 5029 agent._fallback_chain = [agent._fallback_model] 5030 agent._fallback_index = 0 5031 5032 mock_client = MagicMock() 5033 mock_client.base_url = "https://api.anthropic.com/v1" 5034 mock_client.api_key = "sk-ant-setup-oauth-token" 5035 5036 with ( 5037 patch("agent.auxiliary_client.resolve_provider_client", 5038 return_value=(mock_client, None)), 5039 patch("agent.anthropic_adapter.build_anthropic_client", 5040 return_value=MagicMock()), 5041 patch("agent.anthropic_adapter.resolve_anthropic_token", 5042 return_value=None), 5043 ): 5044 result = agent._try_activate_fallback() 5045 5046 assert result is True 5047 assert agent._is_anthropic_oauth is True 5048 5049 def test_fallback_to_anthropic_api_key_clears_flag(self, agent): 5050 agent._fallback_activated = False 5051 agent._fallback_model = {"provider": "anthropic", "model": "claude-sonnet-4-6"} 5052 agent._fallback_chain = [agent._fallback_model] 5053 agent._fallback_index = 0 5054 5055 mock_client = MagicMock() 5056 mock_client.base_url = "https://api.anthropic.com/v1" 5057 mock_client.api_key = "sk-ant-api03-regular-key" 5058 5059 with ( 5060 patch("agent.auxiliary_client.resolve_provider_client", 5061 return_value=(mock_client, None)), 5062 patch("agent.anthropic_adapter.build_anthropic_client", 5063 return_value=MagicMock()), 5064 patch("agent.anthropic_adapter.resolve_anthropic_token", 5065 return_value=None), 5066 ): 5067 result = agent._try_activate_fallback() 5068 5069 assert result is True 5070 assert agent._is_anthropic_oauth is False 5071 5072 5073 class TestMemoryNudgeCounterPersistence: 5074 """_turns_since_memory must persist across run_conversation calls.""" 5075 5076 def test_counters_initialized_in_init(self): 5077 """Counters must exist on the agent after __init__.""" 5078 with patch("run_agent.get_tool_definitions", return_value=[]): 5079 a = AIAgent( 5080 model="test", api_key="test-key", base_url="http://localhost:1234/v1", 5081 provider="openrouter", skip_context_files=True, skip_memory=True, 5082 ) 5083 assert hasattr(a, "_turns_since_memory") 5084 assert hasattr(a, "_iters_since_skill") 5085 assert a._turns_since_memory == 0 5086 assert a._iters_since_skill == 0 5087 5088 def 


class TestMemoryNudgeCounterPersistence:
    """_turns_since_memory must persist across run_conversation calls."""

    def test_counters_initialized_in_init(self):
        """Counters must exist on the agent after __init__."""
        with patch("run_agent.get_tool_definitions", return_value=[]):
            a = AIAgent(
                model="test", api_key="test-key", base_url="http://localhost:1234/v1",
                provider="openrouter", skip_context_files=True, skip_memory=True,
            )
        assert hasattr(a, "_turns_since_memory")
        assert hasattr(a, "_iters_since_skill")
        assert a._turns_since_memory == 0
        assert a._iters_since_skill == 0

    def test_counters_not_reset_in_preamble(self):
        """The run_conversation preamble must not zero the nudge counters."""
        import inspect
        src = inspect.getsource(AIAgent.run_conversation)
        # The preamble resets many fields (retry counts, budget, etc.)
        # before the main loop. Find that reset block and verify our
        # counters aren't in it. The reset block ends at iteration_budget.
        preamble_end = src.index("self.iteration_budget = IterationBudget")
        preamble = src[:preamble_end]
        assert "self._turns_since_memory = 0" not in preamble
        assert "self._iters_since_skill = 0" not in preamble


class TestDeadRetryCode:
    """Unreachable retry_count >= max_retries checks after a raise must not exist."""

    def test_no_unreachable_max_retries_after_backoff(self):
        import inspect
        source = inspect.getsource(AIAgent.run_conversation)
        occurrences = source.count("if retry_count >= max_retries:")
        assert occurrences == 2, (
            f"Expected 2 occurrences of 'if retry_count >= max_retries:' "
            f"but found {occurrences}"
        )


class TestMemoryContextSanitization:
    """sanitize_context() helper correctness — used at provider boundaries."""

    def test_user_message_is_not_mutated_by_run_conversation(self):
        """User input must reach run_conversation untouched — if a user types
        a literal <memory-context> tag we don't silently delete their text.
        The streaming scrubber + plugin-side scrub cover real leak paths."""
        import inspect
        src = inspect.getsource(AIAgent.run_conversation)
        assert "sanitize_context(user_message)" not in src
        assert "sanitize_context(persist_user_message)" not in src

    def test_sanitize_context_strips_full_block(self):
        """Helper-level: a string with an embedded memory-context block is
        cleaned to just the surrounding text. Used by build_memory_context_block
        (input-validation) and by plugins on their own backend boundary."""
        from agent.memory_manager import sanitize_context

        user_text = "how is the honcho working"
        injected = (
            user_text + "\n\n"
            "<memory-context>\n"
            "[System note: The following is recalled memory context, "
            "NOT new user input. Treat as informational background data.]\n\n"
            "## User Representation\n"
            "[2026-01-13 02:13:00] stale observation about AstroMap\n"
            "</memory-context>"
        )
        result = sanitize_context(injected)
        assert "memory-context" not in result.lower()
        assert "stale observation" not in result
        assert "how is the honcho working" in result
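

# What TestMemoryContextSanitization requires of the helper, as a minimal
# regex sketch: excise the whole <memory-context>...</memory-context> block,
# tags included, and keep the surrounding user text. The real sanitize_context
# in agent.memory_manager may be implemented differently.
_MEMORY_CONTEXT_RE = re.compile(r"<memory-context>.*?</memory-context>", re.DOTALL)


def _sketch_sanitize_context(text: str) -> str:
    return _MEMORY_CONTEXT_RE.sub("", text).strip()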
5153 """ 5154 5155 def test_on_turn_start_called_before_prefetch(self): 5156 """Source-level check: on_turn_start appears before prefetch_all in run_conversation.""" 5157 import inspect 5158 src = inspect.getsource(AIAgent.run_conversation) 5159 # Find the actual method calls, not comments 5160 idx_turn_start = src.index(".on_turn_start(") 5161 idx_prefetch = src.index(".prefetch_all(") 5162 assert idx_turn_start < idx_prefetch, ( 5163 "on_turn_start() must be called before prefetch_all() in run_conversation " 5164 "so that memory providers have the correct turn count for cadence checks" 5165 ) 5166 5167 def test_on_turn_start_uses_user_turn_count(self): 5168 """Source-level check: on_turn_start receives self._user_turn_count.""" 5169 import inspect 5170 src = inspect.getsource(AIAgent.run_conversation) 5171 assert "on_turn_start(self._user_turn_count" in src