# test_resume_display.py
"""Tests for session resume history display — _display_resumed_history() and
_preload_resumed_session().

Verifies that resuming a session shows a compact recap of the previous
conversation with correct formatting, truncation, and config behavior.
"""

import os
import sys
from io import StringIO
from unittest.mock import MagicMock, patch

import pytest

sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))


def _make_cli(config_overrides=None, env_overrides=None, **kwargs):
    """Create a HermesCLI instance with minimal mocking.

    Args:
        config_overrides: Optional mapping layered over the clean baseline
            config. When both the override value and the baseline value for
            a key are dicts, the override is merged into the baseline (one
            level deep); otherwise the override replaces the baseline value.
        env_overrides: Optional environment variables applied on top of the
            blanked ``LLM_MODEL`` / ``HERMES_MAX_ITERATIONS`` values.
        **kwargs: Forwarded unchanged to ``HermesCLI(...)``.

    Returns:
        The constructed ``HermesCLI``. The patches are only active while the
        instance is being constructed; they are undone before returning.
    """
    import cli as _cli_mod
    from cli import HermesCLI

    _clean_config = {
        "model": {
            "default": "anthropic/claude-opus-4.6",
            "base_url": "https://openrouter.ai/api/v1",
            "provider": "auto",
        },
        "display": {"compact": False, "tool_progress": "all", "resume_display": "full"},
        "agent": {},
        "terminal": {"env_type": "local"},
    }
    if config_overrides:
        for key, value in config_overrides.items():
            # Merge dict-into-dict so a test can override a single nested
            # option without restating the whole section.
            if isinstance(value, dict) and isinstance(_clean_config.get(key), dict):
                _clean_config[key].update(value)
            else:
                _clean_config[key] = value

    clean_env = {"LLM_MODEL": "", "HERMES_MAX_ITERATIONS": ""}
    if env_overrides:
        clean_env.update(env_overrides)
    with (
        patch("cli.get_tool_definitions", return_value=[]),
        patch.dict("os.environ", clean_env, clear=False),
        patch.dict(_cli_mod.__dict__, {"CLI_CONFIG": _clean_config}),
    ):
        return HermesCLI(**kwargs)


# ── Sample conversation histories for tests ──────────────────────────


def _simple_history():
    """Two-turn conversation: user → assistant → user → assistant."""
    return [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": "What is Python?"},
        {"role": "assistant", "content": "Python is a high-level programming language."},
        {"role": "user", "content": "How do I install it?"},
        {"role": "assistant", "content": "You can install Python from python.org."},
    ]


def _tool_call_history():
    """Conversation with tool calls and tool results."""
    return [
        {"role": "system", "content": "system prompt"},
        {"role": "user", "content": "Search for Python tutorials"},
        {
            "role": "assistant",
            "content": None,
            "tool_calls": [
                {
                    "id": "call_1",
                    "type": "function",
                    "function": {"name": "web_search", "arguments": '{"query":"python tutorials"}'},
                },
                {
                    "id": "call_2",
                    "type": "function",
                    "function": {"name": "web_extract", "arguments": '{"urls":["https://example.com"]}'},
                },
            ],
        },
        {"role": "tool", "tool_call_id": "call_1", "content": "Found 5 results..."},
        {"role": "tool", "tool_call_id": "call_2", "content": "Page content..."},
        {"role": "assistant", "content": "Here are some great Python tutorials I found."},
    ]


def _large_history(n_exchanges=15):
    """Build a history with many exchanges to test truncation."""
    msgs = [{"role": "system", "content": "system prompt"}]
    for i in range(n_exchanges):
        msgs.append({"role": "user", "content": f"Question #{i + 1}: What is item {i + 1}?"})
        msgs.append({"role": "assistant", "content": f"Answer #{i + 1}: Item {i + 1} is great."})
    return msgs


def _multimodal_history():
    """Conversation with multimodal (image) content."""
    return [
        {"role": "system", "content": "system prompt"},
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What's in this image?"},
                {"type": "image_url", "image_url": {"url": "https://example.com/cat.jpg"}},
            ],
        },
        {"role": "assistant", "content": "I see a cat in the image."},
    ]


# ── Tests for _display_resumed_history ───────────────────────────────


class TestDisplayResumedHistory:
    """_display_resumed_history() renders a Rich panel with conversation recap."""

    def _capture_display(self, cli_obj):
        """Run _display_resumed_history and capture the Rich console output."""
        buf = StringIO()
        cli_obj.console.file = buf
        cli_obj._display_resumed_history()
        return buf.getvalue()

    def test_simple_history_shows_user_and_assistant(self):
        """Both speaker labels and the message texts appear in the recap."""
        cli = _make_cli()
        cli.conversation_history = _simple_history()
        output = self._capture_display(cli)

        assert "You:" in output
        assert "Hermes:" in output
        assert "What is Python?" in output
        assert "Python is a high-level programming language." in output
        assert "How do I install it?" in output

    def test_system_messages_hidden(self):
        """System prompt text is not rendered."""
        cli = _make_cli()
        cli.conversation_history = _simple_history()
        output = self._capture_display(cli)

        assert "You are a helpful assistant" not in output

    def test_tool_messages_hidden(self):
        """Raw tool result payloads are not rendered."""
        cli = _make_cli()
        cli.conversation_history = _tool_call_history()
        output = self._capture_display(cli)

        # Tool result content should NOT appear
        assert "Found 5 results" not in output
        assert "Page content" not in output

    def test_tool_calls_shown_as_summary(self):
        """Tool calls are summarised by count and tool names."""
        cli = _make_cli()
        cli.conversation_history = _tool_call_history()
        output = self._capture_display(cli)

        assert "2 tool calls" in output
        assert "web_search" in output
        assert "web_extract" in output

    def test_long_user_message_truncated(self):
        """A 500-char user message is cut down and marked with an ellipsis."""
        cli = _make_cli()
        long_text = "A" * 500
        cli.conversation_history = [
            {"role": "user", "content": long_text},
            {"role": "assistant", "content": "OK."},
        ]
        output = self._capture_display(cli)

        # Should have truncation indicator and NOT contain the full 500 chars
        assert "..." in output
        assert "A" * 500 not in output
        # The 300-char truncated text is present but may be line-wrapped by
        # Rich's panel renderer, so check the total A count in the output
        a_count = output.count("A")
        assert 200 <= a_count <= 310  # roughly 300 chars (±panel padding)

    def test_long_assistant_message_truncated(self):
        """Non-last assistant messages are still truncated."""
        cli = _make_cli()
        long_text = "B" * 400
        cli.conversation_history = [
            {"role": "user", "content": "Tell me a lot."},
            {"role": "assistant", "content": long_text},
            {"role": "user", "content": "And more?"},
            {"role": "assistant", "content": "Short final reply."},
        ]
        output = self._capture_display(cli)

        # The non-last assistant message should be truncated
        assert "B" * 400 not in output
        # The last assistant message shown in full
        assert "Short final reply." in output

    def test_multiline_assistant_truncated(self):
        """Non-last multiline assistant messages are truncated to 3 lines."""
        cli = _make_cli()
        multi = "\n".join([f"Line {i}" for i in range(20)])
        cli.conversation_history = [
            {"role": "user", "content": "Show me lines."},
            {"role": "assistant", "content": multi},
            {"role": "user", "content": "What else?"},
            {"role": "assistant", "content": "Done."},
        ]
        output = self._capture_display(cli)

        # First 3 lines of non-last assistant should be there
        assert "Line 0" in output
        assert "Line 1" in output
        assert "Line 2" in output
        # Line 19 should NOT be in the truncated message
        assert "Line 19" not in output

    def test_last_assistant_response_shown_in_full(self):
        """The last assistant response is shown un-truncated so the user
        knows where they left off without wasting tokens re-asking."""
        cli = _make_cli()
        long_text = "X" * 500
        cli.conversation_history = [
            {"role": "user", "content": "Tell me everything."},
            {"role": "assistant", "content": long_text},
        ]
        output = self._capture_display(cli)

        # Full 500-char text should be present (may be line-wrapped by Rich)
        x_count = output.count("X")
        assert x_count >= 490  # allow small Rich formatting variance

    def test_last_assistant_multiline_shown_in_full(self):
        """The last assistant response shows all lines, not just 3."""
        cli = _make_cli()
        multi = "\n".join([f"Line {i}" for i in range(20)])
        cli.conversation_history = [
            {"role": "user", "content": "Show me everything."},
            {"role": "assistant", "content": multi},
        ]
        output = self._capture_display(cli)

        # All 20 lines should be present since it's the last response
        assert "Line 0" in output
        assert "Line 10" in output
        assert "Line 19" in output

    def test_large_history_shows_truncation_indicator(self):
        """A 15-exchange history is elided but keeps the latest question."""
        cli = _make_cli()
        cli.conversation_history = _large_history(n_exchanges=15)
        output = self._capture_display(cli)

        # Should show "earlier messages" indicator
        assert "earlier messages" in output
        # Last question should still be visible
        assert "Question #15" in output

    def test_multimodal_content_handled(self):
        """List-style content renders its text part plus an image marker."""
        cli = _make_cli()
        cli.conversation_history = _multimodal_history()
        output = self._capture_display(cli)

        assert "What's in this image?" in output
        assert "[image]" in output

    def test_empty_history_no_output(self):
        """Nothing is printed when there is no history."""
        cli = _make_cli()
        cli.conversation_history = []
        output = self._capture_display(cli)

        assert output.strip() == ""

    def test_minimal_config_suppresses_display(self):
        """resume_display="minimal" prints no recap at all."""
        cli = _make_cli(config_overrides={"display": {"resume_display": "minimal"}})
        # resume_display is captured as an instance variable during __init__
        assert cli.resume_display == "minimal"
        cli.conversation_history = _simple_history()
        output = self._capture_display(cli)

        assert output.strip() == ""

    def test_panel_has_title(self):
        """The recap panel carries the "Previous Conversation" title."""
        cli = _make_cli()
        cli.conversation_history = _simple_history()
        output = self._capture_display(cli)

        assert "Previous Conversation" in output

    def test_assistant_with_no_content_no_tools_skipped(self):
        """Assistant messages with no visible output (e.g. pure reasoning)
        are skipped in the recap."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Hello"},
            {"role": "assistant", "content": None},
        ]
        output = self._capture_display(cli)

        # The assistant entry should be skipped, only the user message shown
        assert "You:" in output
        assert "Hermes:" not in output

    def test_only_system_messages_no_output(self):
        """A history holding only a system message prints nothing."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "system", "content": "You are helpful."},
        ]
        output = self._capture_display(cli)

        assert output.strip() == ""

    def test_reasoning_scratchpad_stripped(self):
        """<REASONING_SCRATCHPAD> blocks should be stripped from display."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Think about this"},
            {
                "role": "assistant",
                "content": (
                    "<REASONING_SCRATCHPAD>\nLet me think step by step.\n"
                    "</REASONING_SCRATCHPAD>\n\nThe answer is 42."
                ),
            },
        ]
        output = self._capture_display(cli)

        assert "REASONING_SCRATCHPAD" not in output
        assert "Let me think step by step" not in output
        assert "The answer is 42" in output

    def test_pure_reasoning_message_skipped(self):
        """Assistant messages that are only reasoning should be skipped."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Hello"},
            {
                "role": "assistant",
                "content": "<REASONING_SCRATCHPAD>\nJust thinking...\n</REASONING_SCRATCHPAD>",
            },
            {"role": "assistant", "content": "Hi there!"},
        ]
        output = self._capture_display(cli)

        assert "Just thinking" not in output
        assert "Hi there!" in output

    def test_think_tags_stripped(self):
        """<think>...</think> blocks should be stripped from display (#11316)."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Solve this"},
            {
                "role": "assistant",
                "content": "<think>\nI need to reason carefully here.\n</think>\n\nThe answer is 7.",
            },
        ]
        output = self._capture_display(cli)

        assert "<think>" not in output
        assert "</think>" not in output
        assert "I need to reason carefully here" not in output
        assert "The answer is 7" in output

    def test_thinking_tags_stripped(self):
        """<thinking>...</thinking> blocks should be stripped from display."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "What is 2+2?"},
            {
                "role": "assistant",
                "content": "<thinking>\nLet me compute: 2 + 2 = 4\n</thinking>\n\nThe answer is 4.",
            },
        ]
        output = self._capture_display(cli)

        assert "<thinking>" not in output
        assert "Let me compute" not in output
        assert "The answer is 4" in output

    def test_reasoning_tags_stripped(self):
        """<reasoning>...</reasoning> blocks should be stripped from display."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Explain gravity"},
            {
                "role": "assistant",
                "content": (
                    "<reasoning>\nGravity is a fundamental force...\n</reasoning>\n\n"
                    "Gravity pulls objects together."
                ),
            },
        ]
        output = self._capture_display(cli)

        assert "<reasoning>" not in output
        assert "fundamental force" not in output
        assert "Gravity pulls objects together" in output

    def test_thought_tags_stripped(self):
        """<thought>...</thought> blocks (Gemma 4) should be stripped."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Say hello"},
            {
                "role": "assistant",
                "content": "<thought>\nInternal thought here.\n</thought>\n\nHello!",
            },
        ]
        output = self._capture_display(cli)

        assert "<thought>" not in output
        assert "Internal thought here" not in output
        assert "Hello!" in output

    def test_unclosed_think_tag_stripped(self):
        """Unclosed <think> (truncated generation) should not leak reasoning."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Truncated response"},
            {
                "role": "assistant",
                "content": "Some text before.\n<think>\nUnfinished reasoning...",
            },
        ]
        output = self._capture_display(cli)

        assert "<think>" not in output
        assert "Unfinished reasoning" not in output
        assert "Some text before" in output

    def test_multiple_reasoning_blocks_all_stripped(self):
        """Multiple interleaved reasoning blocks are all stripped."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Complex question"},
            {
                "role": "assistant",
                "content": (
                    "<think>\nFirst thought.\n</think>\n"
                    "Partial text.\n"
                    "<reasoning>\nSecond thought.\n</reasoning>\n"
                    "Final answer."
                ),
            },
        ]
        output = self._capture_display(cli)

        assert "First thought" not in output
        assert "Second thought" not in output
        assert "Partial text" in output
        assert "Final answer" in output

    def test_orphan_closing_think_tag_stripped(self):
        """A stray </think> with no matching open should not render to user."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Broken output"},
            {
                "role": "assistant",
                "content": "some leftover reasoning</think>Visible answer.",
            },
        ]
        output = self._capture_display(cli)

        assert "</think>" not in output
        assert "Visible answer" in output

    def test_assistant_with_text_and_tool_calls(self):
        """When an assistant message has both text content AND tool_calls."""
        cli = _make_cli()
        cli.conversation_history = [
            {"role": "user", "content": "Do something complex"},
            {
                "role": "assistant",
                "content": "Let me search for that.",
                "tool_calls": [
                    {
                        "id": "call_1",
                        "type": "function",
                        "function": {"name": "terminal", "arguments": '{"command":"ls"}'},
                    }
                ],
            },
        ]
        output = self._capture_display(cli)

        assert "Let me search for that." in output
        assert "1 tool call" in output
        assert "terminal" in output


# ── Tests for _preload_resumed_session ──────────────────────────────


class TestPreloadResumedSession:
    """_preload_resumed_session() loads session from DB early."""

    def _capture_preload(self, cli_obj):
        """Run _preload_resumed_session with the console redirected.

        Returns:
            A ``(result, output)`` tuple: the method's return value and the
            text written to the console while it ran.
        """
        buf = StringIO()
        cli_obj.console.file = buf
        result = cli_obj._preload_resumed_session()
        return result, buf.getvalue()

    def test_returns_false_when_not_resumed(self):
        """Without a resume target there is nothing to preload."""
        cli = _make_cli()
        assert cli._preload_resumed_session() is False

    def test_returns_false_when_no_session_db(self):
        """A missing session DB makes preloading a no-op."""
        cli = _make_cli(resume="test_session_id")
        cli._session_db = None
        assert cli._preload_resumed_session() is False

    def test_returns_false_when_session_not_found(self):
        """An unknown session id reports "Session not found"."""
        cli = _make_cli(resume="nonexistent_session")
        mock_db = MagicMock()
        mock_db.get_session.return_value = None
        cli._session_db = mock_db

        result, output = self._capture_preload(cli)

        assert result is False
        assert "Session not found" in output

    def test_returns_false_when_session_has_no_messages(self):
        """A session with an empty message list reports "no messages"."""
        cli = _make_cli(resume="empty_session")
        mock_db = MagicMock()
        mock_db.get_session.return_value = {"id": "empty_session", "title": None}
        mock_db.get_messages_as_conversation.return_value = []
        cli._session_db = mock_db

        result, output = self._capture_preload(cli)

        assert result is False
        assert "no messages" in output

    def test_loads_session_successfully(self):
        """A stored session populates the history and prints a summary."""
        cli = _make_cli(resume="good_session")
        messages = _simple_history()
        mock_db = MagicMock()
        mock_db.get_session.return_value = {"id": "good_session", "title": "Test Session"}
        mock_db.get_messages_as_conversation.return_value = messages
        cli._session_db = mock_db

        result, output = self._capture_preload(cli)

        assert result is True
        assert cli.conversation_history == messages
        assert "Resumed session" in output
        assert "good_session" in output
        assert "Test Session" in output
        assert "2 user messages" in output

    def test_reopens_session_in_db(self):
        """Resuming clears ended_at on the session row and commits."""
        cli = _make_cli(resume="reopen_session")
        messages = [{"role": "user", "content": "hi"}]
        mock_db = MagicMock()
        mock_db.get_session.return_value = {"id": "reopen_session", "title": None}
        mock_db.get_messages_as_conversation.return_value = messages
        mock_conn = MagicMock()
        mock_db._conn = mock_conn
        cli._session_db = mock_db

        # Output is irrelevant here; the helper just keeps it off the terminal.
        self._capture_preload(cli)

        # Should have executed UPDATE to clear ended_at
        mock_conn.execute.assert_called_once()
        call_args = mock_conn.execute.call_args
        assert "ended_at = NULL" in call_args[0][0]
        mock_conn.commit.assert_called_once()

    def test_singular_user_message_grammar(self):
        """1 user message should say 'message' not 'messages'."""
        cli = _make_cli(resume="one_msg_session")
        messages = [
            {"role": "user", "content": "hello"},
            {"role": "assistant", "content": "hi"},
        ]
        mock_db = MagicMock()
        mock_db.get_session.return_value = {"id": "one_msg_session", "title": None}
        mock_db.get_messages_as_conversation.return_value = messages
        mock_db._conn = MagicMock()
        cli._session_db = mock_db

        _, output = self._capture_preload(cli)

        assert "1 user message," in output
        assert "1 user messages" not in output


# ── Integration: _init_agent skips when preloaded ────────────────────


class TestInitAgentSkipsPreloaded:
    """_init_agent() should skip DB load when history is already populated."""

    def test_init_agent_skips_db_when_preloaded(self):
        """If conversation_history is already set, _init_agent should not
        reload from the DB."""
        cli = _make_cli(resume="preloaded_session")
        cli.conversation_history = _simple_history()

        mock_db = MagicMock()
        cli._session_db = mock_db

        # _init_agent will fail at credential resolution (no real API key),
        # but the session-loading block should be skipped entirely
        with patch.object(cli, "_ensure_runtime_credentials", return_value=False):
            cli._init_agent()

        # get_messages_as_conversation should NOT have been called
        mock_db.get_messages_as_conversation.assert_not_called()


# ── Config default tests ─────────────────────────────────────────────


class TestResumeDisplayConfig:
    """resume_display config option defaults and behavior."""

    def test_default_config_has_resume_display(self):
        """DEFAULT_CONFIG in hermes_cli/config.py includes resume_display."""
        from hermes_cli.config import DEFAULT_CONFIG

        display = DEFAULT_CONFIG.get("display", {})
        assert "resume_display" in display
        assert display["resume_display"] == "full"

    def test_cli_defaults_have_resume_display(self):
        """cli.py load_cli_config defaults include resume_display."""
        from cli import load_cli_config

        # Path.exists is forced to False while load_cli_config() runs.
        with (
            patch("pathlib.Path.exists", return_value=False),
            patch.dict("os.environ", {"LLM_MODEL": ""}, clear=False),
        ):
            config = load_cli_config()

        display = config.get("display", {})
        assert display.get("resume_display") == "full"