"""Tests for mlflow.claude_code.tracing: timestamp parsing, logging setup,
hook responses, async-flush helpers, and transcript/SDK message processing."""

import importlib
import json
import logging
from pathlib import Path

import pytest
from claude_agent_sdk.types import (
    AssistantMessage,
    ResultMessage,
    TextBlock,
    ToolResultBlock,
    ToolUseBlock,
    UserMessage,
)

import mlflow
import mlflow.claude_code.tracing as tracing_module
from mlflow.claude_code.tracing import (
    CLAUDE_TRACING_LEVEL,
    METADATA_KEY_CLAUDE_CODE_VERSION,
    find_last_user_message_index,
    get_hook_response,
    parse_timestamp_to_ns,
    process_sdk_messages,
    process_transcript,
    setup_logging,
)
from mlflow.entities.span import SpanType
from mlflow.tracing.constant import SpanAttributeKey, TraceMetadataKey

# ============================================================================
# TIMESTAMP PARSING TESTS
# ============================================================================


def test_parse_timestamp_to_ns_iso_string():
    iso_timestamp = "2024-01-15T10:30:45.123456Z"
    result = parse_timestamp_to_ns(iso_timestamp)

    # Verify it returns an integer (nanoseconds)
    assert isinstance(result, int)
    assert result > 0


def test_parse_timestamp_to_ns_unix_seconds():
    unix_timestamp = 1705312245.123456
    result = parse_timestamp_to_ns(unix_timestamp)

    # Should convert seconds to nanoseconds
    expected = int(unix_timestamp * 1_000_000_000)
    assert result == expected


def test_parse_timestamp_to_ns_large_number():
    large_timestamp = 1705312245123
    result = parse_timestamp_to_ns(large_timestamp)

    # Function treats large numbers as seconds and converts to nanoseconds
    # Just verify we get a reasonable nanosecond value
    assert isinstance(result, int)
    assert result > 0


# ============================================================================
# LOGGING TESTS
# ============================================================================


def test_setup_logging_creates_logger(monkeypatch, tmp_path):
    monkeypatch.chdir(tmp_path)
    logger = setup_logging()

    # Verify logger was created
    assert logger is not None
    assert logger.name == "mlflow.claude_code.tracing"

    # Verify log directory was created
    log_dir = tmp_path / ".claude" / "mlflow"
    assert log_dir.exists()
    assert log_dir.is_dir()


def test_custom_logging_level():
    setup_logging()

    # Custom level sits strictly between INFO and WARNING and is registered by name.
    assert logging.INFO < CLAUDE_TRACING_LEVEL < logging.WARNING
    assert logging.getLevelName(CLAUDE_TRACING_LEVEL) == "CLAUDE_TRACING"


def test_get_logger_lazy_initialization(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    monkeypatch.chdir(tmp_path)

    # Force reload to reset the module state
    importlib.reload(tracing_module)

    log_dir = tmp_path / ".claude" / "mlflow"

    # Before calling get_logger(), the log directory should NOT exist
    assert not log_dir.exists()

    # Call get_logger() for the first time - this should trigger initialization
    logger1 = tracing_module.get_logger()

    # After calling get_logger(), the log directory SHOULD exist
    assert log_dir.exists()
    assert log_dir.is_dir()

    # Verify logger was created properly
    assert logger1 is not None
    assert logger1.name == "mlflow.claude_code.tracing"

    # Call get_logger() again - should return the same logger instance
    logger2 = tracing_module.get_logger()
    assert logger2 is logger1


# ============================================================================
# HOOK RESPONSE TESTS
# ============================================================================


def test_get_hook_response_success():
    response = get_hook_response()
    assert response == {"continue": True}


def test_get_hook_response_with_error():
    response = get_hook_response(error="Test error")
    assert response == {"continue": False, "stopReason": "Test error"}


def test_get_hook_response_with_additional_fields():
    response = get_hook_response(custom_field="value")
    assert response == {"continue": True, "custom_field": "value"}


# ============================================================================
# ASYNC TRACE LOGGING UTILITY TESTS
# ============================================================================


def test_flush_trace_async_logging_calls_flush(monkeypatch):
    mock_exporter = type("MockExporter", (), {"_async_queue": True})()
    monkeypatch.setattr(tracing_module, "_get_trace_exporter", lambda: mock_exporter)
    flushed = []
    monkeypatch.setattr(mlflow, "flush_trace_async_logging", lambda: flushed.append(True))
    tracing_module._flush_trace_async_logging()
    assert len(flushed) == 1


def test_flush_trace_async_logging_skips_without_async_queue(monkeypatch):
    mock_exporter = object()  # no _async_queue attribute
    monkeypatch.setattr(tracing_module, "_get_trace_exporter", lambda: mock_exporter)
    flushed = []
    monkeypatch.setattr(mlflow, "flush_trace_async_logging", lambda: flushed.append(True))
    tracing_module._flush_trace_async_logging()
    assert len(flushed) == 0


# ============================================================================
# INTEGRATION TESTS
# ============================================================================

# Sample Claude Code transcript for testing
DUMMY_TRANSCRIPT = [
    {
        "type": "user",
        "message": {"role": "user", "content": "What is 2 + 2?"},
        "timestamp": "2025-01-15T10:00:00.000Z",
        "sessionId": "test-session-123",
    },
    {
        "type": "assistant",
        "message": {
            "role": "assistant",
            "content": [{"type": "text", "text": "Let me calculate that for you."}],
        },
        "timestamp": "2025-01-15T10:00:01.000Z",
    },
    {
        "type": "assistant",
        "message": {
            "role": "assistant",
            "content": [
                {
                    "type": "tool_use",
                    "id": "tool_123",
                    "name": "Bash",
                    "input": {"command": "echo $((2 + 2))"},
                }
            ],
        },
        "timestamp": "2025-01-15T10:00:02.000Z",
    },
    {
        "type": "user",
        "message": {
            "role": "user",
            "content": [{"type": "tool_result", "tool_use_id": "tool_123", "content": "4"}],
        },
        "timestamp": "2025-01-15T10:00:03.000Z",
    },
    {
        "type": "assistant",
        "message": {
            "role": "assistant",
            "content": [{"type": "text", "text": "The answer is 4."}],
        },
        "timestamp": "2025-01-15T10:00:04.000Z",
    },
]


@pytest.fixture
def mock_transcript_file(tmp_path):
    transcript_path = tmp_path / "transcript.jsonl"
    with open(transcript_path, "w") as f:
        for entry in DUMMY_TRANSCRIPT:
            f.write(json.dumps(entry) + "\n")
    return str(transcript_path)


def test_process_transcript_creates_trace(mock_transcript_file):
    trace = process_transcript(mock_transcript_file, "test-session-123")

    # Verify trace was created
    assert trace is not None

    # Verify trace has spans
    spans = list(trace.search_spans())
    assert len(spans) > 0

    # Verify root span and metadata
    root_span = trace.data.spans[0]
    assert root_span.name == "claude_code_conversation"
    assert root_span.span_type == SpanType.AGENT
    assert trace.info.trace_metadata.get("mlflow.trace.session") == "test-session-123"


def test_process_transcript_creates_spans(mock_transcript_file):
    trace = process_transcript(mock_transcript_file, "test-session-123")

    assert trace is not None

    # Verify trace has spans
    spans = list(trace.search_spans())
    assert len(spans) > 0

    # Find LLM and tool spans
    llm_spans = [s for s in spans if s.span_type == SpanType.LLM]
    tool_spans = [s for s in spans if s.span_type == SpanType.TOOL]

    assert len(llm_spans) == 2
    assert len(tool_spans) == 1

    # Verify tool span has proper attributes
    tool_span = tool_spans[0]
    assert tool_span.name == "tool_Bash"

    # Verify LLM spans have MESSAGE_FORMAT set to "anthropic" for Chat UI rendering
    for llm_span in llm_spans:
        assert llm_span.get_attribute(SpanAttributeKey.MESSAGE_FORMAT) == "anthropic"

    # Verify LLM span outputs are in Anthropic response format
    first_llm = llm_spans[0]
    outputs = first_llm.outputs
    assert outputs["type"] == "message"
    assert outputs["role"] == "assistant"
    assert isinstance(outputs["content"], list)

    # Verify LLM span inputs contain messages in Anthropic format
    inputs = first_llm.inputs
    assert "messages" in inputs
    messages = inputs["messages"]
    assert any(m["role"] == "user" for m in messages)


def test_process_transcript_returns_none_for_nonexistent_file():
    result = process_transcript("/nonexistent/path/transcript.jsonl", "test-session-123")
    assert result is None


def test_process_transcript_links_trace_to_run(mock_transcript_file):
    with mlflow.start_run() as run:
        trace = process_transcript(mock_transcript_file, "test-session-123")

        assert trace is not None
        assert trace.info.trace_metadata.get(TraceMetadataKey.SOURCE_RUN) == run.info.run_id


# Sample Claude Code transcript with token usage for testing
DUMMY_TRANSCRIPT_WITH_USAGE = [
    {
        "type": "user",
        "message": {"role": "user", "content": "Hello Claude!"},
        "timestamp": "2025-01-15T10:00:00.000Z",
        "sessionId": "test-session-usage",
    },
    {
        "type": "assistant",
        "message": {
            "role": "assistant",
            "content": [{"type": "text", "text": "Hello! How can I help you today?"}],
            "model": "claude-sonnet-4-20250514",
            "usage": {"input_tokens": 150, "output_tokens": 25},
        },
        "timestamp": "2025-01-15T10:00:01.000Z",
    },
]


@pytest.fixture
def mock_transcript_file_with_usage(tmp_path):
    transcript_path = tmp_path / "transcript_with_usage.jsonl"
    with open(transcript_path, "w") as f:
        for entry in DUMMY_TRANSCRIPT_WITH_USAGE:
            f.write(json.dumps(entry) + "\n")
    return str(transcript_path)


def test_process_transcript_tracks_token_usage(mock_transcript_file_with_usage):
    trace = process_transcript(mock_transcript_file_with_usage, "test-session-usage")

    assert trace is not None

    # Find the LLM span
    spans = list(trace.search_spans())
    llm_spans = [s for s in spans if s.span_type == SpanType.LLM]

    assert len(llm_spans) == 1
    llm_span = llm_spans[0]

    # Verify token usage is tracked using the standardized CHAT_USAGE attribute
    token_usage = llm_span.get_attribute(SpanAttributeKey.CHAT_USAGE)
    assert token_usage is not None
    assert token_usage["input_tokens"] == 150
    assert token_usage["output_tokens"] == 25
    assert token_usage["total_tokens"] == 175

    # Verify trace-level token usage aggregation works
    assert trace.info.token_usage is not None
    assert trace.info.token_usage["input_tokens"] == 150
    assert trace.info.token_usage["output_tokens"] == 25
    assert trace.info.token_usage["total_tokens"] == 175


def test_process_transcript_preserves_cache_tokens(tmp_path):
    """Verify cache_read/cache_creation fields from Anthropic usage survive on the
    CHAT_USAGE span attribute so prompt-cache hit rate is observable.
    """
    transcript_entries = [
        {
            "type": "user",
            "message": {"role": "user", "content": "Cached prompt"},
            "timestamp": "2025-01-15T10:00:00.000Z",
            "sessionId": "cache-transcript-session",
        },
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "text", "text": "Answer using cache."}],
                "model": "claude-sonnet-4-20250514",
                "usage": {
                    "input_tokens": 36,
                    "cache_creation_input_tokens": 23554,
                    "cache_read_input_tokens": 139035,
                    "output_tokens": 3344,
                },
            },
            "timestamp": "2025-01-15T10:00:01.000Z",
        },
    ]

    transcript_path = tmp_path / "transcript_cache.jsonl"
    with open(transcript_path, "w") as f:
        for entry in transcript_entries:
            f.write(json.dumps(entry) + "\n")

    trace = process_transcript(str(transcript_path), "cache-transcript-session")

    assert trace is not None
    llm_spans = [s for s in trace.search_spans() if s.span_type == SpanType.LLM]
    assert len(llm_spans) == 1

    # input_tokens is the non-cached input the Anthropic API reports, matching
    # mlflow.anthropic.autolog. Cache fields are exposed as separate keys so
    # consumers can compute cache hit rate.
    token_usage = llm_spans[0].get_attribute(SpanAttributeKey.CHAT_USAGE)
    assert token_usage["input_tokens"] == 36
    assert token_usage["output_tokens"] == 3344
    assert token_usage["total_tokens"] == 36 + 3344
    assert token_usage["cache_read_input_tokens"] == 139035
    assert token_usage["cache_creation_input_tokens"] == 23554


# ============================================================================
# SDK MESSAGE PROCESSING TESTS
# ============================================================================


def test_process_sdk_messages_empty_list():
    assert process_sdk_messages([]) is None


def test_process_sdk_messages_no_user_prompt():
    messages = [
        AssistantMessage(
            content=[TextBlock(text="Hello!")],
            model="claude-sonnet-4-20250514",
        ),
    ]
    assert process_sdk_messages(messages) is None


def test_process_sdk_messages_simple_conversation():
    messages = [
        UserMessage(content="What is 2 + 2?"),
        AssistantMessage(
            content=[TextBlock(text="The answer is 4.")],
            model="claude-sonnet-4-20250514",
        ),
        ResultMessage(
            subtype="success",
            duration_ms=1000,
            duration_api_ms=800,
            is_error=False,
            num_turns=1,
            session_id="test-sdk-session",
            usage={"input_tokens": 100, "output_tokens": 20},
        ),
    ]

    trace = process_sdk_messages(messages, "test-sdk-session")

    assert trace is not None
    spans = list(trace.search_spans())

    root_span = trace.data.spans[0]
    assert root_span.name == "claude_code_conversation"
    assert root_span.span_type == SpanType.AGENT

    # LLM span should have conversation context as input in Anthropic format
    llm_spans = [s for s in spans if s.span_type == SpanType.LLM]
    assert len(llm_spans) == 1
    assert llm_spans[0].name == "llm"
    assert llm_spans[0].inputs["model"] == "claude-sonnet-4-20250514"
    assert llm_spans[0].inputs["messages"] == [{"role": "user", "content": "What is 2 + 2?"}]
    assert llm_spans[0].get_attribute(SpanAttributeKey.MESSAGE_FORMAT) == "anthropic"

    # Output should be in Anthropic response format
    outputs = llm_spans[0].outputs
    assert outputs["type"] == "message"
    assert outputs["role"] == "assistant"
    assert outputs["content"] == [{"type": "text", "text": "The answer is 4."}]

    # Token usage from ResultMessage should be on the root span and trace level
    token_usage = root_span.get_attribute(SpanAttributeKey.CHAT_USAGE)
    assert token_usage is not None
    assert token_usage["input_tokens"] == 100
    assert token_usage["output_tokens"] == 20
    assert token_usage["total_tokens"] == 120

    assert trace.info.token_usage is not None
    assert trace.info.token_usage["input_tokens"] == 100
    assert trace.info.token_usage["output_tokens"] == 20
    assert trace.info.token_usage["total_tokens"] == 120

    # Duration should reflect ResultMessage.duration_ms (1000ms = 1s)
    duration_ns = root_span.end_time_ns - root_span.start_time_ns
    assert abs(duration_ns - 1_000_000_000) < 1_000_000  # within 1ms tolerance

    assert trace.info.trace_metadata.get("mlflow.trace.session") == "test-sdk-session"
    assert trace.info.request_preview == "What is 2 + 2?"
    assert trace.info.response_preview == "The answer is 4."
def test_process_sdk_messages_multiple_tools():
    messages = [
        UserMessage(content="Read two files"),
        AssistantMessage(
            content=[
                ToolUseBlock(id="tool_1", name="Read", input={"path": "a.py"}),
                ToolUseBlock(id="tool_2", name="Read", input={"path": "b.py"}),
            ],
            model="claude-sonnet-4-20250514",
        ),
        UserMessage(
            content=[
                ToolResultBlock(tool_use_id="tool_1", content="content of a"),
                ToolResultBlock(tool_use_id="tool_2", content="content of b"),
            ],
            tool_use_result={"tool_use_id": "tool_1"},
        ),
        AssistantMessage(
            content=[TextBlock(text="Here are the contents.")],
            model="claude-sonnet-4-20250514",
        ),
        ResultMessage(
            subtype="success",
            duration_ms=2000,
            duration_api_ms=1500,
            is_error=False,
            num_turns=2,
            session_id="multi-tool-session",
        ),
    ]

    trace = process_sdk_messages(messages, "multi-tool-session")

    assert trace is not None

    # Both Read invocations should surface as TOOL spans with their results intact.
    tool_spans = [s for s in trace.search_spans() if s.span_type == SpanType.TOOL]
    assert len(tool_spans) == 2
    assert all(s.name == "tool_Read" for s in tool_spans)
    assert {s.outputs["result"] for s in tool_spans} == {"content of a", "content of b"}


def test_process_sdk_messages_cache_tokens():
    messages = [
        UserMessage(content="Hello"),
        AssistantMessage(
            content=[TextBlock(text="Hi!")],
            model="claude-sonnet-4-20250514",
        ),
        ResultMessage(
            subtype="success",
            duration_ms=5000,
            duration_api_ms=4000,
            is_error=False,
            num_turns=1,
            session_id="cache-session",
            usage={
                "input_tokens": 36,
                "cache_creation_input_tokens": 23554,
                "cache_read_input_tokens": 139035,
                "output_tokens": 3344,
            },
        ),
    ]

    trace = process_sdk_messages(messages, "cache-session")

    assert trace is not None
    root_span = trace.data.spans[0]

    # input_tokens is the non-cached input reported by the Anthropic API (same
    # convention as mlflow.anthropic.autolog); cache counters stay as separate
    # keys so consumers can derive cache hit rate without scraping transcripts.
    token_usage = root_span.get_attribute(SpanAttributeKey.CHAT_USAGE)
    assert token_usage["input_tokens"] == 36
    assert token_usage["output_tokens"] == 3344
    assert token_usage["total_tokens"] == 36 + 3344
    assert token_usage["cache_read_input_tokens"] == 139035
    assert token_usage["cache_creation_input_tokens"] == 23554

    # Trace-level aggregation should match the root-span usage.
    assert trace.info.token_usage["input_tokens"] == 36
    assert trace.info.token_usage["output_tokens"] == 3344


# ============================================================================
# FIND LAST USER MESSAGE INDEX TESTS
# ============================================================================


def test_find_last_user_message_skips_skill_injection():
    transcript = [
        {"type": "queue-operation"},
        {"type": "queue-operation"},
        # Entry 2: actual user prompt
        {
            "type": "user",
            "message": {"role": "user", "content": "Enable tracing on the agent."},
            "timestamp": "2025-01-01T00:00:00Z",
        },
        # Entry 3: assistant thinking
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "thinking", "thinking": "Let me use the skill."}],
            },
            "timestamp": "2025-01-01T00:00:01Z",
        },
        # Entry 4: assistant invokes Skill tool
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "id": "toolu_abc123",
                        "name": "Skill",
                        "input": {"skill": "instrumenting-with-mlflow-tracing"},
                    }
                ],
            },
            "timestamp": "2025-01-01T00:00:02Z",
        },
        # Entry 5: tool result with commandName (correctly skipped by toolUseResult check)
        {
            "type": "user",
            "toolUseResult": {
                "success": True,
                "commandName": "instrumenting-with-mlflow-tracing",
            },
            "message": {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "toolu_abc123",
                        "content": "Launching skill: instrumenting-with-mlflow-tracing",
                    }
                ],
            },
            "timestamp": "2025-01-01T00:00:03Z",
        },
        # Entry 6: skill content injection (BUG: not flagged as tool result)
        {
            "type": "user",
            "message": {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": (
                            "Base directory for this skill: /path/to/skill\n\n"
                            "# MLflow Tracing Guide\n\n...(full skill content)..."
                        ),
                    }
                ],
            },
            "timestamp": "2025-01-01T00:00:04Z",
        },
        # Entry 7: assistant continues
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "thinking", "thinking": "Now let me implement tracing."}],
            },
            "timestamp": "2025-01-01T00:00:05Z",
        },
        # Entry 8: assistant text response
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "text", "text": "I've enabled tracing on the agent."}],
            },
            "timestamp": "2025-01-01T00:00:06Z",
        },
    ]

    idx = find_last_user_message_index(transcript)

    # The actual user prompt (entry 2) wins, not the skill injection (entry 6).
    assert idx == 2
    assert transcript[idx]["message"]["content"] == "Enable tracing on the agent."
def test_find_last_user_message_index_basic():
    transcript = [
        {"type": "queue-operation"},
        {
            "type": "user",
            "message": {"role": "user", "content": "First question"},
            "timestamp": "2025-01-01T00:00:00Z",
        },
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "text", "text": "First answer"}],
            },
            "timestamp": "2025-01-01T00:00:01Z",
        },
        {
            "type": "user",
            "message": {"role": "user", "content": "Second question"},
            "timestamp": "2025-01-01T00:00:02Z",
        },
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "text", "text": "Second answer"}],
            },
            "timestamp": "2025-01-01T00:00:03Z",
        },
    ]

    idx = find_last_user_message_index(transcript)

    # The later of the two plain user messages is selected.
    assert idx == 3
    assert transcript[idx]["message"]["content"] == "Second question"


def test_find_last_user_message_skips_consecutive_skill_injections():
    transcript = [
        # Entry 0: actual user prompt
        {
            "type": "user",
            "message": {"role": "user", "content": "Do the thing."},
            "timestamp": "2025-01-01T00:00:00Z",
        },
        # Entry 1: assistant invokes first Skill
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "id": "toolu_1",
                        "name": "Skill",
                        "input": {"skill": "skill-one"},
                    }
                ],
            },
            "timestamp": "2025-01-01T00:00:01Z",
        },
        # Entry 2: first skill tool result
        {
            "type": "user",
            "toolUseResult": {"success": True, "commandName": "skill-one"},
            "message": {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "toolu_1",
                        "content": "Launching skill: skill-one",
                    }
                ],
            },
            "timestamp": "2025-01-01T00:00:02Z",
        },
        # Entry 3: first skill content injection
        {
            "type": "user",
            "message": {
                "role": "user",
                "content": [{"type": "text", "text": "Base directory: /skill-one\n# Skill One"}],
            },
            "timestamp": "2025-01-01T00:00:03Z",
        },
        # Entry 4: assistant invokes second Skill
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [
                    {
                        "type": "tool_use",
                        "id": "toolu_2",
                        "name": "Skill",
                        "input": {"skill": "skill-two"},
                    }
                ],
            },
            "timestamp": "2025-01-01T00:00:04Z",
        },
        # Entry 5: second skill tool result
        {
            "type": "user",
            "toolUseResult": {"success": True, "commandName": "skill-two"},
            "message": {
                "role": "user",
                "content": [
                    {
                        "type": "tool_result",
                        "tool_use_id": "toolu_2",
                        "content": "Launching skill: skill-two",
                    }
                ],
            },
            "timestamp": "2025-01-01T00:00:05Z",
        },
        # Entry 6: second skill content injection
        {
            "type": "user",
            "message": {
                "role": "user",
                "content": [{"type": "text", "text": "Base directory: /skill-two\n# Skill Two"}],
            },
            "timestamp": "2025-01-01T00:00:06Z",
        },
        # Entry 7: assistant response
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "text", "text": "Done."}],
            },
            "timestamp": "2025-01-01T00:00:07Z",
        },
    ]

    idx = find_last_user_message_index(transcript)

    # Both skill injections (entries 3 and 6) are skipped; entry 0 is the prompt.
    assert idx == 0
    assert transcript[idx]["message"]["content"] == "Do the thing."
def test_process_transcript_captures_claude_code_version(tmp_path):
    transcript = [
        {
            "type": "queue-operation",
            "operation": "dequeue",
            "timestamp": "2025-01-15T09:59:59.000Z",
            "sessionId": "test-version-session",
        },
        {
            "type": "user",
            "version": "2.1.34",
            "message": {"role": "user", "content": "Hello!"},
            "timestamp": "2025-01-15T10:00:00.000Z",
        },
        {
            "type": "assistant",
            "version": "2.1.34",
            "message": {
                "role": "assistant",
                "content": [{"type": "text", "text": "Hi there!"}],
            },
            "timestamp": "2025-01-15T10:00:01.000Z",
        },
    ]

    # Serialize as JSONL and run the processor over it.
    transcript_path = tmp_path / "version_transcript.jsonl"
    transcript_path.write_text("".join(json.dumps(e) + "\n" for e in transcript))
    trace = process_transcript(str(transcript_path), "test-version-session")

    assert trace is not None
    # The per-entry "version" field is promoted to trace metadata.
    assert trace.info.trace_metadata.get(METADATA_KEY_CLAUDE_CODE_VERSION) == "2.1.34"


def test_process_transcript_no_version_field(mock_transcript_file):
    trace = process_transcript(mock_transcript_file, "test-session-no-version")

    assert trace is not None
    # Without a "version" field in the transcript, no version metadata is recorded.
    assert METADATA_KEY_CLAUDE_CODE_VERSION not in trace.info.trace_metadata


def test_process_transcript_includes_steer_messages(tmp_path):
    transcript = [
        {
            "type": "user",
            "message": {"role": "user", "content": "Tell me about Python."},
            "timestamp": "2025-01-15T10:00:00.000Z",
        },
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "text", "text": "Python is a programming language."}],
            },
            "timestamp": "2025-01-15T10:00:01.000Z",
        },
        {
            "type": "queue-operation",
            "operation": "enqueue",
            "content": "also tell me about Java",
            "timestamp": "2025-01-15T10:00:02.000Z",
            "sessionId": "test-steer-session",
        },
        {
            "type": "queue-operation",
            "operation": "remove",
            "timestamp": "2025-01-15T10:00:03.000Z",
            "sessionId": "test-steer-session",
        },
        {
            "type": "assistant",
            "message": {
                "role": "assistant",
                "content": [{"type": "text", "text": "Java is also a programming language."}],
            },
            "timestamp": "2025-01-15T10:00:04.000Z",
        },
    ]

    transcript_path = tmp_path / "steer_transcript.jsonl"
    transcript_path.write_text("".join(json.dumps(e) + "\n" for e in transcript))
    trace = process_transcript(str(transcript_path), "test-steer-session")
    assert trace is not None

    llm_spans = [s for s in trace.search_spans() if s.span_type == SpanType.LLM]
    assert len(llm_spans) == 2

    # The second LLM span's inputs must carry the enqueued steer message as a
    # user-role message.
    input_messages = llm_spans[1].inputs["messages"]
    steer_messages = [m for m in input_messages if m.get("content") == "also tell me about Java"]
    assert len(steer_messages) == 1
    assert steer_messages[0]["role"] == "user"