claude_code.py
1 """ 2 Claude Code provider for MLflow Assistant. 3 4 This module provides the Claude Code integration for the assistant API, 5 enabling AI-powered trace analysis through the Claude Code CLI. 6 """ 7 8 import asyncio 9 import json 10 import logging 11 import os 12 import shutil 13 import subprocess 14 from pathlib import Path 15 from typing import Any, AsyncGenerator, Callable 16 17 from mlflow.assistant.providers.base import ( 18 AssistantProvider, 19 CLINotInstalledError, 20 NotAuthenticatedError, 21 load_config, 22 ) 23 from mlflow.assistant.types import ( 24 ContentBlock, 25 Event, 26 Message, 27 TextBlock, 28 ThinkingBlock, 29 ToolResultBlock, 30 ToolUseBlock, 31 ) 32 from mlflow.server.assistant.session import clear_process_pid, save_process_pid 33 34 _logger = logging.getLogger(__name__) 35 36 37 # Allowed tools for Claude Code CLI 38 # Restrict to only Bash commands that use MLflow CLI 39 BASE_ALLOWED_TOOLS = [ 40 "Bash(mlflow:*)", 41 "Skill", # Skill tool needs to be explicitly allowed 42 ] 43 FILE_EDIT_TOOLS = [ 44 # Allow writing evaluation scripts, editing code, reading 45 # project files, etc. in the project directory 46 "Edit(*)", 47 "Read(*)", 48 "Write(*)", 49 # Allow writing large command output to files in /tmp so it 50 # can be analyzed with bash commands (e.g. grep, jq) without 51 # loading full contents into context 52 "Edit(//tmp/**)", 53 "Read(//tmp/**)", 54 "Write(//tmp/**)", 55 ] 56 DOCS_TOOLS = ["WebFetch(domain:mlflow.org)"] 57 58 CLAUDE_SYSTEM_PROMPT = """\ 59 You are an MLflow assistant helping users with their MLflow projects. Users interact with 60 you through the MLflow UI. You can answer questions about MLflow, read and analyze data 61 from MLflow, integrate MLflow with a codebase, run scripts to log data to MLflow, use 62 MLflow to debug and improve AI applications like models & agents, and perform many more 63 MLflow-related tasks. 64 65 The following instructions are fundamental to your behavior. You MUST ALWAYS follow them 66 exactly as specified. You MUST re-read them carefully whenever you start a new response to the user. 67 Do NOT ignore or skip these instructions under any circumstances! 68 69 ## CRITICAL: Be Proactive and Minimize User Effort 70 71 NEVER ask the user to do something manually that you can do for them. 72 73 You MUST always try to minimize the number of steps the user has to take manually. The user 74 is relying on you to accelerate their workflows. For example, if the user asks for a tutorial on 75 how to do something, find the answer and then offer to do it for them using MLflow commands or code, 76 rather than just telling them how to do it themselves. 77 78 ## CRITICAL: Using Skills 79 80 You have Claude Code skills for MLflow tasks. Each skill listed in your available skills has a 81 description that explains when to use it. 82 83 You MUST use skills for anything relating to: 84 85 - Onboarding and getting started with MLflow (e.g. new user questions about MLflow) 86 - Reading or analyzing traces and chat sessions 87 - Searching for traces and chat sessions 88 - Searching for MLflow documentation 89 - Running MLflow GenAI evaluation to evaluate traces or agents 90 - Querying MLflow metrics 91 - Anything else explicitly covered by a skill 92 (you MUST read skill descriptions carefully before acting) 93 94 ALWAYS abide by the following rules: 95 96 - Before responding to any user message or request, YOU MUST consult your list of available skills 97 to determine if a relevant skill exists. If a relevant skill exists, you MUST try using it first. 98 Using the right skill leads to more effective outcomes. 99 100 Even if your conversation with the user has many previous messages, EVERY new message from the 101 user MUST trigger a skills check. Do NOT skip this step. 102 103 - When following a skill, you MUST read its instructions VERY carefully — 104 especially command syntax, which must be followed precisely. 105 106 - NEVER run ANY command before checking for a relevant skill. ALWAYS 107 check for skills first. For example, do not try to consult the CLI 108 reference for searching traces until you have read the skills for 109 trace search and analysis first. 110 111 ## CRITICAL: Complete All Work Before Finishing Your Response 112 113 You may provide progress updates throughout the process, but do NOT finish your response until ALL 114 work — including work done by subagents — is fully complete. The user interacts with you 115 through a UI that does not support fetching results from async subagents. If you finish 116 responding before subagent work is done, the user will never see those results. Always wait for 117 all subagent tasks to finish and include their results in your final response. 118 119 ## MLflow Server Connection (Pre-configured) 120 121 The MLflow tracking server is running at: `{tracking_uri}` 122 123 **CRITICAL**: 124 - The server is ALREADY RUNNING. Never ask the user to start or set up the MLflow server. 125 - ALL MLflow operations MUST target this server. You must assume MLFLOW_TRACKING_URI env var is. 126 always set. DO NOT try to override it or set custom env var to the bash command. 127 - Assume the server is available and operational at all times, unless you have good reason 128 to believe otherwise (e.g. an error that seems likely caused by server unavailability). 129 130 ## User Context 131 132 The user has already installed MLflow and is working within the MLflow UI. Never instruct the 133 user to install MLflow or start the MLflow UI/server - these are already set up and running. 134 Under normal conditions, never verify that the server is running; if the user is using the 135 MLflow UI, the server is clearly operational. Only check server status when debugging or 136 investigating a suspected server error. 137 138 Since the user is already in the MLflow UI, do NOT unnecessarily reference the server URL in 139 your responses (e.g., "go to http://localhost:8888" or "refresh your MLflow UI at ..."). 140 Only include URLs when they are specific, actionable links to a particular page in the UI 141 (e.g., a link to a specific experiment, run, or trace). 142 143 User messages may include a <context> block containing JSON that represents what the user is 144 currently viewing on screen (e.g., traceId, experimentId, selectedTraceIds). Use this context 145 to understand what entities the user is referring to when they ask questions, as well as 146 where the user wants to log (write) or update information. 147 148 ## Command Preferences (IMPORTANT) 149 150 ### MLflow Read-Only Operations 151 152 For querying and reading MLflow data (experiments, runs, traces, metrics, etc.): 153 * STRONGLY PREFER MLflow CLI commands directly. Try to use the CLI until you are certain 154 that it cannot accomplish the task. Do NOT mistake syntax errors or your own mistakes 155 for limitations of the CLI. 156 * When using MLflow CLI, always use `--help` to discover all available options. 157 Do not skip this step or you will not get the correct command. 158 * Trust that MLflow CLI commands will work. Do not add error handling or fallbacks to Python. 159 * Never combine two bash commands with `&&` or `||`. That will error out. 160 * If the CLI cannot accomplish the task, fall back to the MLflow SDK. 161 * When working with large output, write it to files /tmp and use 162 bash commands to analyze the files, rather than reading the full contents into context. 163 164 ### MLflow Write Operations 165 166 For logging new data to MLflow (traces, runs, metrics, artifacts, etc.): 167 * The CLI does not support all write operations, so use an MLflow SDK instead. 168 * Use the appropriate SDK for your working directory's project language 169 (Python, TypeScript, etc.). Fall back to Python if no project is detected or if 170 MLflow does not offer an SDK for the detected language. 171 * Always set the tracking URI before logging (see "MLflow Server Connection" section above). 172 173 IMPORTANT: After writing data, always tell the user how to access it. Prefer directing them 174 to the MLflow UI (provide specific URLs where possible, e.g., `{tracking_uri}/#/experiments/123`). 175 If the data is not viewable in the UI, explain how to access it via MLflow CLI or API. 176 177 ### Handling permissions issues 178 179 If you require additional permissions to execute a command or perform an action, ALWAYS tell the 180 user what specific permission(s) you need. 181 182 If the permissions are for the MLflow CLI, then the user likely has a permissions override in 183 their Claude Code settings JSON file or Claude Code hooks. In this case, tell the user to edit 184 their settings files or hooks to provide the exact permission(s) needed in order to proceed. Give 185 them the exact permission(s) require in Claude Code syntax. 186 187 Otherwise, tell the user to enable full access permissions from the Assistant Settings UI. Also tell 188 the user that, if full access permissions are already enabled, then they need to check their 189 Claude Code settings JSON file or Claude Code hooks to ensure there are no permission overrides that 190 conflict with full access (Claude Code's 'bypassPermissions' mode). Finally, tell the user how to 191 edit their Claude Code settings or hooks to enable the specific permission(s) needed to proceed. 192 This gives the user all of the available options and necessary information to resolve permission 193 issues. 194 195 ### Data Access 196 197 NEVER access the MLflow server's backend storage directly. Always use MLflow APIs or CLIs and 198 let the server handle storage. Specifically: 199 - NEVER use the MLflow CLI or API with a database or file tracking URI - only use the configured 200 HTTP tracking URI (`{tracking_uri}`). 201 - NEVER use database CLI tools (e.g., sqlite3, psql) to connect directly to the MLflow database. 202 - NEVER read the filesystem or cloud storage to access MLflow artifact storage directly. 203 - ALWAYS let the MLflow server handle all storage operations through its APIs. 204 205 ## MLflow Documentation 206 207 If you have a permission to fetch MLflow documentation, use the WebFetch tool to fetch 208 pages from mlflow.org to provide accurate information about MLflow. 209 210 ### Accessing Documentation 211 212 When reading documentation, ALWAYS start from https://mlflow.org/docs/latest/llms.txt page that 213 lists links to each pages of the documentation. Start with that page and follow the links to the 214 relevant pages to get more information. 215 216 IMPORTANT: When accessing documentation pages or returning documentation links to users, always use 217 the latest version URL (https://mlflow.org/docs/latest/...) instead of version-specific URLs. 218 219 ### CRITICAL: Presenting Documentation Results 220 221 IMPORTANT: ALWAYS offer to complete tasks from the documentation results yourself, on behalf of the 222 user. Since you are capable of executing code, debugging, logging data to MLflow, and much more, do 223 NOT just return documentation links or excerpts for the user to read and act on themselves. 224 Only ask the user to do something manually if you have tried and cannot do it yourself, or 225 if you truly do not know how. 226 227 IMPORTANT: When presenting information from documentation, you MUST adapt it to the user's 228 context (see "User Context" section above). Before responding, thoroughly re-read the User Context 229 section and adjust your response accordingly. Always consider what the user already has set up 230 and running. For example: 231 - Do NOT tell the user to install MLflow or how to install it - it is already installed. 232 - Do NOT tell the user to start the MLflow server or UI - they are already running. 233 - Do NOT tell the user to open a browser to view the MLflow UI - they are already using it. 234 - Skip any setup/installation steps that are already complete for this user. 235 Focus on the substantive content that is relevant to the user's actual question. 236 """ 237 238 239 def _build_system_prompt(tracking_uri: str) -> str: 240 """ 241 Build the system prompt for the Claude Code assistant. 242 243 Args: 244 tracking_uri: The MLflow tracking server URI (e.g., "http://localhost:5000"). 245 246 Returns: 247 The complete system prompt string. 248 """ 249 return CLAUDE_SYSTEM_PROMPT.format(tracking_uri=tracking_uri) 250 251 252 class ClaudeCodeProvider(AssistantProvider): 253 """Assistant provider using Claude Code CLI.""" 254 255 @property 256 def name(self) -> str: 257 return "claude_code" 258 259 @property 260 def display_name(self) -> str: 261 return "Claude Code" 262 263 @property 264 def description(self) -> str: 265 return "AI-powered assistant using Claude Code CLI" 266 267 def is_available(self) -> bool: 268 return shutil.which("claude") is not None 269 270 def check_connection(self, echo: Callable[[str], None] | None = None) -> None: 271 """ 272 Check if Claude CLI is installed and authenticated. 273 274 Args: 275 echo: Optional function to print status messages. 276 277 Raises: 278 ProviderNotConfiguredError: If CLI is not installed or not authenticated. 279 """ 280 claude_path = shutil.which("claude") 281 if not claude_path: 282 if echo: 283 echo("Claude CLI not found") 284 raise CLINotInstalledError( 285 "Claude Code CLI is not installed. " 286 "Install it with: npm install -g @anthropic-ai/claude-code" 287 ) 288 289 if echo: 290 echo(f"Claude CLI found: {claude_path}") 291 echo("Checking connection... (this may take a few seconds)") 292 293 # Check authentication by running a minimal test prompt 294 try: 295 result = subprocess.run( 296 ["claude", "-p", "hi", "--max-turns", "1", "--output-format", "json"], 297 capture_output=True, 298 text=True, 299 timeout=30, 300 ) 301 302 if result.returncode == 0: 303 if echo: 304 echo("Authentication verified") 305 return 306 307 # Check for common auth errors in stderr 308 stderr = result.stderr.lower() 309 if "auth" in stderr or "login" in stderr or "unauthorized" in stderr: 310 error_msg = "Not authenticated. Please run: claude login" 311 else: 312 error_msg = result.stderr.strip() or f"Process exited with code {result.returncode}" 313 314 if echo: 315 echo(f"Authentication failed: {error_msg}") 316 raise NotAuthenticatedError(error_msg) 317 318 except subprocess.TimeoutExpired: 319 if echo: 320 echo("Authentication check timed out") 321 raise NotAuthenticatedError("Authentication check timed out") 322 except subprocess.SubprocessError as e: 323 if echo: 324 echo(f"Error checking authentication: {e}") 325 raise NotAuthenticatedError(str(e)) 326 327 def resolve_skills_path(self, base_directory: Path) -> Path: 328 """Resolve the path to the skills directory.""" 329 return base_directory / ".claude" / "skills" 330 331 async def astream( 332 self, 333 prompt: str, 334 tracking_uri: str, 335 session_id: str | None = None, 336 mlflow_session_id: str | None = None, 337 cwd: Path | None = None, 338 context: dict[str, Any] | None = None, 339 ) -> AsyncGenerator[Event, None]: 340 """ 341 Stream responses from Claude Code CLI asynchronously. 342 343 Args: 344 prompt: The prompt to send to Claude 345 tracking_uri: MLflow tracking server URI for the assistant to use 346 session_id: Claude session ID for resume 347 mlflow_session_id: MLflow session ID for PID tracking (enables cancellation) 348 cwd: Working directory for Claude Code CLI 349 context: Additional context for the assistant, such as information from 350 the current UI page the user is viewing (e.g., experimentId, traceId) 351 352 Yields: 353 Event objects 354 """ 355 claude_path = shutil.which("claude") 356 if not claude_path: 357 yield Event.from_error( 358 "Claude CLI not found. Please install Claude Code CLI and ensure it's in your PATH." 359 ) 360 return 361 362 # Build user message with context 363 if context: 364 user_message = f"<context>\n{json.dumps(context)}\n</context>\n\n{prompt}" 365 else: 366 user_message = prompt 367 368 # Build command 369 # Note: --verbose is required when using --output-format=stream-json with -p 370 cmd = [claude_path, "-p", user_message, "--output-format", "stream-json", "--verbose"] 371 372 # Add system prompt with tracking URI context 373 system_prompt = _build_system_prompt(tracking_uri) 374 cmd.extend(["--append-system-prompt", system_prompt]) 375 376 config = load_config(self.name) 377 378 # Handle permission mode 379 if config.permissions.full_access: 380 # Full access mode - bypass all permission checks 381 cmd.extend(["--permission-mode", "bypassPermissions"]) 382 else: 383 # Build allowed tools list based on permissions 384 allowed_tools = list(BASE_ALLOWED_TOOLS) 385 if config.permissions.allow_edit_files: 386 allowed_tools.extend(FILE_EDIT_TOOLS) 387 if config.permissions.allow_read_docs: 388 allowed_tools.extend(DOCS_TOOLS) 389 390 for tool in allowed_tools: 391 cmd.extend(["--allowed-tools", tool]) 392 393 if config.model and config.model != "default": 394 cmd.extend(["--model", config.model]) 395 396 if session_id: 397 cmd.extend(["--resume", session_id]) 398 399 process = None 400 try: 401 process = await asyncio.create_subprocess_exec( 402 *cmd, 403 stdout=asyncio.subprocess.PIPE, 404 stderr=asyncio.subprocess.PIPE, 405 cwd=cwd, 406 # Increase buffer limit from default 64KB to handle large JSON responses 407 # from Claude Code CLI (e.g., tool results containing large file contents) 408 limit=100 * 1024 * 1024, # 100 MB 409 # Specify tracking URI to let Claude Code CLI inherit it 410 # NB: `env` arg in `create_subprocess_exec` does not merge with the parent process's 411 # environment so we need to copy the parent process's environment explicitly. 412 env={**os.environ.copy(), "MLFLOW_TRACKING_URI": tracking_uri}, 413 ) 414 415 # Save PID for cancellation support 416 if mlflow_session_id and process.pid: 417 save_process_pid(mlflow_session_id, process.pid) 418 419 try: 420 async for line in process.stdout: 421 line_str = line.decode("utf-8").strip() 422 if not line_str: 423 continue 424 425 try: 426 data = json.loads(line_str) 427 428 if self._should_filter_out_message(data): 429 continue 430 431 if msg := self._parse_message_to_event(data): 432 yield msg 433 434 except json.JSONDecodeError: 435 # Non-JSON output, treat as plain text 436 yield Event.from_message(Message(role="user", content=line_str)) 437 finally: 438 # Clear PID when done (regardless of how we exit) 439 if mlflow_session_id: 440 clear_process_pid(mlflow_session_id) 441 442 # Wait for process to complete 443 await process.wait() 444 445 # Check if killed by interrupt (SIGKILL = -9) 446 if process.returncode == -9: 447 yield Event.from_interrupted() 448 return 449 450 if process.returncode != 0: 451 stderr = await process.stderr.read() 452 error_msg = ( 453 stderr.decode("utf-8").strip() 454 or f"Process exited with code {process.returncode}" 455 ) 456 yield Event.from_error(error_msg) 457 458 except Exception as e: 459 _logger.exception("Error running Claude Code CLI") 460 yield Event.from_error(str(e)) 461 finally: 462 if process is not None and process.returncode is None: 463 process.kill() 464 await process.wait() 465 466 def _parse_message_to_event(self, data: dict[str, Any]) -> Event | None: 467 """ 468 Parse json message from Claude Code CLI output. 469 470 Reference: https://github.com/anthropics/claude-agent-sdk-python/blob/29c12cd80b256e88f321b2b8f1f5a88445077aa5/src/claude_agent_sdk/_internal/message_parser.py#L24 471 472 Args: 473 data: Raw message dictionary from CLI output 474 475 Returns: 476 Parsed Event object 477 """ 478 message_type = data.get("type") 479 if not message_type: 480 return Event.from_error("Message missing 'type' field") 481 482 match message_type: 483 case "user": 484 try: 485 if isinstance(data["message"]["content"], list): 486 user_content_blocks = [] 487 for block in data["message"]["content"]: 488 match block["type"]: 489 case "text": 490 user_content_blocks.append(TextBlock(text=block["text"])) 491 case "tool_use": 492 user_content_blocks.append( 493 ToolUseBlock( 494 id=block["id"], 495 name=block["name"], 496 input=block["input"], 497 ) 498 ) 499 case "tool_result": 500 user_content_blocks.append( 501 ToolResultBlock( 502 tool_use_id=block["tool_use_id"], 503 content=block.get("content"), 504 is_error=block.get("is_error"), 505 ) 506 ) 507 msg = Message(role="user", content=user_content_blocks) 508 else: 509 msg = Message(role="user", content=data["message"]["content"]) 510 return Event.from_message(msg) 511 except KeyError as e: 512 return Event.from_error(f"Failed to parse user message: {e}") 513 514 case "assistant": 515 try: 516 if data["message"].get("error"): 517 return Event.from_error(data["message"]["error"]) 518 519 content_blocks: list[ContentBlock] = [] 520 for block in data["message"]["content"]: 521 match block["type"]: 522 case "text": 523 content_blocks.append(TextBlock(text=block["text"])) 524 case "thinking": 525 content_blocks.append( 526 ThinkingBlock( 527 thinking=block["thinking"], 528 signature=block["signature"], 529 ) 530 ) 531 case "tool_use": 532 content_blocks.append( 533 ToolUseBlock( 534 id=block["id"], 535 name=block["name"], 536 input=block["input"], 537 ) 538 ) 539 case "tool_result": 540 content_blocks.append( 541 ToolResultBlock( 542 tool_use_id=block["tool_use_id"], 543 content=block.get("content"), 544 is_error=block.get("is_error"), 545 ) 546 ) 547 548 msg = Message(role="assistant", content=content_blocks) 549 return Event.from_message(msg) 550 except KeyError as e: 551 return Event.from_error(f"Failed to parse assistant message: {e}") 552 553 case "system": 554 # NB: Skip system message. The system message from Claude Code CLI contains 555 # the various metadata about runtime, which is not used by the assistant UX. 556 return None 557 558 case "error": 559 try: 560 error_msg = data.get("error", {}).get("message", str(data.get("error"))) 561 return Event.from_error(error_msg) 562 except Exception as e: 563 return Event.from_error(f"Failed to parse error message: {e}") 564 565 case "result": 566 try: 567 return Event.from_result( 568 result=data.get("result"), 569 session_id=data["session_id"], 570 ) 571 except KeyError as e: 572 return Event.from_error(f"Failed to parse result message: {e}") 573 574 case "stream_event": 575 try: 576 return Event.from_stream_event(event=data["event"]) 577 except KeyError as e: 578 return Event.from_error(f"Failed to parse stream_event message: {e}") 579 580 case "rate_limit_event": 581 # rate_limit_event is a status event emitted by the CLI to report 582 # rate limit info. Only surface a message to the user when they are 583 # actually limited, not on every status update. 584 info = data.get("rate_limit_info", {}) 585 if info.get("status") == "limited": 586 resets_at = info.get("resetsAt") 587 msg = "You've hit a rate limit — please wait a moment and try again." 588 if resets_at: 589 msg += f" Your limit resets at {resets_at}." 590 return Event.from_message( 591 Message(role="assistant", content=[TextBlock(text=msg)]) 592 ) 593 return None 594 595 case _: 596 _logger.warning("Unexpected message type from CLI: %s", message_type) 597 return None 598 599 def _should_filter_out_message(self, data: dict[str, Any]) -> bool: 600 """ 601 Check if an internal message that should be filtered out before being displayed to the user. 602 603 Currently filters: 604 - Skill prompt messages: When a Skill tool is called, Claude Code sends an internal 605 user message containing the full skill instructions (starting with "Base directory 606 for this skill:"). These messages are internal and should not be displayed to users. 607 """ 608 if data.get("type") != "user": 609 return False 610 611 content = data.get("message", {}).get("content", []) 612 if not isinstance(content, list): 613 return False 614 615 return any( 616 block.get("type") == "text" 617 # TODO: This prefix is not guaranteed to be stable. We should find a better way to 618 # filter out these messages. 619 and block.get("text", "").startswith("Base directory for this skill:") 620 for block in content 621 )