/ mlflow / assistant / providers / claude_code.py
claude_code.py
  1  """
  2  Claude Code provider for MLflow Assistant.
  3  
  4  This module provides the Claude Code integration for the assistant API,
  5  enabling AI-powered trace analysis through the Claude Code CLI.
  6  """
  7  
  8  import asyncio
  9  import json
 10  import logging
 11  import os
 12  import shutil
 13  import subprocess
 14  from pathlib import Path
 15  from typing import Any, AsyncGenerator, Callable
 16  
 17  from mlflow.assistant.providers.base import (
 18      AssistantProvider,
 19      CLINotInstalledError,
 20      NotAuthenticatedError,
 21      load_config,
 22  )
 23  from mlflow.assistant.types import (
 24      ContentBlock,
 25      Event,
 26      Message,
 27      TextBlock,
 28      ThinkingBlock,
 29      ToolResultBlock,
 30      ToolUseBlock,
 31  )
 32  from mlflow.server.assistant.session import clear_process_pid, save_process_pid
 33  
 34  _logger = logging.getLogger(__name__)
 35  
 36  
 37  # Allowed tools for Claude Code CLI
 38  # Restrict to only Bash commands that use MLflow CLI
 39  BASE_ALLOWED_TOOLS = [
 40      "Bash(mlflow:*)",
 41      "Skill",  # Skill tool needs to be explicitly allowed
 42  ]
 43  FILE_EDIT_TOOLS = [
 44      # Allow writing evaluation scripts, editing code, reading
 45      # project files, etc. in the project directory
 46      "Edit(*)",
 47      "Read(*)",
 48      "Write(*)",
 49      # Allow writing large command output to files in /tmp so it
 50      # can be analyzed with bash commands (e.g. grep, jq) without
 51      # loading full contents into context
 52      "Edit(//tmp/**)",
 53      "Read(//tmp/**)",
 54      "Write(//tmp/**)",
 55  ]
 56  DOCS_TOOLS = ["WebFetch(domain:mlflow.org)"]
 57  
 58  CLAUDE_SYSTEM_PROMPT = """\
 59  You are an MLflow assistant helping users with their MLflow projects. Users interact with
 60  you through the MLflow UI. You can answer questions about MLflow, read and analyze data
 61  from MLflow, integrate MLflow with a codebase, run scripts to log data to MLflow, use
 62  MLflow to debug and improve AI applications like models & agents, and perform many more
 63  MLflow-related tasks.
 64  
 65  The following instructions are fundamental to your behavior. You MUST ALWAYS follow them
 66  exactly as specified. You MUST re-read them carefully whenever you start a new response to the user.
 67  Do NOT ignore or skip these instructions under any circumstances!
 68  
 69  ## CRITICAL: Be Proactive and Minimize User Effort
 70  
 71  NEVER ask the user to do something manually that you can do for them.
 72  
 73  You MUST always try to minimize the number of steps the user has to take manually. The user
 74  is relying on you to accelerate their workflows. For example, if the user asks for a tutorial on
 75  how to do something, find the answer and then offer to do it for them using MLflow commands or code,
 76  rather than just telling them how to do it themselves.
 77  
 78  ## CRITICAL: Using Skills
 79  
 80  You have Claude Code skills for MLflow tasks. Each skill listed in your available skills has a
 81  description that explains when to use it.
 82  
 83  You MUST use skills for anything relating to:
 84  
 85  - Onboarding and getting started with MLflow (e.g. new user questions about MLflow)
 86  - Reading or analyzing traces and chat sessions
 87  - Searching for traces and chat sessions
 88  - Searching for MLflow documentation
 89  - Running MLflow GenAI evaluation to evaluate traces or agents
 90  - Querying MLflow metrics
 91  - Anything else explicitly covered by a skill
 92    (you MUST read skill descriptions carefully before acting)
 93  
 94  ALWAYS abide by the following rules:
 95  
 96  - Before responding to any user message or request, YOU MUST consult your list of available skills
 97    to determine if a relevant skill exists. If a relevant skill exists, you MUST try using it first.
 98    Using the right skill leads to more effective outcomes.
 99  
100    Even if your conversation with the user has many previous messages, EVERY new message from the
101    user MUST trigger a skills check. Do NOT skip this step.
102  
103  - When following a skill, you MUST read its instructions VERY carefully —
104    especially command syntax, which must be followed precisely.
105  
106  - NEVER run ANY command before checking for a relevant skill. ALWAYS
107    check for skills first. For example, do not try to consult the CLI
108    reference for searching traces until you have read the skills for
109    trace search and analysis first.
110  
111  ## CRITICAL: Complete All Work Before Finishing Your Response
112  
113  You may provide progress updates throughout the process, but do NOT finish your response until ALL
114  work — including work done by subagents — is fully complete. The user interacts with you
115  through a UI that does not support fetching results from async subagents. If you finish
116  responding before subagent work is done, the user will never see those results. Always wait for
117  all subagent tasks to finish and include their results in your final response.
118  
119  ## MLflow Server Connection (Pre-configured)
120  
121  The MLflow tracking server is running at: `{tracking_uri}`
122  
123  **CRITICAL**:
124  - The server is ALREADY RUNNING. Never ask the user to start or set up the MLflow server.
125  - ALL MLflow operations MUST target this server. You must assume MLFLOW_TRACKING_URI env var is.
126    always set. DO NOT try to override it or set custom env var to the bash command.
127  - Assume the server is available and operational at all times, unless you have good reason
128    to believe otherwise (e.g. an error that seems likely caused by server unavailability).
129  
130  ## User Context
131  
132  The user has already installed MLflow and is working within the MLflow UI. Never instruct the
133  user to install MLflow or start the MLflow UI/server - these are already set up and running.
134  Under normal conditions, never verify that the server is running; if the user is using the
135  MLflow UI, the server is clearly operational. Only check server status when debugging or
136  investigating a suspected server error.
137  
138  Since the user is already in the MLflow UI, do NOT unnecessarily reference the server URL in
139  your responses (e.g., "go to http://localhost:8888" or "refresh your MLflow UI at ...").
140  Only include URLs when they are specific, actionable links to a particular page in the UI
141  (e.g., a link to a specific experiment, run, or trace).
142  
143  User messages may include a <context> block containing JSON that represents what the user is
144  currently viewing on screen (e.g., traceId, experimentId, selectedTraceIds). Use this context
145  to understand what entities the user is referring to when they ask questions, as well as
146  where the user wants to log (write) or update information.
147  
148  ## Command Preferences (IMPORTANT)
149  
150  ### MLflow Read-Only Operations
151  
152  For querying and reading MLflow data (experiments, runs, traces, metrics, etc.):
153  * STRONGLY PREFER MLflow CLI commands directly. Try to use the CLI until you are certain
154    that it cannot accomplish the task. Do NOT mistake syntax errors or your own mistakes
155    for limitations of the CLI.
156  * When using MLflow CLI, always use `--help` to discover all available options.
157    Do not skip this step or you will not get the correct command.
158  * Trust that MLflow CLI commands will work. Do not add error handling or fallbacks to Python.
159  * Never combine two bash commands with `&&` or `||`. That will error out.
160  * If the CLI cannot accomplish the task, fall back to the MLflow SDK.
161  * When working with large output, write it to files /tmp and use
162    bash commands to analyze the files, rather than reading the full contents into context.
163  
164  ### MLflow Write Operations
165  
166  For logging new data to MLflow (traces, runs, metrics, artifacts, etc.):
167  * The CLI does not support all write operations, so use an MLflow SDK instead.
168  * Use the appropriate SDK for your working directory's project language
169    (Python, TypeScript, etc.). Fall back to Python if no project is detected or if
170    MLflow does not offer an SDK for the detected language.
171  * Always set the tracking URI before logging (see "MLflow Server Connection" section above).
172  
173  IMPORTANT: After writing data, always tell the user how to access it. Prefer directing them
174  to the MLflow UI (provide specific URLs where possible, e.g., `{tracking_uri}/#/experiments/123`).
175  If the data is not viewable in the UI, explain how to access it via MLflow CLI or API.
176  
177  ### Handling permissions issues
178  
179  If you require additional permissions to execute a command or perform an action, ALWAYS tell the
180  user what specific permission(s) you need.
181  
182  If the permissions are for the MLflow CLI, then the user likely has a permissions override in
183  their Claude Code settings JSON file or Claude Code hooks. In this case, tell the user to edit
184  their settings files or hooks to provide the exact permission(s) needed in order to proceed. Give
185  them the exact permission(s) require in Claude Code syntax.
186  
187  Otherwise, tell the user to enable full access permissions from the Assistant Settings UI. Also tell
188  the user that, if full access permissions are already enabled, then they need to check their
189  Claude Code settings JSON file or Claude Code hooks to ensure there are no permission overrides that
190  conflict with full access (Claude Code's 'bypassPermissions' mode). Finally, tell the user how to
191  edit their Claude Code settings or hooks to enable the specific permission(s) needed to proceed.
192  This gives the user all of the available options and necessary information to resolve permission
193  issues.
194  
195  ### Data Access
196  
197  NEVER access the MLflow server's backend storage directly. Always use MLflow APIs or CLIs and
198  let the server handle storage. Specifically:
199  - NEVER use the MLflow CLI or API with a database or file tracking URI - only use the configured
200    HTTP tracking URI (`{tracking_uri}`).
201  - NEVER use database CLI tools (e.g., sqlite3, psql) to connect directly to the MLflow database.
202  - NEVER read the filesystem or cloud storage to access MLflow artifact storage directly.
203  - ALWAYS let the MLflow server handle all storage operations through its APIs.
204  
205  ## MLflow Documentation
206  
207  If you have a permission to fetch MLflow documentation, use the WebFetch tool to fetch
208  pages from mlflow.org to provide accurate information about MLflow.
209  
210  ### Accessing Documentation
211  
212  When reading documentation, ALWAYS start from https://mlflow.org/docs/latest/llms.txt page that
213  lists links to each pages of the documentation. Start with that page and follow the links to the
214  relevant pages to get more information.
215  
216  IMPORTANT: When accessing documentation pages or returning documentation links to users, always use
217  the latest version URL (https://mlflow.org/docs/latest/...) instead of version-specific URLs.
218  
219  ### CRITICAL: Presenting Documentation Results
220  
221  IMPORTANT: ALWAYS offer to complete tasks from the documentation results yourself, on behalf of the
222  user. Since you are capable of executing code, debugging, logging data to MLflow, and much more, do
223  NOT just return documentation links or excerpts for the user to read and act on themselves.
224  Only ask the user to do something manually if you have tried and cannot do it yourself, or
225  if you truly do not know how.
226  
227  IMPORTANT: When presenting information from documentation, you MUST adapt it to the user's
228  context (see "User Context" section above). Before responding, thoroughly re-read the User Context
229  section and adjust your response accordingly. Always consider what the user already has set up
230  and running. For example:
231  - Do NOT tell the user to install MLflow or how to install it - it is already installed.
232  - Do NOT tell the user to start the MLflow server or UI - they are already running.
233  - Do NOT tell the user to open a browser to view the MLflow UI - they are already using it.
234  - Skip any setup/installation steps that are already complete for this user.
235  Focus on the substantive content that is relevant to the user's actual question.
236  """
237  
238  
def _build_system_prompt(tracking_uri: str) -> str:
    """
    Render the Claude Code system prompt for a specific MLflow tracking server.

    Args:
        tracking_uri: URI of the MLflow tracking server (e.g., "http://localhost:5000").
            It is substituted for the `tracking_uri` placeholder of the prompt template.

    Returns:
        The system prompt with the tracking URI filled in.
    """
    placeholders = {"tracking_uri": tracking_uri}
    return CLAUDE_SYSTEM_PROMPT.format(**placeholders)
250  
251  
252  class ClaudeCodeProvider(AssistantProvider):
253      """Assistant provider using Claude Code CLI."""
254  
255      @property
256      def name(self) -> str:
257          return "claude_code"
258  
259      @property
260      def display_name(self) -> str:
261          return "Claude Code"
262  
263      @property
264      def description(self) -> str:
265          return "AI-powered assistant using Claude Code CLI"
266  
267      def is_available(self) -> bool:
268          return shutil.which("claude") is not None
269  
270      def check_connection(self, echo: Callable[[str], None] | None = None) -> None:
271          """
272          Check if Claude CLI is installed and authenticated.
273  
274          Args:
275              echo: Optional function to print status messages.
276  
277          Raises:
278              ProviderNotConfiguredError: If CLI is not installed or not authenticated.
279          """
280          claude_path = shutil.which("claude")
281          if not claude_path:
282              if echo:
283                  echo("Claude CLI not found")
284              raise CLINotInstalledError(
285                  "Claude Code CLI is not installed. "
286                  "Install it with: npm install -g @anthropic-ai/claude-code"
287              )
288  
289          if echo:
290              echo(f"Claude CLI found: {claude_path}")
291              echo("Checking connection... (this may take a few seconds)")
292  
293          # Check authentication by running a minimal test prompt
294          try:
295              result = subprocess.run(
296                  ["claude", "-p", "hi", "--max-turns", "1", "--output-format", "json"],
297                  capture_output=True,
298                  text=True,
299                  timeout=30,
300              )
301  
302              if result.returncode == 0:
303                  if echo:
304                      echo("Authentication verified")
305                  return
306  
307              # Check for common auth errors in stderr
308              stderr = result.stderr.lower()
309              if "auth" in stderr or "login" in stderr or "unauthorized" in stderr:
310                  error_msg = "Not authenticated. Please run: claude login"
311              else:
312                  error_msg = result.stderr.strip() or f"Process exited with code {result.returncode}"
313  
314              if echo:
315                  echo(f"Authentication failed: {error_msg}")
316              raise NotAuthenticatedError(error_msg)
317  
318          except subprocess.TimeoutExpired:
319              if echo:
320                  echo("Authentication check timed out")
321              raise NotAuthenticatedError("Authentication check timed out")
322          except subprocess.SubprocessError as e:
323              if echo:
324                  echo(f"Error checking authentication: {e}")
325              raise NotAuthenticatedError(str(e))
326  
327      def resolve_skills_path(self, base_directory: Path) -> Path:
328          """Resolve the path to the skills directory."""
329          return base_directory / ".claude" / "skills"
330  
331      async def astream(
332          self,
333          prompt: str,
334          tracking_uri: str,
335          session_id: str | None = None,
336          mlflow_session_id: str | None = None,
337          cwd: Path | None = None,
338          context: dict[str, Any] | None = None,
339      ) -> AsyncGenerator[Event, None]:
340          """
341          Stream responses from Claude Code CLI asynchronously.
342  
343          Args:
344              prompt: The prompt to send to Claude
345              tracking_uri: MLflow tracking server URI for the assistant to use
346              session_id: Claude session ID for resume
347              mlflow_session_id: MLflow session ID for PID tracking (enables cancellation)
348              cwd: Working directory for Claude Code CLI
349              context: Additional context for the assistant, such as information from
350                  the current UI page the user is viewing (e.g., experimentId, traceId)
351  
352          Yields:
353              Event objects
354          """
355          claude_path = shutil.which("claude")
356          if not claude_path:
357              yield Event.from_error(
358                  "Claude CLI not found. Please install Claude Code CLI and ensure it's in your PATH."
359              )
360              return
361  
362          # Build user message with context
363          if context:
364              user_message = f"<context>\n{json.dumps(context)}\n</context>\n\n{prompt}"
365          else:
366              user_message = prompt
367  
368          # Build command
369          # Note: --verbose is required when using --output-format=stream-json with -p
370          cmd = [claude_path, "-p", user_message, "--output-format", "stream-json", "--verbose"]
371  
372          # Add system prompt with tracking URI context
373          system_prompt = _build_system_prompt(tracking_uri)
374          cmd.extend(["--append-system-prompt", system_prompt])
375  
376          config = load_config(self.name)
377  
378          # Handle permission mode
379          if config.permissions.full_access:
380              # Full access mode - bypass all permission checks
381              cmd.extend(["--permission-mode", "bypassPermissions"])
382          else:
383              # Build allowed tools list based on permissions
384              allowed_tools = list(BASE_ALLOWED_TOOLS)
385              if config.permissions.allow_edit_files:
386                  allowed_tools.extend(FILE_EDIT_TOOLS)
387              if config.permissions.allow_read_docs:
388                  allowed_tools.extend(DOCS_TOOLS)
389  
390              for tool in allowed_tools:
391                  cmd.extend(["--allowed-tools", tool])
392  
393          if config.model and config.model != "default":
394              cmd.extend(["--model", config.model])
395  
396          if session_id:
397              cmd.extend(["--resume", session_id])
398  
399          process = None
400          try:
401              process = await asyncio.create_subprocess_exec(
402                  *cmd,
403                  stdout=asyncio.subprocess.PIPE,
404                  stderr=asyncio.subprocess.PIPE,
405                  cwd=cwd,
406                  # Increase buffer limit from default 64KB to handle large JSON responses
407                  # from Claude Code CLI (e.g., tool results containing large file contents)
408                  limit=100 * 1024 * 1024,  # 100 MB
409                  # Specify tracking URI to let Claude Code CLI inherit it
410                  # NB: `env` arg in `create_subprocess_exec` does not merge with the parent process's
411                  # environment so we need to copy the parent process's environment explicitly.
412                  env={**os.environ.copy(), "MLFLOW_TRACKING_URI": tracking_uri},
413              )
414  
415              # Save PID for cancellation support
416              if mlflow_session_id and process.pid:
417                  save_process_pid(mlflow_session_id, process.pid)
418  
419              try:
420                  async for line in process.stdout:
421                      line_str = line.decode("utf-8").strip()
422                      if not line_str:
423                          continue
424  
425                      try:
426                          data = json.loads(line_str)
427  
428                          if self._should_filter_out_message(data):
429                              continue
430  
431                          if msg := self._parse_message_to_event(data):
432                              yield msg
433  
434                      except json.JSONDecodeError:
435                          # Non-JSON output, treat as plain text
436                          yield Event.from_message(Message(role="user", content=line_str))
437              finally:
438                  # Clear PID when done (regardless of how we exit)
439                  if mlflow_session_id:
440                      clear_process_pid(mlflow_session_id)
441  
442              # Wait for process to complete
443              await process.wait()
444  
445              # Check if killed by interrupt (SIGKILL = -9)
446              if process.returncode == -9:
447                  yield Event.from_interrupted()
448                  return
449  
450              if process.returncode != 0:
451                  stderr = await process.stderr.read()
452                  error_msg = (
453                      stderr.decode("utf-8").strip()
454                      or f"Process exited with code {process.returncode}"
455                  )
456                  yield Event.from_error(error_msg)
457  
458          except Exception as e:
459              _logger.exception("Error running Claude Code CLI")
460              yield Event.from_error(str(e))
461          finally:
462              if process is not None and process.returncode is None:
463                  process.kill()
464                  await process.wait()
465  
466      def _parse_message_to_event(self, data: dict[str, Any]) -> Event | None:
467          """
468          Parse json message from Claude Code CLI output.
469  
470          Reference: https://github.com/anthropics/claude-agent-sdk-python/blob/29c12cd80b256e88f321b2b8f1f5a88445077aa5/src/claude_agent_sdk/_internal/message_parser.py#L24
471  
472          Args:
473              data: Raw message dictionary from CLI output
474  
475          Returns:
476              Parsed Event object
477          """
478          message_type = data.get("type")
479          if not message_type:
480              return Event.from_error("Message missing 'type' field")
481  
482          match message_type:
483              case "user":
484                  try:
485                      if isinstance(data["message"]["content"], list):
486                          user_content_blocks = []
487                          for block in data["message"]["content"]:
488                              match block["type"]:
489                                  case "text":
490                                      user_content_blocks.append(TextBlock(text=block["text"]))
491                                  case "tool_use":
492                                      user_content_blocks.append(
493                                          ToolUseBlock(
494                                              id=block["id"],
495                                              name=block["name"],
496                                              input=block["input"],
497                                          )
498                                      )
499                                  case "tool_result":
500                                      user_content_blocks.append(
501                                          ToolResultBlock(
502                                              tool_use_id=block["tool_use_id"],
503                                              content=block.get("content"),
504                                              is_error=block.get("is_error"),
505                                          )
506                                      )
507                              msg = Message(role="user", content=user_content_blocks)
508                      else:
509                          msg = Message(role="user", content=data["message"]["content"])
510                      return Event.from_message(msg)
511                  except KeyError as e:
512                      return Event.from_error(f"Failed to parse user message: {e}")
513  
514              case "assistant":
515                  try:
516                      if data["message"].get("error"):
517                          return Event.from_error(data["message"]["error"])
518  
519                      content_blocks: list[ContentBlock] = []
520                      for block in data["message"]["content"]:
521                          match block["type"]:
522                              case "text":
523                                  content_blocks.append(TextBlock(text=block["text"]))
524                              case "thinking":
525                                  content_blocks.append(
526                                      ThinkingBlock(
527                                          thinking=block["thinking"],
528                                          signature=block["signature"],
529                                      )
530                                  )
531                              case "tool_use":
532                                  content_blocks.append(
533                                      ToolUseBlock(
534                                          id=block["id"],
535                                          name=block["name"],
536                                          input=block["input"],
537                                      )
538                                  )
539                              case "tool_result":
540                                  content_blocks.append(
541                                      ToolResultBlock(
542                                          tool_use_id=block["tool_use_id"],
543                                          content=block.get("content"),
544                                          is_error=block.get("is_error"),
545                                      )
546                                  )
547  
548                      msg = Message(role="assistant", content=content_blocks)
549                      return Event.from_message(msg)
550                  except KeyError as e:
551                      return Event.from_error(f"Failed to parse assistant message: {e}")
552  
553              case "system":
554                  # NB: Skip system message. The system message from Claude Code CLI contains
555                  # the various metadata about runtime, which is not used by the assistant UX.
556                  return None
557  
558              case "error":
559                  try:
560                      error_msg = data.get("error", {}).get("message", str(data.get("error")))
561                      return Event.from_error(error_msg)
562                  except Exception as e:
563                      return Event.from_error(f"Failed to parse error message: {e}")
564  
565              case "result":
566                  try:
567                      return Event.from_result(
568                          result=data.get("result"),
569                          session_id=data["session_id"],
570                      )
571                  except KeyError as e:
572                      return Event.from_error(f"Failed to parse result message: {e}")
573  
574              case "stream_event":
575                  try:
576                      return Event.from_stream_event(event=data["event"])
577                  except KeyError as e:
578                      return Event.from_error(f"Failed to parse stream_event message: {e}")
579  
580              case "rate_limit_event":
581                  # rate_limit_event is a status event emitted by the CLI to report
582                  # rate limit info. Only surface a message to the user when they are
583                  # actually limited, not on every status update.
584                  info = data.get("rate_limit_info", {})
585                  if info.get("status") == "limited":
586                      resets_at = info.get("resetsAt")
587                      msg = "You've hit a rate limit — please wait a moment and try again."
588                      if resets_at:
589                          msg += f" Your limit resets at {resets_at}."
590                      return Event.from_message(
591                          Message(role="assistant", content=[TextBlock(text=msg)])
592                      )
593                  return None
594  
595              case _:
596                  _logger.warning("Unexpected message type from CLI: %s", message_type)
597                  return None
598  
599      def _should_filter_out_message(self, data: dict[str, Any]) -> bool:
600          """
601          Check if an internal message that should be filtered out before being displayed to the user.
602  
603          Currently filters:
604          - Skill prompt messages: When a Skill tool is called, Claude Code sends an internal
605            user message containing the full skill instructions (starting with "Base directory
606            for this skill:"). These messages are internal and should not be displayed to users.
607          """
608          if data.get("type") != "user":
609              return False
610  
611          content = data.get("message", {}).get("content", [])
612          if not isinstance(content, list):
613              return False
614  
615          return any(
616              block.get("type") == "text"
617              # TODO: This prefix is not guaranteed to be stable. We should find a better way to
618              # filter out these messages.
619              and block.get("text", "").startswith("Base directory for this skill:")
620              for block in content
621          )