# src/solace_agent_mesh/agent/tools/builtin_artifact_tools.py
   1  """
   2  Built-in ADK Tools for Artifact Management within the A2A Host.
   3  These tools interact with the ADK ArtifactService via the ToolContext and
   4  use state_delta for signaling artifact return requests to the host component.
   5  Metadata handling is integrated via artifact_helpers.
   6  """
   7  
   8  import logging
   9  import uuid
  10  import json
  11  import re
  12  import fnmatch
  13  from typing import Any, Dict, List, Optional, Tuple, Union, TYPE_CHECKING
  14  from datetime import datetime, timezone
  15  from google.adk.tools import ToolContext
  16  
  17  if TYPE_CHECKING:
  18      from google.adk.agents.invocation_context import InvocationContext
  19  from google.genai import types as adk_types
  20  from .tool_definition import BuiltinTool
  21  from .tool_result import ToolResult, DataObject, DataDisposition
  22  from .artifact_types import Artifact
  23  from .registry import tool_registry
  24  from ...agent.utils.artifact_helpers import (
  25      save_artifact_with_metadata,
  26      decode_and_get_bytes,
  27      load_artifact_content_or_metadata,
  28      is_filename_safe,
  29      METADATA_SUFFIX,
  30      DEFAULT_SCHEMA_MAX_KEYS,
  31  )
  32  from ...common.utils.embeds import (
  33      evaluate_embed,
  34      EMBED_REGEX,
  35      EMBED_CHAIN_DELIMITER,
  36  )
  37  from ...common.utils.embeds.types import ResolutionMode
  38  from ...agent.utils.context_helpers import get_original_session_id
  39  from ...agent.adk.models.lite_llm import LiteLlm
  40  from google.adk.models import LlmRequest
  41  from google.adk.models.registry import LLMRegistry
  42  from ...common.utils.mime_helpers import is_text_based_file, is_image_artifact
  43  
  44  log = logging.getLogger(__name__)
  45  
  46  CATEGORY_NAME = "Artifact Management"
  47  CATEGORY_DESCRIPTION = "List, read, create, update, and delete artifacts."
  48  
  49  
  50  async def _internal_create_artifact(
  51      filename: str,
  52      content: str,
  53      mime_type: str,
  54      tool_context: ToolContext = None,
  55      description: Optional[str] = None,
  56      metadata_json: Optional[str] = None,
  57      schema_max_keys: Optional[int] = None,
  58      tags: Optional[List[str]] = None,
  59  ) -> ToolResult:
  60      """
  61      Internal helper to create an artifact with its first chunk of content and metadata.
  62      This function is not intended to be called directly by the LLM.
  63      It is used by callbacks that process fenced artifact blocks.
  64  
  65      Args:
  66          filename: The desired name for the artifact.
  67          content: The first chunk of the artifact content, as a string.
  68                   If the mime_type suggests binary data, this string is expected
  69                   to be base64 encoded.
  70          mime_type: The MIME type of the content.
  71          tool_context: The ADK ToolContext, required for accessing services.
  72          description (str, optional): A description for the artifact.
  73          metadata_json (str, optional): A JSON string of additional metadata.
  74          schema_max_keys (int, optional): Max keys for schema inference.
  75          tags (List[str], optional): Tags for categorization (e.g., ["__working"]).
  76  
  77      Returns:
  78          A ToolResult indicating the result of the save operation.
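
    Example (illustrative sketch; assumes the ADK framework has supplied a valid
    tool_context with an ArtifactService configured, as happens when this helper
    is invoked from a fenced artifact block callback):

        result = await _internal_create_artifact(
            filename="notes.md",
            content="# Meeting notes",
            mime_type="text/markdown",
            tool_context=tool_context,
            description="Notes captured from a fenced artifact block",
            tags=["__working"],
        )
        # result is a ToolResult; on success its data carries the fields
        # reported by save_artifact_with_metadata (e.g. version information).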
  79      """
  80      if not tool_context:
  81          return ToolResult.error(
  82              "ToolContext is missing, cannot save artifact.",
  83              data={"filename": filename}
  84          )
  85  
  86      if not is_filename_safe(filename):
  87          return ToolResult.error(
  88              "Filename is invalid or contains disallowed characters (e.g., '/', '..').",
  89              data={"filename": filename}
  90          )
  91  
  92      log_identifier = f"[BuiltinArtifactTool:_internal_create_artifact:{filename}]"
  93  
  94      final_metadata = {}
  95      if description:
  96          final_metadata["description"] = description
  97      if metadata_json:
  98          try:
  99              final_metadata.update(json.loads(metadata_json))
 100          except (json.JSONDecodeError, TypeError):
 101              log.warning(
 102                  "%s Invalid JSON in metadata_json attribute: %s",
 103                  log_identifier,
 104                  metadata_json,
 105              )
 106  
 107              final_metadata["metadata_parsing_error"] = (
 108                  f"Invalid JSON provided: {metadata_json}"
 109              )
 110  
 111      log.debug("%s Processing request with metadata: %s", log_identifier, final_metadata)
 112  
 113      try:
 114          inv_context = tool_context._invocation_context
 115          artifact_bytes, final_mime_type = decode_and_get_bytes(
 116              content, mime_type, log_identifier
 117          )
 118          max_keys_to_use = (
 119              schema_max_keys if schema_max_keys is not None else DEFAULT_SCHEMA_MAX_KEYS
 120          )
 121          if schema_max_keys is not None:
 122              log.debug(
 123                  "%s Using schema_max_keys provided by LLM: %d",
 124                  log_identifier,
 125                  schema_max_keys,
 126              )
 127          else:
 128              log.debug(
 129                  "%s Using default schema_max_keys: %d",
 130                  log_identifier,
 131                  DEFAULT_SCHEMA_MAX_KEYS,
 132              )
 133  
 134          artifact_service = inv_context.artifact_service
 135          if not artifact_service:
 136              raise ValueError("ArtifactService is not available in the context.")
 137          session_last_update_time = inv_context.session.last_update_time
 138          timestamp_for_artifact: datetime
 139          if isinstance(session_last_update_time, datetime):
 140              timestamp_for_artifact = session_last_update_time
 141          elif isinstance(session_last_update_time, (int, float)):
 142              log.debug(
 143                  "%s Converting numeric session.last_update_time (%s) to datetime.",
 144                  log_identifier,
 145                  session_last_update_time,
 146              )
 147              try:
 148                  timestamp_for_artifact = datetime.fromtimestamp(
 149                      session_last_update_time, timezone.utc
 150                  )
 151              except Exception as e:
 152                  log.warning(
 153                      "%s Failed to convert numeric timestamp %s to datetime: %s. Using current time.",
 154                      log_identifier,
 155                      session_last_update_time,
 156                      e,
 157                  )
 158                  timestamp_for_artifact = datetime.now(timezone.utc)
 159          else:
 160              if session_last_update_time is not None:
 161                  log.warning(
 162                      "%s Unexpected type for session.last_update_time: %s. Using current time.",
 163                      log_identifier,
 164                      type(session_last_update_time),
 165                  )
 166              timestamp_for_artifact = datetime.now(timezone.utc)
 167          result = await save_artifact_with_metadata(
 168              artifact_service=artifact_service,
 169              app_name=inv_context.app_name,
 170              user_id=inv_context.user_id,
 171              session_id=get_original_session_id(inv_context),
 172              filename=filename,
 173              content_bytes=artifact_bytes,
 174              mime_type=final_mime_type,
 175              metadata_dict=final_metadata,
 176              timestamp=timestamp_for_artifact,
 177              schema_max_keys=max_keys_to_use,
 178              tags=tags,
 179              tool_context=tool_context,
 180              suppress_visualization_signal=True,  # Fenced blocks handle their own visualization signals
 181          )
 182          log.info(
 183              "%s Result from save_artifact_with_metadata: %s", log_identifier, result
 184          )
 185          # Convert helper dict result to ToolResult
 186          status = result.pop("status", "success")
 187          if status == "error":
 188              message = result.pop("message", "Unknown error")
 189              return ToolResult.error(message, data=result)
 190          message = result.pop("message", f"Created artifact {filename}")
 191          return ToolResult.ok(message, data=result)
 192      except Exception as e:
 193          log.exception(
 194              "%s Error creating artifact '%s': %s", log_identifier, filename, e
 195          )
 196          return ToolResult.error(
 197              f"Failed to create artifact: {e}",
 198              data={"filename": filename}
 199          )
 200  
 201  
 202  async def list_artifacts(tool_context: ToolContext = None) -> ToolResult:
 203      """
 204      Lists all available data artifact filenames and their versions for the current session.
 205      Includes a summary of the latest version's metadata for each artifact.
 206  
 207      Args:
 208          tool_context: The context provided by the ADK framework.
 209  
 210      Returns:
 211          A ToolResult containing the list of artifacts with metadata summaries or an error.
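
    Example (illustrative sketch; assumes tool_context is supplied by the ADK
    framework and the session already holds at least one artifact):

        result = await list_artifacts(tool_context=tool_context)
        # On success the ToolResult's data payload is {"artifacts": [...]},
        # where each entry looks like:
        #   {"filename": "report.csv", "versions": [0, 1],
        #    "metadata_summary": {"description": "...", "type": "text/csv"}}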
 212      """
 213      if not tool_context:
 214          return ToolResult.error("ToolContext is missing.")
 215      log_identifier = "[BuiltinArtifactTool:list_artifacts]"
 216      log.debug("%s Processing request.", log_identifier)
 217      try:
 218          artifact_service = tool_context._invocation_context.artifact_service
 219          if not artifact_service:
 220              raise ValueError("ArtifactService is not available in the context.")
 221          app_name = tool_context._invocation_context.app_name
 222          user_id = tool_context._invocation_context.user_id
 223          session_id = get_original_session_id(tool_context._invocation_context)
        all_keys = await artifact_service.list_artifact_keys(
            app_name=app_name, user_id=user_id, session_id=session_id
        )
 228          response_files = []
 229          processed_data_files = set()
 230          for key in all_keys:
 231              if key.endswith(METADATA_SUFFIX):
 232                  continue  # Skip metadata files initially
 233  
 234              if key in processed_data_files:
 235                  continue  # Already processed this data file
 236  
 237              filename = key
 238              metadata_summary = None
 239              versions = []
 240              try:
 241                  versions = await artifact_service.list_versions(
 242                      app_name=app_name,
 243                      user_id=user_id,
 244                      session_id=session_id,
 245                      filename=filename,
 246                  )
 247                  if not versions:
 248                      log.warning(
 249                          "%s Found artifact key '%s' but no versions listed. Skipping.",
 250                          log_identifier,
 251                          filename,
 252                      )
 253                      continue
 254                  latest_version = max(versions)
 255                  metadata_filename = f"{filename}{METADATA_SUFFIX}"
 256                  if metadata_filename in all_keys:
 257                      try:
 258                          metadata_part = await artifact_service.load_artifact(
 259                              app_name=app_name,
 260                              user_id=user_id,
 261                              session_id=session_id,
 262                              filename=metadata_filename,
 263                              version=latest_version,
 264                          )
 265                          if metadata_part and metadata_part.inline_data:
 266                              try:
 267                                  metadata_dict = json.loads(
 268                                      metadata_part.inline_data.data.decode("utf-8")
 269                                  )
 270                                  schema = metadata_dict.get("schema", {})
 271                                  metadata_summary = {
 272                                      "description": metadata_dict.get("description"),
 273                                      "source": metadata_dict.get("source"),
 274                                      "type": metadata_dict.get("mime_type"),
 275                                      "size": metadata_dict.get("size_bytes"),
 276                                      "schema_type": schema.get(
 277                                          "type", metadata_dict.get("mime_type")
 278                                      ),
 279                                      "schema_inferred": schema.get("inferred"),
 280                                  }
 281                                  metadata_summary = {
 282                                      k: v
 283                                      for k, v in metadata_summary.items()
 284                                      if v is not None
 285                                  }
 286                                  log.debug(
 287                                      "%s Loaded metadata summary for '%s' v%d.",
 288                                      log_identifier,
 289                                      filename,
 290                                      latest_version,
 291                                  )
 292                              except json.JSONDecodeError as json_err:
 293                                  log.warning(
 294                                      "%s Failed to parse metadata JSON for '%s' v%d: %s",
 295                                      log_identifier,
 296                                      metadata_filename,
 297                                      latest_version,
 298                                      json_err,
 299                                  )
 300                                  metadata_summary = {"error": "Failed to parse metadata"}
 301                              except Exception as fmt_err:
 302                                  log.warning(
 303                                      "%s Failed to format metadata summary for '%s' v%d: %s",
 304                                      log_identifier,
 305                                      metadata_filename,
 306                                      latest_version,
 307                                      fmt_err,
 308                                  )
 309                                  metadata_summary = {
 310                                      "error": "Failed to format metadata"
 311                                  }
 312                          else:
 313                              log.warning(
 314                                  "%s Metadata file '%s' v%d found but empty or unreadable.",
 315                                  log_identifier,
 316                                  metadata_filename,
 317                                  latest_version,
 318                              )
 319                              metadata_summary = {
 320                                  "error": "Metadata file empty or unreadable"
 321                              }
 322                      except Exception as load_err:
 323                          log.warning(
 324                              "%s Failed to load metadata file '%s' v%d: %s",
 325                              log_identifier,
 326                              metadata_filename,
 327                              latest_version,
 328                              load_err,
 329                          )
 330                          metadata_summary = {
 331                              "error": f"Failed to load metadata: {load_err}"
 332                          }
 333                  else:
 334                      log.debug(
 335                          "%s No companion metadata file found for '%s'.",
 336                          log_identifier,
 337                          filename,
 338                      )
 339                      metadata_summary = {"info": "No metadata file found"}
 340              except Exception as version_err:
 341                  log.warning(
 342                      "%s Failed to list versions or process metadata for file '%s': %s. Skipping file.",
 343                      log_identifier,
 344                      filename,
 345                      version_err,
 346                  )
 347                  continue
 348              response_files.append(
 349                  {
 350                      "filename": filename,
 351                      "versions": versions,
 352                      "metadata_summary": metadata_summary,
 353                  }
 354              )
 355              processed_data_files.add(filename)
 356          log.info(
 357              "%s Found %d data artifacts for session %s.",
 358              log_identifier,
 359              len(response_files),
 360              session_id,
 361          )
 362          return ToolResult.ok(
 363              f"Found {len(response_files)} artifacts.",
 364              data={"artifacts": response_files}
 365          )
 366      except Exception as e:
 367          log.exception("%s Error listing artifacts: %s", log_identifier, e)
 368          return ToolResult.error(f"Failed to list artifacts: {e}")
 369  
 370  
 371  async def load_artifact(
 372      filename: str,
 373      version: int,
 374      load_metadata_only: bool = False,
 375      max_content_length: Optional[int] = None,
 376      include_line_numbers: bool = False,
 377      tool_context: ToolContext = None,
 378  ) -> ToolResult:
 379      """
 380      Loads the content or metadata of a specific artifact version.
 381      Early-stage embeds in the filename argument are resolved.
 382  
 383      If load_metadata_only is True, loads the full metadata dictionary.
 384      Otherwise, loads text content (potentially truncated) or binary metadata summary.
 385  
 386      Args:
 387          filename: The name of the artifact to load. May contain embeds.
 388          version: The specific version number to load. Must be explicitly provided. Versions are 0-indexed.
 389          load_metadata_only (bool): If True, load only the metadata JSON. Default False.
 390          max_content_length (Optional[int]): Maximum character length for text content.
 391                                             If None, uses app configuration. Range: 100-100,000.
 392          include_line_numbers (bool): If True, prefix each line with its 1-based line number
 393                                      followed by a TAB character for LLM viewing. Line numbers
 394                                      are not stored in the artifact. Default False.
 395          tool_context: The context provided by the ADK framework.
 396  
 397      Returns:
 398          A ToolResult containing the artifact details and content/metadata or an error.
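
    Example (illustrative sketch; assumes tool_context is supplied by the ADK
    framework and "report.csv" has at least one saved version):

        result = await load_artifact(
            filename="report.csv",
            version=0,
            max_content_length=2000,
            include_line_numbers=True,
            tool_context=tool_context,
        )
        # On success the ToolResult wraps the helper's payload: the (possibly
        # truncated) text content, or the full metadata dictionary when
        # load_metadata_only=True is passed instead.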
 399      """
 400      if not tool_context:
 401          return ToolResult.error(
 402              "ToolContext is missing.",
 403              data={"filename": filename, "version": version}
 404          )
 405      log_identifier = f"[BuiltinArtifactTool:load_artifact:{filename}:{version}]"
 406      log.debug(
 407          "%s Processing request (load_metadata_only=%s).",
 408          log_identifier,
 409          load_metadata_only,
 410      )
 411      if version is None:
 412          version = "latest"
 413      try:
 414          artifact_service = tool_context._invocation_context.artifact_service
 415          if not artifact_service:
 416              raise ValueError("ArtifactService is not available in the context.")
 417          app_name = tool_context._invocation_context.app_name
 418          user_id = tool_context._invocation_context.user_id
 419          session_id = get_original_session_id(tool_context._invocation_context)
 420          agent = getattr(tool_context._invocation_context, "agent", None)
 421          host_component = getattr(agent, "host_component", None) if agent else None
 422  
 423          # Check if inline vision is enabled and this is an image artifact
 424          enable_inline_vision = getattr(host_component, "enable_inline_vision", False) if host_component else False
 425          is_image = is_image_artifact(filename, None)
 426  
 427          if enable_inline_vision and is_image and not load_metadata_only:
 428              # Load raw bytes for image artifacts so the LLM can see them
 429              result = await load_artifact_content_or_metadata(
 430                  artifact_service=artifact_service,
 431                  app_name=app_name,
 432                  user_id=user_id,
 433                  session_id=session_id,
 434                  filename=filename,
 435                  version=version,
 436                  load_metadata_only=False,
 437                  return_raw_bytes=True,
 438                  log_identifier_prefix="[BuiltinArtifactTool:load_artifact:vision]",
 439              )
 440              status = result.pop("status", "success")
 441              if status in ("error", "not_found"):
 442                  message = result.pop("message", "Unknown error")
 443                  return ToolResult.error(message, data=result)
 444  
 445              raw_bytes = result.get("raw_bytes")
 446              mime_type = result.get("mime_type", "image/png")
 447              if raw_bytes:
 448                  import base64 as b64
 449                  b64_data = b64.b64encode(raw_bytes).decode("utf-8")
 450                  data_url = f"data:{mime_type};base64,{b64_data}"
 451                  log.info(
 452                      "%s Inline vision: returning image '%s' (%d bytes) as data URL for LLM viewing.",
 453                      log_identifier, filename, len(raw_bytes),
 454                  )
 455                  return ToolResult.ok(
 456                      f"Image '{filename}' loaded for viewing. The image is included inline below.",
 457                      data={
 458                          "filename": result.get("filename", filename),
 459                          "version": result.get("version", version),
 460                          "mime_type": mime_type,
 461                          "size_bytes": len(raw_bytes),
 462                          "_vision_image_data_url": data_url,
 463                      }
 464                  )
 465  
 466          result = await load_artifact_content_or_metadata(
 467              artifact_service=artifact_service,
 468              app_name=app_name,
 469              user_id=user_id,
 470              session_id=session_id,
 471              filename=filename,
 472              version=version,
 473              load_metadata_only=load_metadata_only,
 474              max_content_length=max_content_length,
 475              include_line_numbers=include_line_numbers,
 476              component=host_component,
 477              log_identifier_prefix="[BuiltinArtifactTool:load_artifact]",
 478          )
 479          # Convert helper dict result to ToolResult
 480          status = result.pop("status", "success")
 481          if status in ("error", "not_found"):
 482              message = result.pop("message", "Unknown error")
 483              return ToolResult.error(message, data=result)
 484          message = result.pop("message", f"Loaded artifact {filename}")
 485          return ToolResult.ok(message, data=result)
 486      except FileNotFoundError as fnf_err:
 487          log.warning(
 488              "%s Artifact not found (reported by helper): %s", log_identifier, fnf_err
 489          )
 490          return ToolResult.error(
 491              str(fnf_err),
 492              data={"filename": filename, "version": version}
 493          )
 494      except ValueError as val_err:
 495          log.warning(
 496              "%s Value error during load (reported by helper): %s",
 497              log_identifier,
 498              val_err,
 499          )
 500          return ToolResult.error(
 501              str(val_err),
 502              data={"filename": filename, "version": version}
 503          )
 504      except Exception as e:
 505          log.exception(
 506              "%s Unexpected error in load_artifact tool: %s", log_identifier, e
 507          )
 508          return ToolResult.error(
 509              f"Unexpected error processing load request: {e}",
 510              data={"filename": filename, "version": version}
 511          )
 512  
 513  
 514  async def apply_embed_and_create_artifact(
 515      output_filename: str,
 516      embed_directive: str,
 517      output_metadata: Optional[Dict[str, Any]] = None,
 518      tool_context: ToolContext = None,
 519  ) -> ToolResult:
 520      """
 521      Resolves an 'artifact_content' embed directive (including modifiers and formatting)
 522      and saves the resulting content as a new artifact. The entire embed directive
    must be provided as a single string via the embed_directive argument.
 524  
 525      Args:
 526          output_filename: The desired name for the new artifact.
 527          embed_directive: The full '«artifact_content:...>>>...>>>format:...»' string.
 528          output_metadata (dict, optional): Metadata for the new artifact.
 529          tool_context: The context provided by the ADK framework.
 530  
 531      Returns:
 532          A ToolResult indicating the result, including the new filename and version.
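
    Example (illustrative sketch; assumes tool_context is supplied by the ADK
    framework and "sales.csv" already exists; the directive shown is only one
    plausible form, not a full syntax reference):

        result = await apply_embed_and_create_artifact(
            output_filename="sales_preview.json",
            embed_directive="«artifact_content:sales.csv>>>format:json»",
            output_metadata={"description": "JSON preview of sales.csv"},
            tool_context=tool_context,
        )
        # On success the ToolResult's data payload contains output_filename,
        # output_version and output_mime_type for the newly saved artifact.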
 533      """
 534      if not tool_context:
 535          return ToolResult.error("ToolContext is missing.")
 536  
 537      log_identifier = f"[BuiltinArtifactTool:apply_embed:{output_filename}]"
 538      log.info(
 539          "%s Processing request with directive: %s", log_identifier, embed_directive
 540      )
 541  
 542      match = EMBED_REGEX.fullmatch(embed_directive)
 543      if not match:
 544          return ToolResult.error(f"Invalid embed directive format: {embed_directive}")
 545  
 546      embed_type = match.group(1)
 547      expression = match.group(2)
 548      format_spec = match.group(3)
 549  
 550      if embed_type != "artifact_content":
 551          return ToolResult.error(
 552              f"This tool only supports 'artifact_content' embeds, got '{embed_type}'."
 553          )
 554  
 555      try:
 556          inv_context = tool_context._invocation_context
 557          artifact_service = inv_context.artifact_service
 558          if not artifact_service:
 559              raise ValueError("ArtifactService not available.")
 560  
 561          host_component = getattr(inv_context.agent, "host_component", None)
 562          if not host_component:
 563              log.warning(
 564                  "%s Could not access host component config for limits. Proceeding without them.",
 565                  log_identifier,
 566              )
 567              embed_config = {}
 568          else:
 569              embed_config = {
 570                  "gateway_artifact_content_limit_bytes": host_component.get_config(
 571                      "gateway_artifact_content_limit_bytes", -1
 572                  ),
 573                  "gateway_recursive_embed_depth": host_component.get_config(
 574                      "gateway_recursive_embed_depth", 3
 575                  ),
 576              }
 577  
 578          gateway_context = {
 579              "artifact_service": artifact_service,
 580              "session_context": {
 581                  "app_name": inv_context.app_name,
 582                  "user_id": inv_context.user_id,
 583                  "session_id": get_original_session_id(inv_context),
 584              },
 585          }
 586      except Exception as ctx_err:
 587          log.error(
 588              "%s Failed to prepare context/config for embed evaluation: %s",
 589              log_identifier,
 590              ctx_err,
 591          )
 592          return ToolResult.error(f"Internal error preparing context: {ctx_err}")
 593  
 594      resolved_content_str, error_msg_from_eval, _ = await evaluate_embed(
 595          embed_type=embed_type,
 596          expression=expression,
 597          format_spec=format_spec,
 598          context=gateway_context,
 599          log_identifier=log_identifier,
 600          resolution_mode=ResolutionMode.TOOL_PARAMETER,
 601          config=embed_config,
 602      )
 603  
 604      if error_msg_from_eval or (
 605          resolved_content_str and resolved_content_str.startswith("[Error:")
 606      ):
 607          error_to_report = error_msg_from_eval or resolved_content_str
 608          log.error("%s Embed resolution failed: %s", log_identifier, error_to_report)
 609          return ToolResult.error(f"Embed resolution failed: {error_to_report}")
 610  
 611      output_mime_type = "text/plain"
 612      final_format = None
 613      chain_parts = expression.split(EMBED_CHAIN_DELIMITER)
 614      if len(chain_parts) > 1:
 615          last_part = chain_parts[-1].strip()
 616          format_match = re.match(r"format:(.*)", last_part, re.DOTALL)
 617          if format_match:
 618              final_format = format_match.group(1).strip().lower()
 619      elif format_spec:
 620          final_format = format_spec.strip().lower()
 621  
 622      if final_format:
 623          if final_format == "html":
 624              output_mime_type = "text/html"
 625          elif final_format == "json" or final_format == "json_pretty":
 626              output_mime_type = "application/json"
 627          elif final_format == "csv":
 628              output_mime_type = "text/csv"
 629          elif final_format == "datauri":
 630              output_mime_type = "text/plain"
 631              log.warning(
 632                  "%s Embed resolved to data URI; saving new artifact as text/plain.",
 633                  log_identifier,
 634              )
 635  
 636      log.debug("%s Determined output MIME type as: %s", log_identifier, output_mime_type)
 637  
 638      try:
 639          resolved_bytes = resolved_content_str.encode("utf-8")
 640          inv_context = tool_context._invocation_context
 641          artifact_service = inv_context.artifact_service
 642          if not artifact_service:
 643              raise ValueError("ArtifactService is not available in the context.")
 644  
 645          save_result = await save_artifact_with_metadata(
 646              artifact_service=artifact_service,
 647              app_name=inv_context.app_name,
 648              user_id=inv_context.user_id,
 649              session_id=get_original_session_id(inv_context),
 650              filename=output_filename,
 651              content_bytes=resolved_bytes,
 652              mime_type=output_mime_type,
            metadata_dict={
                "source_directive": embed_directive,
                **(output_metadata or {}),
            },
 659              timestamp=inv_context.session.last_update_time
 660              or datetime.now(timezone.utc),
 661              schema_max_keys=(
 662                  host_component.get_config("schema_max_keys", DEFAULT_SCHEMA_MAX_KEYS)
 663                  if host_component
 664                  else DEFAULT_SCHEMA_MAX_KEYS
 665              ),
 666              tool_context=tool_context,
 667          )
 668  
 669          log.info(
 670              "%s Successfully applied embed and saved new artifact '%s' (v%s).",
 671              log_identifier,
 672              output_filename,
 673              save_result.get("data_version"),
 674          )
 675          return ToolResult.ok(
 676              f"Successfully created artifact '{output_filename}' v{save_result.get('data_version')} from embed directive.",
 677              data={
 678                  "output_filename": output_filename,
 679                  "output_version": save_result.get("data_version"),
 680                  "output_mime_type": output_mime_type,
 681              }
 682          )
 683  
 684      except Exception as save_err:
 685          log.exception(
 686              "%s Failed to save resolved content as artifact '%s': %s",
 687              log_identifier,
 688              output_filename,
 689              save_err,
 690          )
 691          return ToolResult.error(f"Failed to save new artifact: {save_err}")
 692  
 693  
 694  async def extract_content_from_artifact(
 695      filename: str,
 696      extraction_goal: str,
 697      version: Optional[str] = "latest",
 698      output_filename_base: Optional[str] = None,
 699      tool_context: ToolContext = None,
 700  ) -> ToolResult:
 701      """
 702      Loads an existing artifact, uses an internal LLM to process its content
 703      based on an "extraction_goal," and manages the output by returning it
 704      or saving it as a new artifact.
 705  
    The tool's description presented to the LLM may be dynamically updated based
    on the agent's 'supported_binary_mime_types' configuration, indicating which
    binary types it can attempt to process.
 709  
 710      Args:
 711          filename (str): Name of the source artifact. May contain embeds.
 712          extraction_goal (str): Natural language instruction for the LLM on what
 713                                 to extract or how to transform the content.
 714                                 May contain embeds.
 715          version (Optional[Union[int, str]]): Version of the source artifact.
 716                                               Can be an integer or "latest".
 717                                               Defaults to "latest". May contain embeds.
 718          output_filename_base (Optional[str]): Optional base name for the new
 719                                                artifact if the extracted content
 720                                                is saved. May contain embeds.
 721          tool_context (ToolContext): Provided by the ADK framework.
 722  
 723      Returns:
 724          ToolResult: A ToolResult containing the status of the operation,
 725                      a message for the LLM, and potentially the extracted
 726                      data or details of a newly saved artifact.
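
    Example (illustrative sketch; assumes tool_context is supplied by the ADK
    framework, "report.pdf" exists, and the agent's configuration allows this
    MIME type to be processed):

        result = await extract_content_from_artifact(
            filename="report.pdf",
            extraction_goal="List the section headings with one-line summaries.",
            version="latest",
            output_filename_base="report_headings",
            tool_context=tool_context,
        )
        # Small results come back inline in the ToolResult; results larger than
        # the configured save threshold are stored as a new artifact and the
        # message points at its filename and version.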
 727      """
 728      log_identifier = f"[BuiltinArtifactTool:extract_content:{filename}:{version}]"
 729      log.debug(
 730          "%s Processing request. Goal: '%s', Output base: '%s'",
 731          log_identifier,
 732          extraction_goal,
 733          output_filename_base,
 734      )
 735  
 736      if not tool_context:
 737          return ToolResult.error(
 738              "Tool execution failed: ToolContext is missing.",
 739              data={"filename": filename, "version_requested": str(version)}
 740          )
 741      if not filename:
 742          return ToolResult.error(
 743              "Tool execution failed: 'filename' parameter is required.",
 744              data={"version_requested": str(version)}
 745          )
 746      if not extraction_goal:
 747          return ToolResult.error(
 748              "Tool execution failed: 'extraction_goal' parameter is required.",
 749              data={"filename": filename, "version_requested": str(version)}
 750          )
 751  
 752      inv_context = tool_context._invocation_context
 753      host_component = getattr(inv_context.agent, "host_component", None)
 754      if not host_component:
 755          log.error(
 756              "%s Host component not found on agent. Cannot retrieve config.",
 757              log_identifier,
 758          )
 759          return ToolResult.error(
 760              "Tool configuration error: Host component not accessible.",
 761              data={"filename": filename, "version_requested": str(version)}
 762          )
 763  
 764      try:
 765          save_threshold = host_component.get_config(
 766              "tool_output_save_threshold_bytes", 2048
 767          )
 768          llm_max_bytes = host_component.get_config(
 769              "tool_output_llm_return_max_bytes", 4096
 770          )
 771          extraction_config = host_component.get_config(
 772              "extract_content_from_artifact_config", {}
 773          )
 774          supported_binary_mime_types = extraction_config.get(
 775              "supported_binary_mime_types", []
 776          )
 777          model_config_for_extraction = extraction_config.get("model")
 778      except Exception as e:
 779          log.exception("%s Error retrieving tool configuration: %s", log_identifier, e)
 780          return ToolResult.error(
 781              f"Tool configuration error: {e}",
 782              data={"filename": filename, "version_requested": str(version)}
 783          )
 784  
 785      source_artifact_data = None
 786      processed_version: Union[int, str]
 787  
 788      if version is None or (
 789          isinstance(version, str) and version.strip().lower() == "latest"
 790      ):
 791          processed_version = "latest"
 792      else:
 793          try:
 794              processed_version = int(version)
 795          except ValueError:
 796              log.warning(
 797                  "%s Invalid version string: '%s'. Must be an integer or 'latest'.",
 798                  log_identifier,
 799                  version,
 800              )
 801              return ToolResult.error(
 802                  f"Invalid version format '{version}'. Version must be an integer or 'latest'.",
 803                  data={"filename": filename, "version_requested": str(version)}
 804              )
 805      try:
 806          log.debug(
 807              "%s Loading source artifact '%s' version '%s' (processed as: %s)",
 808              log_identifier,
 809              filename,
 810              version,
 811              processed_version,
 812          )
 813          source_artifact_data = await load_artifact_content_or_metadata(
 814              artifact_service=inv_context.artifact_service,
 815              app_name=inv_context.app_name,
 816              user_id=inv_context.user_id,
 817              session_id=get_original_session_id(inv_context),
 818              filename=filename,
 819              version=processed_version,
 820              return_raw_bytes=True,
 821              log_identifier_prefix=log_identifier,
 822          )
 823          if source_artifact_data.get("status") != "success":
 824              raise FileNotFoundError(
 825                  source_artifact_data.get("message", "Failed to load artifact")
 826              )
 827          log.info(
 828              "%s Successfully loaded source artifact '%s' version %s (actual: v%s)",
 829              log_identifier,
 830              filename,
 831              version,
 832              source_artifact_data.get("version"),
 833          )
 834      except FileNotFoundError as e:
 835          log.warning("%s Source artifact not found: %s", log_identifier, e)
 836          return ToolResult.error(
 837              f"Could not extract content. Source artifact '{filename}' (version {version}) was not found: {e}",
 838              data={"filename": filename, "version_requested": str(version)}
 839          )
 840      except Exception as e:
 841          log.exception("%s Error loading source artifact: %s", log_identifier, e)
 842          return ToolResult.error(
 843              f"Error loading source artifact '{filename}': {e}",
 844              data={"filename": filename, "version_requested": str(version)}
 845          )
 846  
 847      source_artifact_content_bytes = source_artifact_data.get("raw_bytes")
 848      source_mime_type = source_artifact_data.get("mime_type", "application/octet-stream")
 849      actual_source_version = source_artifact_data.get("version", "unknown")
 851      chosen_llm = None
 852      try:
 853          if model_config_for_extraction:
 854              if isinstance(model_config_for_extraction, str):
 855                  chosen_llm = LLMRegistry.new_llm(model_config_for_extraction)
 856                  log.info(
 857                      "%s Using tool-specific LLM (string): %s",
 858                      log_identifier,
 859                      model_config_for_extraction,
 860                  )
 861              elif isinstance(model_config_for_extraction, dict):
 862                  chosen_llm = LiteLlm(**model_config_for_extraction)
 863                  log.info(
 864                      "%s Using tool-specific LLM (dict): %s",
 865                      log_identifier,
 866                      model_config_for_extraction.get("model"),
 867                  )
 868              else:
 869                  log.warning(
 870                      "%s Invalid 'model' config for extraction tool. Falling back to agent default.",
 871                      log_identifier,
 872                  )
 873                  chosen_llm = host_component.get_lite_llm_model()
 874          else:
 875              chosen_llm = host_component.get_lite_llm_model()
 876              log.info(
 877                  "%s Using agent's default LLM: %s", log_identifier, chosen_llm.model
 878              )
 879      except Exception as e:
 880          log.exception("%s Error initializing LLM for extraction: %s", log_identifier, e)
 881          return ToolResult.error(
 882              f"Failed to set up LLM for extraction: {e}",
 883              data={"filename": filename, "version_requested": str(version)}
 884          )
 885  
 886      llm_parts = []
 887      is_binary_supported = False
 888  
 889      normalized_source_mime_type = source_mime_type.lower() if source_mime_type else ""
 890  
 891      is_text_based = is_text_based_file(
 892          mime_type=normalized_source_mime_type,
 893          content_bytes=source_artifact_content_bytes,
 894      )
 895  
 896      if is_text_based:
 897          # Try multiple encodings to handle files from different sources (e.g., Windows Excel exports)
 898          # Includes common Windows encodings like CP1252 and UTF-16
 899          artifact_text_content = None
 900          encoding_used = None
 901          encodings_to_try = ['utf-8', 'utf-16', 'cp1252', 'latin-1']
 902          decode_errors = []
 903          
 904          for encoding in encodings_to_try:
 905              try:
 906                  artifact_text_content = source_artifact_content_bytes.decode(encoding)
 907                  encoding_used = encoding
 908                  log.debug(
 909                      "%s Successfully decoded artifact using %s encoding.",
 910                      log_identifier,
 911                      encoding,
 912                  )
 913                  break
 914              except UnicodeDecodeError as e:
 915                  decode_errors.append(f"{encoding}: {e}")
 916                  continue
 917          
 918          if artifact_text_content is not None:
 919              llm_parts.append(
 920                  adk_types.Part(
 921                      text=f"Artifact Content (MIME type: {source_mime_type}, encoding: {encoding_used}):\n```\n{artifact_text_content}\n```"
 922                  )
 923              )
 924              log.debug("%s Prepared text content for LLM.", log_identifier)
 925          else:
 926              # All encoding attempts failed - return an error to the calling agent
 927              # instead of passing a misleading message to the internal LLM
 928              log.error(
 929                  "%s Failed to decode text artifact with any supported encoding. Errors: %s",
 930                  log_identifier,
 931                  "; ".join(decode_errors),
 932              )
            return ToolResult.error(
                f"Could not extract content from artifact '{filename}' (v{actual_source_version}). "
                f"The file appears to be a text file (MIME type: {source_mime_type}) but could not be decoded "
                f"with any supported encoding (UTF-8, UTF-16, CP1252, Latin-1). The file may be corrupted or "
                f"use an unsupported encoding. Please inform the user that the file cannot be processed.",
                data={
                    "filename": filename,
                    "version_requested": str(version),
                    "encoding_errors": decode_errors,
                },
            )
 943      else:  # Binary
 944          for supported_pattern in supported_binary_mime_types:
 945              if fnmatch.fnmatch(source_mime_type, supported_pattern):
 946                  is_binary_supported = True
 947                  break
 948          if is_binary_supported:
 949              llm_parts.append(
 950                  adk_types.Part(
 951                      inline_data=adk_types.Blob(
 952                          mime_type=source_mime_type, data=source_artifact_content_bytes
 953                      )
 954                  )
 955              )
 956              llm_parts.append(
 957                  adk_types.Part(
 958                      text=f"The above is the content of artifact '{filename}' (MIME type: {source_mime_type})."
 959                  )
 960              )
 961              log.debug(
 962                  "%s Prepared supported binary content (MIME: %s) for LLM.",
 963                  log_identifier,
 964                  source_mime_type,
 965              )
 966          else:
 967              llm_parts.append(
 968                  adk_types.Part(
 969                      text=f"The artifact '{filename}' is a binary file of type '{source_mime_type}'. Direct content processing is not supported by this tool's current configuration. Perform the extraction goal based on its filename and type if possible, or state that the content cannot be analyzed."
 970                  )
 971              )
 972              log.debug(
 973                  "%s Prepared message for unsupported binary content (MIME: %s) for LLM.",
 974                  log_identifier,
 975                  source_mime_type,
 976              )
 977  
 978      # System instruction to ensure the LLM directly analyzes data rather than generating code
 979      system_instruction = """You are a data extraction and analysis assistant. Your task is to directly analyze the provided artifact content and return the requested information.
 980  
 981  CRITICAL RULES:
 982  1. DIRECTLY ANALYZE the data provided - do NOT write code (Python, SQL, or any other language) to analyze it
 983  2. The artifact content is already loaded and provided to you - you must work with it directly
 984  3. Provide actual results, counts, summaries, or extracted data based on what you find in the content
 985  4. If you cannot find the requested information, clearly state what you found instead
 986  5. Format your response as plain text or structured data (JSON, markdown tables) - NOT as code
 987  6. You do NOT have access to execute code - any code you write will NOT be run
 988  
 989  Example of WRONG response (do not do this):
 990  ```python
 991  import pandas as pd
 992  df = pd.read_csv('file.csv')
 993  print(df['column'].count())
 994  ```
 995  
 996  Example of CORRECT response:
 997  Based on analyzing the CSV data, I found 102 records containing 'Employee' in the 'Type' column. Here's the breakdown:
 998  - Employee-A: 65 records
 999  - Employee-B: 37 records"""
1000  
1001      internal_llm_contents = [
1002          adk_types.Content(
1003              role="user", parts=[adk_types.Part(text=extraction_goal)] + llm_parts
1004          )
1005      ]
1006      internal_llm_request = LlmRequest(
1007          model=chosen_llm.model,
1008          contents=internal_llm_contents,
1009          config=adk_types.GenerateContentConfig(
1010              temperature=0.1,
1011              system_instruction=system_instruction,
1012          ),
1013      )
1014  
1015      extracted_content_str = ""
1016      try:
1017          log.info(
1018              "%s Executing internal LLM call for extraction. Goal: %s",
1019              log_identifier,
1020              extraction_goal,
1021          )
1022          if hasattr(chosen_llm, "generate_content") and not hasattr(
1023              chosen_llm, "generate_content_async"
1024          ):
1025              llm_response = chosen_llm.generate_content(request=internal_llm_request)
1026              if llm_response.parts:
1027                  extracted_content_str = llm_response.parts[0].text or ""
1028              else:
1029                  extracted_content_str = ""
1030          elif hasattr(chosen_llm, "generate_content_async"):
1031              log.debug(
1032                  "%s Calling LLM's generate_content_async (non-streaming) for extraction.",
1033                  log_identifier,
1034              )
1035              try:
1036                  llm_response_obj = None
1037                  async for response_event in chosen_llm.generate_content_async(
1038                      internal_llm_request
1039                  ):
1040                      llm_response_obj = response_event
1041                      break
1042                  if (
1043                      llm_response_obj
1044                      and hasattr(llm_response_obj, "text")
1045                      and llm_response_obj.text
1046                  ):
1047                      extracted_content_str = llm_response_obj.text
1048                  elif (
1049                      llm_response_obj
1050                      and hasattr(llm_response_obj, "parts")
1051                      and llm_response_obj.parts
1052                  ):
1053                      extracted_content_str = "".join(
1054                          [
1055                              part.text
1056                              for part in llm_response_obj.parts
1057                              if hasattr(part, "text") and part.text
1058                          ]
1059                      )
1060                  elif (
1061                      llm_response_obj
1062                      and hasattr(llm_response_obj, "content")
1063                      and hasattr(llm_response_obj.content, "parts")
1064                      and llm_response_obj.content.parts
1065                  ):
1066                      extracted_content_str = "".join(
1067                          [
1068                              part.text
1069                              for part in llm_response_obj.content.parts
1070                              if hasattr(part, "text") and part.text
1071                          ]
1072                      )
1073                  else:
1074                      extracted_content_str = ""
1075                      log.warning(
1076                          "%s LLM response object or its text/parts were not found or empty after non-streaming call.",
1077                          log_identifier,
1078                      )
1079  
1080              except Exception as llm_async_err:
1081                  log.exception(
1082                      "%s Asynchronous LLM call for extraction failed: %s",
1083                      log_identifier,
1084                      llm_async_err,
1085                  )
1086                  # Return an error status instead of continuing with error message as "extracted data"
                return ToolResult.error(
                    f"The internal LLM call failed while processing artifact '{filename}' for your goal "
                    f"'{extraction_goal}'. Error: {llm_async_err}. Please inform the user that the extraction "
                    f"could not be completed.",
                    data={
                        "filename": filename,
                        "version_requested": str(version),
                        "error_details": str(llm_async_err),
                    },
                )
1095          else:
1096              log.error(
1097                  "%s LLM does not have a known generate_content or generate_content_async method.",
1098                  log_identifier,
1099              )
            return ToolResult.error(
                "The LLM configured for extraction does not have a supported generation method. "
                "Please inform the user that the extraction tool is misconfigured.",
                data={"filename": filename, "version_requested": str(version)},
            )
1107  
1108          log.info(
1109              "%s Internal LLM call completed. Extracted content length: %d chars",
1110              log_identifier,
1111              len(extracted_content_str),
1112          )
1113          if not extracted_content_str.strip():
1114              log.warning(
1115                  "%s Internal LLM produced empty or whitespace-only content for extraction goal.",
1116                  log_identifier,
1117              )
1118          
1119          # Check if the LLM generated code instead of actual analysis results
1120          # This is a safety check to prevent hallucinated code from being saved as "extracted data"
1121          code_indicators = [
1122              "```python",
1123              "```sql",
1124              "import pandas",
1125              "import csv",
1126              "pd.read_csv",
1127              "df = pd.",
1128              "SELECT * FROM",
1129              "def analyze(",
1130              "def extract(",
1131          ]
1132          content_lower = extracted_content_str.lower()
1133          detected_code = [indicator for indicator in code_indicators if indicator.lower() in content_lower]
1134          
1135          if detected_code and len(extracted_content_str) > 100:
            # Check if the response is primarily code (code blocks make up more than 30% of the content)
1137              code_block_pattern = r'```[\s\S]*?```'
1138              code_blocks = re.findall(code_block_pattern, extracted_content_str)
1139              code_content_length = sum(len(block) for block in code_blocks)
1140              
1141              if code_content_length > len(extracted_content_str) * 0.3:
1142                  log.warning(
1143                      "%s Internal LLM generated code instead of analyzing data. Detected indicators: %s. "
1144                      "Code blocks comprise %.1f%% of response.",
1145                      log_identifier,
1146                      detected_code,
1147                      (code_content_length / len(extracted_content_str)) * 100,
1148                  )
                return ToolResult.error(
                    f"The extraction tool's internal LLM generated code instead of analyzing the data directly. "
                    f"This tool cannot execute code. For CSV data analysis, please use the 'query_data_with_sql' "
                    f"tool from the Data Analysis tools instead, if available, which can execute SQL queries on "
                    f"CSV files. Alternatively, use 'load_artifact' to view the raw content and analyze it yourself.",
                    data={
                        "filename": filename,
                        "version_requested": str(version),
                        "detected_code_indicators": detected_code,
                    },
                )
1159  
1160      except Exception as e:
1161          log.exception(
1162              "%s Internal LLM call for extraction failed: %s", log_identifier, e
1163          )
1164          return ToolResult.error(
1165              f"The LLM failed to process the artifact content for your goal '{extraction_goal}'. Error: {e}",
1166              data={"filename": filename, "version_requested": str(version)}
1167          )
1168  
1169      extracted_content_bytes = extracted_content_str.encode("utf-8")
1170      extracted_content_size_bytes = len(extracted_content_bytes)
1171      output_mime_type = "text/plain"
1172      try:
1173          json.loads(extracted_content_str)
1174          output_mime_type = "application/json"
1175          log.debug(
1176              "%s Extracted content appears to be valid JSON. Setting output MIME to application/json.",
1177              log_identifier,
1178          )
1179      except json.JSONDecodeError:
1180          log.debug(
1181              "%s Extracted content is not JSON. Using output MIME text/plain.",
1182              log_identifier,
1183          )
1184  
1185      response_for_llm_str = extracted_content_str
1186      saved_extracted_artifact_details = None
1187      final_status = "success"
1188      message_to_llm_parts = [
1189          f"Successfully extracted content from '{filename}' (v{actual_source_version}) based on your goal: '{extraction_goal}'."
1190      ]
1191      was_saved = False
1192      was_truncated = False
1193  
1194      if extracted_content_size_bytes > save_threshold:
1195          log.info(
1196              "%s Extracted content size (%d bytes) exceeds save threshold (%d bytes). Saving as new artifact.",
1197              log_identifier,
1198              extracted_content_size_bytes,
1199              save_threshold,
1200          )
1201          saved_extracted_artifact_details = await _save_extracted_artifact(
1202              tool_context,
1203              host_component,
1204              extracted_content_bytes,
1205              filename,
1206              actual_source_version,
1207              extraction_goal,
1208              output_filename_base,
1209              output_mime_type,
1210          )
1211          if saved_extracted_artifact_details.get("status") == "success":
1212              was_saved = True
1213              message_to_llm_parts.append(
1214                  f"The full extracted content was saved as artifact '{saved_extracted_artifact_details.get('data_filename')}' "
1215                  f"(version {saved_extracted_artifact_details.get('data_version')}). "
1216                  f"You can retrieve it using 'load_artifact' or perform further extractions on it using 'extract_content_from_artifact' "
1217                  f"with this new filename and version."
1218              )
1219          else:
1220              message_to_llm_parts.append(
1221                  f"Attempted to save the large extracted content, but failed: {saved_extracted_artifact_details.get('message')}"
1222              )
1223  
1224      if extracted_content_size_bytes > llm_max_bytes:
1225          was_truncated = True
1226          log.info(
1227              "%s Original extracted content (%d bytes) exceeds LLM return max bytes (%d bytes). Truncating for LLM response.",
1228              log_identifier,
1229              extracted_content_size_bytes,
1230              llm_max_bytes,
1231          )
1232  
1233          if not was_saved:
1234              log.info(
1235                  "%s Saving extracted content now because it needs truncation for LLM response and wasn't saved previously.",
1236                  log_identifier,
1237              )
1238              saved_extracted_artifact_details = await _save_extracted_artifact(
1239                  tool_context,
1240                  host_component,
1241                  extracted_content_bytes,
1242                  filename,
1243                  actual_source_version,
1244                  extraction_goal,
1245                  output_filename_base,
1246                  output_mime_type,
1247              )
1248              if saved_extracted_artifact_details.get("status") == "success":
1249                  was_saved = True
1250                  message_to_llm_parts.append(
1251                      f"The full extracted content (which is being truncated for this response) was saved as artifact "
1252                      f"'{saved_extracted_artifact_details.get('data_filename')}' (version {saved_extracted_artifact_details.get('data_version')}). "
1253                      f"You can retrieve the full content using 'load_artifact' or perform further extractions on it."
1254                  )
1255              else:
1256                  message_to_llm_parts.append(
1257                      f"Attempted to save the extracted content before truncation, but failed: {saved_extracted_artifact_details.get('message')}"
1258                  )
1259  
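          # Truncate on a byte boundary and decode with errors="ignore" so that a multi-byte
          # character split at the cut point is silently dropped rather than raising an error.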
1260          truncation_suffix = "... [Content truncated]"
1261          adjusted_max_bytes = llm_max_bytes - len(truncation_suffix.encode("utf-8"))
1262          if adjusted_max_bytes < 0:
1263              adjusted_max_bytes = 0
1264  
1265          temp_response_bytes = extracted_content_bytes
1266          truncated_bytes = temp_response_bytes[:adjusted_max_bytes]
1267          response_for_llm_str = (
1268              truncated_bytes.decode("utf-8", "ignore") + truncation_suffix
1269          )
1270  
1271          message_to_llm_parts.append(
1272              "The extracted content provided in 'extracted_data_preview' has been truncated due to size limits. "
1273              "If saved, the full version is available in the specified artifact."
1274          )
1275  
1276      if was_saved and was_truncated:
1277          final_status = "success_full_content_saved_preview_returned"
1278      elif was_saved:
1279          final_status = "success_full_content_saved_and_returned"
1280      elif was_truncated:
1281          final_status = "success_content_returned_truncated_and_saved"
1282      else:
1283          final_status = "success_content_returned"
1284  
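      # dict.fromkeys drops duplicate message parts while preserving their original order.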
1285      final_response_dict = {
1286          "status": final_status,
1287          "message_to_llm": " ".join(list(dict.fromkeys(message_to_llm_parts))),
1288          "source_filename": filename,
1289          "source_version_processed": actual_source_version,
1290          "extraction_goal_used": extraction_goal,
1291      }
1292  
1293      if was_truncated:
1294          final_response_dict["extracted_data_preview"] = response_for_llm_str
1295      else:
1296          final_response_dict["extracted_data"] = response_for_llm_str
1297  
1298      if (
1299          saved_extracted_artifact_details
1300          and saved_extracted_artifact_details.get("status") == "success"
1301      ):
1302          final_response_dict["saved_extracted_artifact_details"] = (
1303              saved_extracted_artifact_details
1304          )
1305      elif saved_extracted_artifact_details:
1306          final_response_dict["saved_extracted_artifact_attempt_details"] = (
1307              saved_extracted_artifact_details
1308          )
1309  
1310      log.info(
1311          "%s Tool execution finished. Final status: %s. Response preview: %s",
1312          log_identifier,
1313          final_status,
1314          final_response_dict,
1315      )
1316      # Convert to ToolResult - this is a success path
1317      message = final_response_dict.pop("message_to_llm", "Extraction completed.")
1318      return ToolResult.ok(message, data=final_response_dict)
1319  
1320  
1321  async def append_to_artifact(
1322      filename: str,
1323      content_chunk: str,
1324      mime_type: str,
1325      tool_context: ToolContext = None,
1326  ) -> ToolResult:
1327      """
1328      Appends a chunk of content to an existing artifact. This operation will
1329      create a new version of the artifact. The content_chunk should be a string,
1330      potentially base64 encoded if it represents binary data (indicated by mime_type).
1331      The chunk size should be limited (e.g., max 3KB) by the LLM.
1332  
1333      Args:
1334          filename: The name of the artifact to append to. May contain embeds.
1335          content_chunk: The chunk of content to append (max approx. 3KB).
1336                         If mime_type suggests binary, this should be base64 encoded.
1337                         May contain embeds.
1338          mime_type: The MIME type of the content_chunk. This helps determine if
1339                     base64 decoding is needed for the chunk. The overall artifact's
1340                     MIME type will be preserved from its latest version.
1341                     May contain embeds.
1342          tool_context: The context provided by the ADK framework.
1343  
1344      Returns:
1345          A ToolResult indicating the result, including the new version of the artifact.
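
          Example (illustrative sketch; "report.md" and the chunk text are hypothetical values,
          and 'ctx' stands for the ToolContext supplied by the framework):

              result = await append_to_artifact(
                  filename="report.md",
                  content_chunk="Additional appendix notes.",
                  mime_type="text/markdown",
                  tool_context=ctx,
              )
              # On success, the result reports the new artifact version and total size in bytes.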
1346      """
1347      if not tool_context:
1348          return ToolResult.error(
1349              "ToolContext is missing, cannot append to artifact.",
1350              data={"filename": filename}
1351          )
1352  
1353      log_identifier = f"[BuiltinArtifactTool:append_to_artifact:{filename}]"
1354      log.debug("%s Processing request to append chunk.", log_identifier)
1355  
1356      try:
1357          inv_context = tool_context._invocation_context
1358          artifact_service = inv_context.artifact_service
1359          if not artifact_service:
1360              raise ValueError("ArtifactService is not available in the context.")
1361  
1362          app_name = inv_context.app_name
1363          user_id = inv_context.user_id
1364          session_id = get_original_session_id(inv_context)
1365          host_component = getattr(inv_context.agent, "host_component", None)
1366  
1367          log.debug(
1368              "%s Loading latest version of artifact '%s' content to append to.",
1369              log_identifier,
1370              filename,
1371          )
1372          content_load_result = await load_artifact_content_or_metadata(
1373              artifact_service=artifact_service,
1374              app_name=app_name,
1375              user_id=user_id,
1376              session_id=session_id,
1377              filename=filename,
1378              version="latest",
1379              load_metadata_only=False,
1380              return_raw_bytes=True,
1381              component=host_component,
1382              log_identifier_prefix=f"{log_identifier}[LoadOriginalContent]",
1383          )
1384  
1385          if content_load_result.get("status") != "success":
1386              log.error(
1387                  "%s Failed to load original artifact content '%s': %s",
1388                  log_identifier,
1389                  filename,
1390                  content_load_result.get("message"),
1391              )
1392              return ToolResult.error(
1393                  f"Failed to load original artifact content to append to: {content_load_result.get('message')}",
1394                  data={"filename": filename}
1395              )
1396  
1397          original_artifact_bytes = content_load_result.get("raw_bytes", b"")
1398          original_mime_type = content_load_result.get(
1399              "mime_type", "application/octet-stream"
1400          )
1401          original_version_loaded = content_load_result.get("version", "unknown")
1402          log.info(
1403              "%s Loaded original artifact content '%s' v%s, type: %s, size: %d bytes.",
1404              log_identifier,
1405              filename,
1406              original_version_loaded,
1407              original_mime_type,
1408              len(original_artifact_bytes),
1409          )
1410  
1411          log.debug(
1412              "%s Loading latest version of artifact '%s' metadata.",
1413              log_identifier,
1414              filename,
1415          )
1416          metadata_load_result = await load_artifact_content_or_metadata(
1417              artifact_service=artifact_service,
1418              app_name=app_name,
1419              user_id=user_id,
1420              session_id=session_id,
1421              filename=filename,
1422              version="latest",
1423              load_metadata_only=True,
1424              component=host_component,
1425              log_identifier_prefix=f"{log_identifier}[LoadOriginalMetadata]",
1426          )
1427          original_metadata_dict = {}
1428          if metadata_load_result.get("status") == "success":
1429              original_metadata_dict = metadata_load_result.get("metadata", {})
1430              log.info(
1431                  "%s Loaded original artifact metadata for '%s' v%s.",
1432                  log_identifier,
1433                  filename,
1434                  metadata_load_result.get("version", "unknown"),
1435              )
1436          else:
1437              log.warning(
1438                  "%s Failed to load original artifact metadata for '%s': %s. Proceeding with minimal metadata.",
1439                  log_identifier,
1440                  filename,
1441                  metadata_load_result.get("message"),
1442              )
1443  
1444          chunk_bytes, _ = decode_and_get_bytes(
1445              content_chunk, mime_type, f"{log_identifier}[DecodeChunk]"
1446          )
1447          log.debug(
1448              "%s Decoded content_chunk (declared type: %s) to %d bytes.",
1449              log_identifier,
1450              mime_type,
1451              len(chunk_bytes),
1452          )
1453  
1454          combined_bytes = original_artifact_bytes + chunk_bytes
1455          log.debug(
1456              "%s Appended chunk. New total size: %d bytes.",
1457              log_identifier,
1458              len(combined_bytes),
1459          )
1460  
1461          new_metadata_for_save = {
1462              key: value
1463              for key, value in original_metadata_dict.items()
1464              if key
1465              not in [
1466                  "filename",
1467                  "mime_type",
1468                  "size_bytes",
1469                  "timestamp_utc",
1470                  "schema",
1471                  "version",
1472              ]
1473          }
1474          new_metadata_for_save["appended_from_version"] = original_version_loaded
1475          new_metadata_for_save["appended_chunk_declared_mime_type"] = mime_type
1476  
1477          schema_max_keys = (
1478              host_component.get_config("schema_max_keys", DEFAULT_SCHEMA_MAX_KEYS)
1479              if host_component
1480              else DEFAULT_SCHEMA_MAX_KEYS
1481          )
1482  
1483          save_result = await save_artifact_with_metadata(
1484              artifact_service=artifact_service,
1485              app_name=app_name,
1486              user_id=user_id,
1487              session_id=session_id,
1488              filename=filename,
1489              content_bytes=combined_bytes,
1490              mime_type=original_mime_type,
1491              metadata_dict=new_metadata_for_save,
1492              timestamp=datetime.now(timezone.utc),
1493              schema_max_keys=schema_max_keys,
1494              tool_context=tool_context,
1495          )
1496  
1497          log.info(
1498              "%s Result from save_artifact_with_metadata after append: %s",
1499              log_identifier,
1500              save_result,
1501          )
1502  
1503          if save_result.get("status") == "error":
1504              raise IOError(
1505                  f"Failed to save appended artifact: {save_result.get('message', 'Unknown error')}"
1506              )
1507  
1508          return ToolResult.ok(
1509              f"Chunk appended to '{filename}'. New version is {save_result.get('data_version')} with total size {len(combined_bytes)} bytes.",
1510              data={
1511                  "filename": filename,
1512                  "new_version": save_result.get("data_version"),
1513                  "total_size_bytes": len(combined_bytes),
1514              }
1515          )
1516  
1517      except FileNotFoundError as e:
1518          log.warning("%s Original artifact not found for append: %s", log_identifier, e)
1519          return ToolResult.error(
1520              f"Original artifact '{filename}' not found: {e}",
1521              data={"filename": filename}
1522          )
1523      except ValueError as e:
1524          log.warning("%s Value error during append: %s", log_identifier, e)
1525          return ToolResult.error(str(e), data={"filename": filename})
1526      except IOError as e:
1527          log.warning("%s IO error during append: %s", log_identifier, e)
1528          return ToolResult.error(str(e), data={"filename": filename})
1529      except Exception as e:
1530          log.exception(
1531              "%s Unexpected error appending to artifact '%s': %s",
1532              log_identifier,
1533              filename,
1534              e,
1535          )
1536          return ToolResult.error(
1537              f"Failed to append to artifact: {e}",
1538              data={"filename": filename}
1539          )
1540  
1541  
1542  async def _save_extracted_artifact(
1543      tool_context: ToolContext,
1544      host_component: Any,
1545      extracted_content_bytes: bytes,
1546      source_artifact_filename: str,
1547      source_artifact_version: Union[int, str],
1548      extraction_goal: str,
1549      output_filename_base: Optional[str],
1550      output_mime_type: str,
1551  ) -> Dict[str, Any]:
1552      """
1553      Saves the extracted content as a new artifact with comprehensive metadata.
1554  
1555      Args:
1556          tool_context: The ADK ToolContext.
1557          host_component: The A2A_ADK_HostComponent instance for accessing config and services.
1558          extracted_content_bytes: The raw byte content of the extracted data.
1559          source_artifact_filename: The filename of the original artifact.
1560          source_artifact_version: The version of the original artifact.
1561          extraction_goal: The natural language goal used for extraction.
1562          output_filename_base: Optional base for the new artifact's filename.
1563          output_mime_type: The MIME type of the extracted content.
1564  
1565      Returns:
1566          A dictionary containing details of the saved artifact, as returned by
1567          `save_artifact_with_metadata`.
1568      """
1569      log_identifier = f"[BuiltinArtifactTool:_save_extracted_artifact]"
1570      log.debug("%s Saving extracted content...", log_identifier)
1571  
1572      try:
1573          base_name = output_filename_base or f"{source_artifact_filename}_extracted"
1574          base_name_sanitized = re.sub(r'[<>:"/\\|?*\s]+', "_", base_name)
1575          base_name_sanitized = base_name_sanitized.strip("_")
1576  
1577          suffix = uuid.uuid4().hex[:8]
1578          extension_map = {
1579              "text/plain": ".txt",
1580              "application/json": ".json",
1581              "text/csv": ".csv",
1582              "text/html": ".html",
1583              "image/png": ".png",
1584              "image/jpeg": ".jpg",
1585              "application/pdf": ".pdf",
1586          }
1587          ext = extension_map.get(output_mime_type.lower(), ".dat")
1588          filename = f"{base_name_sanitized}_{suffix}{ext}"
1589          log.debug("%s Generated output filename: %s", log_identifier, filename)
1590  
1591          timestamp = datetime.now(timezone.utc)
1592          metadata_for_saving = {
1593              "description": f"Content extracted/transformed from artifact '{source_artifact_filename}' (version {source_artifact_version}) using goal: '{extraction_goal}'.",
1594              "source_artifact_filename": source_artifact_filename,
1595              "source_artifact_version": source_artifact_version,
1596              "extraction_goal_used": extraction_goal,
1597          }
1598          log.debug(
1599              "%s Prepared metadata for saving: %s", log_identifier, metadata_for_saving
1600          )
1601  
1602          inv_context = tool_context._invocation_context
1603          artifact_service = inv_context.artifact_service
1604          if not artifact_service:
1605              raise ValueError("ArtifactService is not available in the context.")
1606  
1607          app_name = inv_context.app_name
1608          user_id = inv_context.user_id
1609          session_id = get_original_session_id(inv_context)
1610          schema_max_keys = host_component.get_config(
1611              "schema_max_keys", DEFAULT_SCHEMA_MAX_KEYS
1612          )
1613  
1614          log.debug(
1615              "%s Calling save_artifact_with_metadata for '%s' (app: %s, user: %s, session: %s, schema_keys: %d)",
1616              log_identifier,
1617              filename,
1618              app_name,
1619              user_id,
1620              session_id,
1621              schema_max_keys,
1622          )
1623  
1624          save_result = await save_artifact_with_metadata(
1625              artifact_service=artifact_service,
1626              app_name=app_name,
1627              user_id=user_id,
1628              session_id=session_id,
1629              filename=filename,
1630              content_bytes=extracted_content_bytes,
1631              mime_type=output_mime_type,
1632              metadata_dict=metadata_for_saving,
1633              timestamp=timestamp,
1634              schema_max_keys=schema_max_keys,
1635              tool_context=tool_context,
1636          )
1637  
1638          log.info(
1639              "%s Extracted content saved as artifact '%s' (version %s). Result: %s",
1640              log_identifier,
1641              save_result.get("data_filename", filename),
1642              save_result.get("data_version", "N/A"),
1643              save_result.get("status"),
1644          )
1645          return save_result
1646  
1647      except Exception as e:
1648          log.exception(
1649              "%s Error in _save_extracted_artifact for source '%s': %s",
1650              log_identifier,
1651              source_artifact_filename,
1652              e,
1653          )
1654          return {
1655              "status": "error",
1656              "data_filename": filename if "filename" in locals() else "unknown_filename",
1657              "message": f"Failed to save extracted content as artifact: {e}",
1658          }
1659  
1660  
1661  async def _notify_artifact_save(
1662      filename: str,
1663      version: int,
1664      status: str,
1665      tool_context: ToolContext = None,  # Keep tool_context for signature consistency
1666  ) -> Dict[str, Any]:
1667      """
1668      CRITICAL: _notify_artifact_save is automatically invoked by the system as a side-effect when you create artifacts. You should NEVER call this tool yourself. The system will call it for you and provide the results in your next turn. If you manually invoke it, you are making an error.
1669      """
1670      return {
1671          "filename": filename,
1672          "version": version,
1673          "status": status,
1674          "message": "Artifact has been created and provided to the requester",
1675      }
1676  
1677  
1678  _notify_artifact_save_tool_def = BuiltinTool(
1679      name="_notify_artifact_save",
1680      implementation=_notify_artifact_save,
1681      description="CRITICAL: _notify_artifact_save is automatically invoked by the system as a side-effect when you create artifacts. You should NEVER call this tool yourself. The system will call it for you and provide the results in your next turn. If you manually invoke it, you are making an error.",
1682      category="internal",
1683      required_scopes=[],  # No scopes needed for an internal notification tool
1684      parameters=adk_types.Schema(
1685          type=adk_types.Type.OBJECT,
1686          properties={
1687              "filename": adk_types.Schema(
1688                  type=adk_types.Type.STRING,
1689                  description="The name of the artifact that was saved.",
1690              ),
1691              "version": adk_types.Schema(
1692                  type=adk_types.Type.INTEGER,
1693                  description="The version number of the saved artifact.",
1694              ),
1695              "status": adk_types.Schema(
1696                  type=adk_types.Type.STRING,
1697                  description="The status of the save operation.",
1698              ),
1699          },
1700          required=["filename", "version", "status"],
1701      ),
1702      examples=[],
1703  )
1704  
1705  append_to_artifact_tool_def = BuiltinTool(
1706      name="append_to_artifact",
1707      implementation=append_to_artifact,
1708      description="Appends a chunk of content to an existing artifact. This operation will create a new version of the artifact. The content_chunk should be a string, potentially base64 encoded if it represents binary data (indicated by mime_type). The chunk size should be limited (e.g., max 3KB) by the LLM.",
1709      category="artifact_management",
1710      category_name=CATEGORY_NAME,
1711      category_description=CATEGORY_DESCRIPTION,
1712      required_scopes=["tool:artifact:append"],
1713      parameters=adk_types.Schema(
1714          type=adk_types.Type.OBJECT,
1715          properties={
1716              "filename": adk_types.Schema(
1717                  type=adk_types.Type.STRING,
1718                  description="The name of the artifact to append to. May contain embeds.",
1719              ),
1720              "content_chunk": adk_types.Schema(
1721                  type=adk_types.Type.STRING,
1722                  description="The chunk of content to append (max approx. 3KB). If mime_type suggests binary, this should be base64 encoded. May contain embeds.",
1723              ),
1724              "mime_type": adk_types.Schema(
1725                  type=adk_types.Type.STRING,
1726                  description="The MIME type of the content_chunk. This helps determine if base64 decoding is needed for the chunk. The overall artifact's MIME type will be preserved from its latest version. May contain embeds.",
1727              ),
1728          },
1729          required=["filename", "content_chunk", "mime_type"],
1730      ),
1731      examples=[],
1732  )
1733  
1734  list_artifacts_tool_def = BuiltinTool(
1735      name="list_artifacts",
1736      implementation=list_artifacts,
1737      description="Lists all available data artifact filenames and their versions for the current session. Includes a summary of the latest version's metadata for each artifact.",
1738      category="artifact_management",
1739      category_name=CATEGORY_NAME,
1740      category_description=CATEGORY_DESCRIPTION,
1741      required_scopes=["tool:artifact:list"],
1742      parameters=adk_types.Schema(
1743          type=adk_types.Type.OBJECT,
1744          properties={},
1745          required=[],
1746      ),
1747      examples=[],
1748  )
1749  
1750  load_artifact_tool_def = BuiltinTool(
1751      name="load_artifact",
1752      implementation=load_artifact,
1753      description="Loads the content or metadata of a specific artifact version. If load_metadata_only is True, loads the full metadata dictionary. Otherwise, loads text content (potentially truncated) or a summary for binary types. For image artifacts (PNG, JPG, etc.) on vision-enabled agents, the image is returned inline so you can see and analyze it directly. Use this to view images created by tools or uploaded by users. Line numbers can be optionally included for precise line range identification.",
1754      category="artifact_management",
1755      category_name=CATEGORY_NAME,
1756      category_description=CATEGORY_DESCRIPTION,
1757      required_scopes=["tool:artifact:load"],
1758      parameters=adk_types.Schema(
1759          type=adk_types.Type.OBJECT,
1760          properties={
1761              "filename": adk_types.Schema(
1762                  type=adk_types.Type.STRING,
1763                  description="The name of the artifact to load. May contain embeds.",
1764              ),
1765              "version": adk_types.Schema(
1766                  type=adk_types.Type.INTEGER,
1767                  description="The specific version number to load. Must be explicitly provided.",
1768              ),
1769              "load_metadata_only": adk_types.Schema(
1770                  type=adk_types.Type.BOOLEAN,
1771                  description="If True, load only the metadata JSON. Default False.",
1772                  nullable=True,
1773              ),
1774              "max_content_length": adk_types.Schema(
1775                  type=adk_types.Type.INTEGER,
1776                  description="Optional. Maximum character length for text content. If None, uses app configuration. Range: 100-100,000.",
1777                  nullable=True,
1778              ),
1779              "include_line_numbers": adk_types.Schema(
1780                  type=adk_types.Type.BOOLEAN,
1781                  description="If True, prefix each line with its 1-based line number followed by a TAB character. Line numbers are for LLM viewing only and are not stored in the artifact. Default False.",
1782                  nullable=True,
1783              ),
1784          },
1785          required=["filename", "version"],
1786      ),
1787      examples=[],
1788  )
1789  
1790  apply_embed_and_create_artifact_tool_def = BuiltinTool(
1791      name="apply_embed_and_create_artifact",
1792      implementation=apply_embed_and_create_artifact,
1793      description="Resolves an 'artifact_content' embed directive (including modifiers and formatting) and saves the resulting content as a new artifact. The entire embed directive must be provided as a string.",
1794      category="artifact_management",
1795      category_name=CATEGORY_NAME,
1796      category_description=CATEGORY_DESCRIPTION,
1797      required_scopes=["tool:artifact:create", "tool:artifact:load"],
1798      parameters=adk_types.Schema(
1799          type=adk_types.Type.OBJECT,
1800          properties={
1801              "output_filename": adk_types.Schema(
1802                  type=adk_types.Type.STRING,
1803                  description="The desired name for the new artifact.",
1804              ),
1805              "embed_directive": adk_types.Schema(
1806                  type=adk_types.Type.STRING,
1807                  description="The full '«artifact_content:...>>>...>>>format:...»' string.",
1808              ),
1809              "output_metadata": adk_types.Schema(
1810                  type=adk_types.Type.OBJECT,
1811                  description="Optional metadata for the new artifact.",
1812                  nullable=True,
1813              ),
1814          },
1815          required=["output_filename", "embed_directive"],
1816      ),
1817      raw_string_args=["embed_directive"],
1818      examples=[],
1819  )
1820  
1821  extract_content_from_artifact_tool_def = BuiltinTool(
1822      name="extract_content_from_artifact",
1823      implementation=extract_content_from_artifact,
1824      description="Loads an existing artifact, uses an internal LLM to process its content based on an 'extraction_goal,' and manages the output by returning it or saving it as a new artifact. IMPORTANT: If the tool returns an error status (e.g., 'error_encoding_failed', 'error_artifact_not_found'), you MUST relay this error to the user - do NOT attempt to generate or fabricate data. The tool will return a 'message_to_llm' field explaining the error.",
1825      category="artifact_management",
1826      category_name=CATEGORY_NAME,
1827      category_description=CATEGORY_DESCRIPTION,
1828      required_scopes=["tool:artifact:load", "tool:artifact:create"],
1829      parameters=adk_types.Schema(
1830          type=adk_types.Type.OBJECT,
1831          properties={
1832              "filename": adk_types.Schema(
1833                  type=adk_types.Type.STRING,
1834                  description="Name of the source artifact. May contain embeds.",
1835              ),
1836              "extraction_goal": adk_types.Schema(
1837                  type=adk_types.Type.STRING,
1838                  description="Natural language instruction for the LLM on what to extract or how to transform the content. May contain embeds.",
1839              ),
1840              "version": adk_types.Schema(
1841                  type=adk_types.Type.STRING,
1842                  description="Version of the source artifact. Can be an integer or 'latest'. Defaults to 'latest'. May contain embeds.",
1843                  nullable=True,
1844              ),
1845              "output_filename_base": adk_types.Schema(
1846                  type=adk_types.Type.STRING,
1847                  description="Optional base name for the new artifact if the extracted content is saved. May contain embeds.",
1848                  nullable=True,
1849              ),
1850          },
1851          required=["filename", "extraction_goal"],
1852      ),
1853      examples=[],
1854  )
1855  
1856  tool_registry.register(_notify_artifact_save_tool_def)
1857  tool_registry.register(append_to_artifact_tool_def)
1858  tool_registry.register(list_artifacts_tool_def)
1859  tool_registry.register(load_artifact_tool_def)
1860  tool_registry.register(apply_embed_and_create_artifact_tool_def)
1861  tool_registry.register(extract_content_from_artifact_tool_def)
1862  
1863  
1864  async def delete_artifact(
1865      filename: str,
1866      version: Optional[int] = None,
1867      confirm_delete: bool = False,
1868      tool_context: ToolContext = None,
1869  ) -> ToolResult:
1870      """
1871      Deletes all versions of an artifact. Version-specific deletion is not currently supported.
1872  
1873      Args:
1874          filename: The name of the artifact to delete.
1875          version: Reserved for future use. Currently not supported - returns error if specified.
1876          confirm_delete: Must be set to True to confirm deletion. If False, returns confirmation prompt.
1877          tool_context: The context provided by the ADK framework.
1878  
1879      Returns:
1880          A ToolResult indicating the result of the deletion or requesting confirmation.
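
          Example of the two-step confirmation flow (illustrative only; 'old_report.csv' is a
          hypothetical filename and 'ctx' stands for the ToolContext supplied by the framework):

              # The first call without confirm_delete returns a warning plus the version count.
              prompt = await delete_artifact(filename="old_report.csv", tool_context=ctx)
              # A second call with confirm_delete=True performs the irreversible deletion.
              result = await delete_artifact(
                  filename="old_report.csv", confirm_delete=True, tool_context=ctx
              )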
1881      """
1882      if not tool_context:
1883          return ToolResult.error(
1884              "ToolContext is missing, cannot delete artifact.",
1885              data={"filename": filename}
1886          )
1887  
1888      log_identifier = f"[BuiltinArtifactTool:delete_artifact:{filename}]"
1889      log.debug("%s Processing request.", log_identifier)
1890  
1891      try:
1892          inv_context = tool_context._invocation_context
1893          artifact_service = inv_context.artifact_service
1894          if not artifact_service:
1895              raise ValueError("ArtifactService is not available in the context.")
1896  
1897          app_name = inv_context.app_name
1898          user_id = inv_context.user_id
1899          session_id = get_original_session_id(inv_context)
1900  
1901          if not hasattr(artifact_service, "delete_artifact"):
1902              raise NotImplementedError(
1903                  "ArtifactService does not support deleting artifacts."
1904              )
1905  
1906          # Error if version-specific deletion requested (not currently supported)
1907          if version is not None:
1908              return ToolResult.error(
1909                  f"Deleting a specific version ({version}) is not currently supported. Only deletion of ALL versions is supported. To delete all versions, omit 'version' and set confirm_delete=True.",
1910                  data={"filename": filename, "version_requested": version}
1911              )
1912  
1913          # Get version list for confirmation message
1914          versions = await artifact_service.list_versions(
1915              app_name=app_name, user_id=user_id, session_id=session_id, filename=filename
1916          )
1917  
1918          # Require confirmation before deleting
1919          if not confirm_delete:
1920              count = len(versions) if versions else "unknown number of"
1921              return ToolResult.partial(
1922                  f"WARNING: This operation is irreversible and will permanently delete artifact '{filename}' and ALL {count} version(s). To proceed, call this tool again with confirm_delete=True.",
1923                  data={
1924                      "filename": filename,
1925                      "version_count": len(versions) if versions else None,
1926                      "versions": versions,
1927                      "confirmation_required": True,
1928                  }
1929              )
1930  
1931          # Proceed with deletion
1932          await artifact_service.delete_artifact(
1933              app_name=app_name,
1934              user_id=user_id,
1935              session_id=session_id,
1936              filename=filename,
1937          )
1938  
1939          log.info("%s Successfully deleted artifact '%s'.", log_identifier, filename)
1940          return ToolResult.ok(
1941              f"Artifact '{filename}' deleted successfully.",
1942              data={
1943                  "filename": filename,
1944                  "versions_deleted": len(versions) if versions else None,
1945              }
1946          )
1947  
1948      except FileNotFoundError as e:
1949          log.warning("%s Artifact not found for deletion: %s", log_identifier, e)
1950          return ToolResult.error(
1951              f"Artifact '{filename}' not found.",
1952              data={"filename": filename}
1953          )
1954      except Exception as e:
1955          log.exception(
1956              "%s Error deleting artifact '%s': %s", log_identifier, filename, e
1957          )
1958          return ToolResult.error(
1959              f"Failed to delete artifact: {e}",
1960              data={"filename": filename}
1961          )
1962  
1963  
1964  delete_artifact_tool_def = BuiltinTool(
1965      name="delete_artifact",
1966      implementation=delete_artifact,
1967      description="Deletes all versions of an artifact. IMPORTANT: Requires explicit confirmation via confirm_delete=True parameter. The first call without confirmation will return details about what will be deleted.",
1968      category="artifact_management",
1969      category_name=CATEGORY_NAME,
1970      category_description=CATEGORY_DESCRIPTION,
1971      required_scopes=["tool:artifact:delete"],
1972      parameters=adk_types.Schema(
1973          type=adk_types.Type.OBJECT,
1974          properties={
1975              "filename": adk_types.Schema(
1976                  type=adk_types.Type.STRING,
1977                  description="The name of the artifact to delete.",
1978              ),
1979              "version": adk_types.Schema(
1980                  type=adk_types.Type.INTEGER,
1981                  description="Reserved for future use. Version-specific deletion is not currently supported - will return error if specified.",
1982                  nullable=True,
1983              ),
1984              "confirm_delete": adk_types.Schema(
1985                  type=adk_types.Type.BOOLEAN,
1986                  description="Must be set to True to actually perform the deletion. If False or omitted, returns a confirmation prompt with details about what will be deleted (including version count).",
1987                  nullable=True,
1988              ),
1989          },
1990          required=["filename"],
1991      ),
1992      examples=[],
1993  )
1994  
1995  tool_registry.register(delete_artifact_tool_def)
1996  
1997  
1998  def _perform_single_replacement(
1999      content: str,
2000      search_expr: str,
2001      replace_expr: str,
2002      is_regex: bool,
2003      regex_flags: str,
2004      log_identifier: str,
2005      strict_match_validation: bool = False,
2006  ) -> Tuple[str, int, Optional[str]]:
2007      """
2008      Performs a single search-and-replace operation.
2009  
2010      Args:
2011          content: The text content to search/replace in
2012          search_expr: The search pattern (literal or regex)
2013          replace_expr: The replacement text
2014          is_regex: If True, search_expr is treated as regex
2015          regex_flags: Flags for regex behavior ('g', 'i', 'm', 's')
2016          log_identifier: Logging prefix
2017          strict_match_validation: If True, error on multiple matches without 'g' flag (for batch mode)
2018  
2019      Returns:
2020          tuple: (new_content, match_count, error_message)
2021                 error_message is None on success
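
          Example (illustrative; the input values are made up):

              new_text, n, err = _perform_single_replacement(
                  "alpha-beta-gamma", "-", "_", False, "", "[example]"
              )
              # Literal mode replaces every occurrence: new_text == "alpha_beta_gamma", n == 2, err is None.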
2022      """
2023      match_count = 0
2024      new_content = content
2025  
2026      if is_regex:
2027          # Parse regex flags
2028          flags_value = 0
2029          global_replace = False
2030  
2031          if regex_flags:
2032              for flag_char in regex_flags.lower():
2033                  if flag_char == "g":
2034                      global_replace = True
2035                  elif flag_char == "i":
2036                      flags_value |= re.IGNORECASE
2037                  elif flag_char == "m":
2038                      flags_value |= re.MULTILINE
2039                  elif flag_char == "s":
2040                      flags_value |= re.DOTALL
2041                  else:
2042                      log.warning(
2043                          "%s Ignoring unrecognized regexp flag: '%s'",
2044                          log_identifier,
2045                          flag_char,
2046                      )
2047  
2048          # Convert JavaScript-style capture groups ($1, $2) to Python style (\1, \2)
2049          # Also handle escaped dollar signs ($$) -> literal $
2050          python_replace_expr = replace_expr
2051          # First, protect escaped dollars: $$ -> a placeholder
2052          python_replace_expr = python_replace_expr.replace("$$", "\x00DOLLAR\x00")
2053          # Convert capture groups: $1 -> \1
2054          python_replace_expr = re.sub(r"\$(\d+)", r"\\\1", python_replace_expr)
2055          # Restore escaped dollars: placeholder -> $
2056          python_replace_expr = python_replace_expr.replace("\x00DOLLAR\x00", "$")
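          # e.g. a replace_expr of "item $1 costs $$5" becomes r"item \1 costs $5" at this point.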
2057  
2058          try:
2059              # Compile the regex pattern
2060              pattern = re.compile(search_expr, flags_value)
2061  
2062              # Count matches first
2063              match_count = len(pattern.findall(content))
2064  
2065              if match_count == 0:
2066                  return content, 0, f"No matches found"
2067  
2068              # Check for multiple matches without global flag (only in strict mode for batch operations)
2069              if strict_match_validation and match_count > 1 and not global_replace:
2070                  return (
2071                      content,
2072                      match_count,
2073                      f"Multiple matches found ({match_count}) but global flag 'g' not set",
2074                  )
2075  
2076              # Perform replacement
2077              count_limit = 0 if global_replace else 1
2078              new_content = pattern.sub(python_replace_expr, content, count=count_limit)
2079  
2080              return new_content, match_count, None
2081  
2082          except re.error as regex_err:
2083              return content, 0, f"Invalid regular expression: {regex_err}"
2084  
2085      else:
2086          # Literal string replacement
2087          match_count = content.count(search_expr)
2088  
2089          if match_count == 0:
2090              return content, 0, f"No matches found"
2091  
2092          # Replace all occurrences for literal mode
2093          new_content = content.replace(search_expr, replace_expr)
2094          return new_content, match_count, None
2095  
2096  
2097  async def artifact_search_and_replace_regex(
2098      filename: Artifact,
2099      search_expression: Optional[str] = None,
2100      replace_expression: Optional[str] = None,
2101      is_regexp: bool = False,
2102      regexp_flags: Optional[str] = "",
2103      new_filename: Optional[str] = None,
2104      new_description: Optional[str] = None,
2105      replacements: Optional[List[Dict[str, Any]]] = None,
2106      tool_context: ToolContext = None,
2107  ) -> ToolResult:
2108      """
2109      Performs search and replace on an artifact's text content using either
2110      literal string matching or regular expressions. In literal mode, every occurrence of the search
2111      string is replaced. In regex mode, only the first match is replaced unless the 'g' flag is set in regexp_flags.
2112  
2113      Handling Multi-line Search and Replace:
2114  
2115          When searching for or replacing text that spans multiple lines:
2116  
2117          - In literal mode (is_regexp=false): Include actual newline characters directly in your search_expression
2118          and replace_expression parameters. Do NOT use escape sequences like \n - the tool will search for those
2119          literal characters. Multi-line parameter values are fully supported in the XML parameter format.
2120  
2121          - In regex mode (is_regexp=true): Use the regex pattern \n to match newline characters in your pattern.
2122  
2123      For multiple independent replacements:
2124  
2125          Use the replacements array parameter to perform all replacements atomically in a single tool call, which is more efficient than multiple sequential calls.
2126  
2127      Args:
2128          filename: The artifact to search/replace in (pre-loaded by the framework).
2129          search_expression: The pattern to search for (regex if is_regexp=true, literal otherwise).
2130          replace_expression: The replacement text. For regex mode, supports capture groups ($1, $2, etc.). Use $$ to insert a literal dollar sign
2131          is_regexp: If True, treat search_expression as a regular expression. If False, treat as literal string.
2132          regexp_flags: Flags for regex behavior (only used when is_regexp=true).
2133                       String of letters: 'g' (global/replace-all), 'i' (case-insensitive), 'm' (multiline), 's' (dotall).
2134                       Defaults to empty string (no flags).
2135          new_filename: Optional. If provided, saves the result as a new artifact with this name.
2136          new_description: Optional. Description for the new/updated artifact.
              replacements: Optional. A list of replacement dicts for batch mode. Each dict requires
                            'search', 'replace', and 'is_regexp' keys and may include 'regexp_flags'.
                            All replacements are applied atomically, and a regex entry that matches
                            more than once must include the 'g' flag. Mutually exclusive with
                            search_expression/replace_expression.
              tool_context: The context provided by the ADK framework.
2137  
2138      Returns:
2139          A ToolResult containing the result status, filename, version, match count, and any error messages.
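
          Examples (illustrative only; the patterns are hypothetical, 'notes' stands for the
          pre-loaded Artifact, and 'ctx' for the ToolContext supplied by the framework):

              # Regex mode: rewrite every "v<digits>" as "version <digits>" via the 'g' flag.
              result = await artifact_search_and_replace_regex(
                  filename=notes,
                  search_expression=r"v(\d+)",
                  replace_expression="version $1",
                  is_regexp=True,
                  regexp_flags="g",
                  tool_context=ctx,
              )

              # Batch mode: apply several independent replacements atomically.
              result = await artifact_search_and_replace_regex(
                  filename=notes,
                  replacements=[
                      {"search": "TODO", "replace": "DONE", "is_regexp": False},
                      {"search": "colour", "replace": "color", "is_regexp": False},
                  ],
                  tool_context=ctx,
              )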
2140      """
2141      if not tool_context:
2142          return ToolResult.error(
2143              "ToolContext is missing, cannot perform search and replace.",
2144              data={"filename": filename.filename if isinstance(filename, Artifact) else filename}
2145          )
2146  
2147      artifact_filename = filename.filename
2148      artifact_version = filename.version
2149  
2150      log_identifier = (
2151          f"[BuiltinArtifactTool:artifact_search_and_replace_regex:{artifact_filename}:{artifact_version}]"
2152      )
2153      log.debug("%s Processing request.", log_identifier)
2154  
2155      # Validate parameter combinations
2156      if replacements is not None and (
2157          search_expression is not None or replace_expression is not None
2158      ):
2159          return ToolResult.error(
2160              "Cannot provide both 'replacements' array and individual 'search_expression'/'replace_expression'. Use one or the other.",
2161              data={"filename": artifact_filename}
2162          )
2163  
2164      if replacements is None and (
2165          search_expression is None or replace_expression is None
2166      ):
2167          return ToolResult.error(
2168              "Must provide either 'replacements' array or both 'search_expression' and 'replace_expression'.",
2169              data={"filename": artifact_filename}
2170          )
2171  
2172      if replacements is not None:
2173          if not isinstance(replacements, list) or len(replacements) == 0:
2174              return ToolResult.error(
2175                  "replacements must be a non-empty array.",
2176                  data={"filename": artifact_filename}
2177              )
2178  
2179          # Validate each replacement entry
2180          for idx, repl in enumerate(replacements):
2181              if not isinstance(repl, dict):
2182                  return ToolResult.error(
2183                      f"Replacement at index {idx} must be a dictionary.",
2184                      data={"filename": artifact_filename}
2185                  )
2186              if "search" not in repl or "replace" not in repl or "is_regexp" not in repl:
2187                  return ToolResult.error(
2188                      f"Replacement at index {idx} missing required fields: 'search', 'replace', 'is_regexp'.",
2189                      data={"filename": artifact_filename}
2190                  )
2191  
2192      # Validate inputs for single replacement mode
2193      if replacements is None and not search_expression:
2194          return ToolResult.error(
2195              "search_expression cannot be empty.",
2196              data={"filename": artifact_filename}
2197          )
2198  
2199      # Determine output filename
2200      output_filename = new_filename if new_filename else artifact_filename
2201  
2202      if new_filename and not is_filename_safe(new_filename):
2203          return ToolResult.error(
2204              f"Invalid new_filename: '{new_filename}'. Filename must not contain path separators or traversal sequences.",
2205              data={"filename": artifact_filename}
2206          )
2207  
2208      try:
2209          # Use pre-loaded artifact data
2210          source_bytes = filename.as_bytes()
2211          source_mime_type = filename.mime_type or "application/octet-stream"
2212          actual_version = artifact_version
2213  
2214          # Verify it's a text-based artifact
2215          if not is_text_based_file(source_mime_type, source_bytes):
2216              return ToolResult.error(
2217                  f"Cannot perform search and replace on binary artifact of type '{source_mime_type}'. This tool only works with text-based content.",
2218                  data={"filename": artifact_filename, "version": actual_version}
2219              )
2220  
2221          # Decode the content - try multiple encodings for Windows-exported files
2222          original_content = None
2223          encoding_used = None
2224          encodings_to_try = ['utf-8', 'utf-16', 'cp1252', 'latin-1']
2225          decode_errors = []
2226          
2227          for encoding in encodings_to_try:
2228              try:
2229                  original_content = source_bytes.decode(encoding)
2230                  encoding_used = encoding
2231                  if encoding != 'utf-8':
2232                      log.info(
2233                          "%s Successfully decoded artifact using fallback encoding '%s' (UTF-8 failed)",
2234                          log_identifier,
2235                          encoding,
2236                      )
2237                  break
2238              except UnicodeDecodeError as e:
2239                  decode_errors.append(f"{encoding}: {e}")
2240                  continue
2241          
2242          if original_content is None:
2243              log.error(
2244                  "%s Failed to decode artifact content with any supported encoding. Errors: %s",
2245                  log_identifier,
2246                  "; ".join(decode_errors),
2247              )
2248              return ToolResult.error(
2249                  f"Failed to decode artifact content as UTF-8: {decode_err}",
2250                  data={"filename": artifact_filename, "version": actual_version}
2251              )
2252  
2253          # Perform the search and replace
2254          if replacements:
2255              # Batch mode
2256              log.info(
2257                  "%s Processing batch of %d replacements.",
2258                  log_identifier,
2259                  len(replacements),
2260              )
2261  
2262              current_content = original_content
2263              replacement_results = []
2264              total_matches = 0
2265  
2266              for idx, repl in enumerate(replacements):
2267                  search_expr = repl["search"]
2268                  replace_expr = repl["replace"]
2269                  is_regex = repl["is_regexp"]
2270                  regex_flags = repl.get("regexp_flags", "")
2271  
2272                  # Perform replacement on current state (with strict validation for batch mode)
2273                  new_content, match_count, error_msg = _perform_single_replacement(
2274                      current_content,
2275                      search_expr,
2276                      replace_expr,
2277                      is_regex,
2278                      regex_flags,
2279                      log_identifier,
2280                      strict_match_validation=True,
2281                  )
2282  
2283                  if error_msg:
2284                      # Rollback - return error with details
2285                      log.warning(
2286                          "%s Batch replacement failed at index %d: %s",
2287                          log_identifier,
2288                          idx,
2289                          error_msg,
2290                      )
2291  
2292                      # Mark all as skipped
2293                      all_results = replacement_results + [
2294                          {
2295                              "search": repl["search"],
2296                              "match_count": match_count,
2297                              "status": "error",
2298                              "error": error_msg,
2299                          }
2300                      ]
2301                      # Add remaining as skipped
2302                      for i in range(idx + 1, len(replacements)):
2303                          all_results.append(
2304                              {
2305                                  "search": replacements[i]["search"],
2306                                  "match_count": 0,
2307                                  "status": "skipped",
2308                              }
2309                          )
2310  
2311                      return ToolResult.error(
2312                          f"Batch replacement failed: No changes applied due to error in replacement {idx + 1}",
2313                          data={
2314                              "filename": artifact_filename,
2315                              "version": actual_version,
2316                              "replacement_results": all_results,
2317                              "failed_replacement": {
2318                                  "index": idx,
2319                                  "search": search_expr,
2320                                  "error": error_msg,
2321                              },
2322                          }
2323                      )
2324  
2325                  # Success - update state and continue
2326                  current_content = new_content
2327                  total_matches += match_count
2328                  replacement_results.append(
2329                      {
2330                          "search": search_expr,
2331                          "match_count": match_count,
2332                          "status": "success",
2333                      }
2334                  )
2335  
2336                  log.debug(
2337                      "%s Replacement %d/%d succeeded: %d matches",
2338                      log_identifier,
2339                      idx + 1,
2340                      len(replacements),
2341                      match_count,
2342                  )
2343  
2344              # All replacements succeeded
2345              final_content = current_content
2346              total_replacements = len(replacements)
2347  
2348              log.info(
2349                  "%s Batch replacement succeeded: %d operations, %d total matches",
2350                  log_identifier,
2351                  total_replacements,
2352                  total_matches,
2353              )
2354  
2355          else:
2356              # Single replacement mode (backward compatible)
2357              final_content, match_count, error_msg = _perform_single_replacement(
2358                  original_content,
2359                  search_expression,
2360                  replace_expression,
2361                  is_regexp,
2362                  regexp_flags,
2363                  log_identifier,
2364              )
2365  
2366              if error_msg:
2367                  # Check if it's a "no matches" error specifically
2368                  if match_count == 0 and "No matches found" in error_msg:
2369                      return ToolResult.partial(
2370                          f"No matches found for pattern '{search_expression}'. Artifact not modified.",
2371                          data={
2372                              "filename": artifact_filename,
2373                              "version": actual_version,
2374                              "match_count": 0,
2375                              "no_matches": True,
2376                          }
2377                      )
2378                  else:
2379                      return ToolResult.error(
2380                          error_msg,
2381                          data={"filename": artifact_filename, "version": actual_version}
2382                      )
2383  
2384              total_replacements = 1
2385              total_matches = match_count
2386              replacement_results = None
2387  
2388          # Prepare metadata for the output artifact
2389          if replacements:
2390              new_metadata = {
2391                  "source": f"artifact_search_and_replace_regex (batch) from '{artifact_filename}' v{actual_version}",
2392                  "total_replacements": total_replacements,
2393                  "total_matches": total_matches,
2394              }
2395          else:
2396              new_metadata = {
2397                  "source": f"artifact_search_and_replace_regex from '{artifact_filename}' v{actual_version}",
2398                  "search_expression": search_expression,
2399                  "replace_expression": replace_expression,
2400                  "is_regexp": is_regexp,
2401                  "match_count": match_count,
2402              }
2403  
2404          if regexp_flags and is_regexp:
2405              new_metadata["regexp_flags"] = regexp_flags
2406  
2407          # Determine description for the output artifact
2408          artifact_description = new_description
2409          if not artifact_description and not new_filename:
2410              # Preserve original description when updating the same artifact
2411              original_metadata = filename.metadata or {}
2412              artifact_description = original_metadata.get("description")
2413  
2414          # Return appropriate response based on mode
2415          new_content_bytes = final_content.encode("utf-8")
2416  
2417          if replacements:
2418              return ToolResult.ok(
2419                  f"Batch replacement completed: {total_replacements} operations, {total_matches} total matches",
2420                  data={
2421                      "source_filename": artifact_filename,
2422                      "source_version": actual_version,
2423                      "total_replacements": total_replacements,
2424                      "replacement_results": replacement_results,
2425                      "total_matches": total_matches,
2426                  },
2427                  data_objects=[
2428                      DataObject(
2429                          name=output_filename,
2430                          content=new_content_bytes,
2431                          mime_type=source_mime_type,
2432                          disposition=DataDisposition.ARTIFACT,
2433                          description=artifact_description,
2434                          metadata=new_metadata,
2435                      )
2436                  ],
2437              )
2438          else:
2439              # Compute replacements_made
2440              # For literal replacements, all matches are replaced
2441              # For regex without 'g' flag, only first match is replaced
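                  # Worked example (hypothetical values): if the pattern occurs 3 times,
                  # literal mode or regex with 'g' gives replacements_made == match_count == 3,
                  # while regex without 'g' gives replacements_made == 1.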
2442              global_replace = "g" in (regexp_flags or "")
2443              replacements_made = (
2444                  match_count if not is_regexp or global_replace else min(match_count, 1)
2445              )
2446  
2447              return ToolResult.ok(
2448                  f"Successfully performed {'regex' if is_regexp else 'literal'} search and replace. "
2449                  f"Found {match_count} match(es).",
2450                  data={
2451                      "source_filename": artifact_filename,
2452                      "source_version": actual_version,
2453                      "match_count": match_count,
2454                      "replacements_made": replacements_made,
2455                  },
2456                  data_objects=[
2457                      DataObject(
2458                          name=output_filename,
2459                          content=new_content_bytes,
2460                          mime_type=source_mime_type,
2461                          disposition=DataDisposition.ARTIFACT,
2462                          description=artifact_description,
2463                          metadata=new_metadata,
2464                      )
2465                  ],
2466              )
2467  
2468      except FileNotFoundError as fnf_err:
2469          log.warning("%s Artifact not found: %s", log_identifier, fnf_err)
2470          return ToolResult.error(
2471              f"Artifact not found: {fnf_err}",
2472              data={"filename": artifact_filename, "version": artifact_version}
2473          )
2474      except Exception as e:
2475          log.exception(
2476              "%s Unexpected error during search and replace: %s", log_identifier, e
2477          )
2478          return ToolResult.error(
2479              f"Unexpected error: {e}",
2480              data={"filename": artifact_filename, "version": artifact_version}
2481          )
2482  
2483  
2484  artifact_search_and_replace_regex_tool_def = BuiltinTool(
2485      name="artifact_search_and_replace_regex",
2486      implementation=artifact_search_and_replace_regex,
2487      description="Performs search and replace on an artifact's text content using either literal string matching or regular expressions. Supports both single replacements and atomic batch replacements for efficiency.",
2488      category="artifact_management",
2489      category_name=CATEGORY_NAME,
2490      category_description=CATEGORY_DESCRIPTION,
2491      required_scopes=["tool:artifact:load", "tool:artifact:create"],
2492      parameters=adk_types.Schema(
2493          type=adk_types.Type.OBJECT,
2494          properties={
2495              "filename": adk_types.Schema(
2496                  type=adk_types.Type.STRING,
2497                  description="The name (and optional :version) of the artifact to search/replace in.",
2498              ),
2499              "search_expression": adk_types.Schema(
2500                  type=adk_types.Type.STRING,
2501                  description="The pattern to search for (single replacement mode). If is_regexp is true, this is treated as a regular expression. Otherwise, it's a literal string. Do not use if 'replacements' is provided.",
2502                  nullable=True,
2503              ),
2504              "replace_expression": adk_types.Schema(
2505                  type=adk_types.Type.STRING,
2506                  description="The replacement text (single replacement mode). For regex mode, supports capture group references using $1, $2, etc. Use $$ to insert a literal dollar sign. Do not use if 'replacements' is provided.",
2507                  nullable=True,
2508              ),
2509              "is_regexp": adk_types.Schema(
2510                  type=adk_types.Type.BOOLEAN,
2511                  description="If true, treat search_expression as a regular expression. If false, treat as literal string. Only used in single replacement mode.",
2512                  nullable=True,
2513              ),
2514              "regexp_flags": adk_types.Schema(
2515                  type=adk_types.Type.STRING,
2516                  description="Flags for regex behavior (only used when is_regexp=true in single mode). String of letters: 'g' (global/replace all), 'i' (case-insensitive), 'm' (multiline), 's' (dotall). Example: 'gim'. Defaults to empty string.",
2517                  nullable=True,
2518              ),
2519              "new_filename": adk_types.Schema(
2520                  type=adk_types.Type.STRING,
2521                  description="Optional. If provided, saves the result as a new artifact with this name instead of creating a new version of the original.",
2522                  nullable=True,
2523              ),
2524              "new_description": adk_types.Schema(
2525                  type=adk_types.Type.STRING,
2526                  description="Optional. Description for the new/updated artifact.",
2527                  nullable=True,
2528              ),
2529              "replacements": adk_types.Schema(
2530                  type=adk_types.Type.ARRAY,
2531                  items=adk_types.Schema(
2532                      type=adk_types.Type.OBJECT,
2533                      properties={
2534                          "search": adk_types.Schema(
2535                              type=adk_types.Type.STRING,
2536                              description="The search pattern (literal string or regex).",
2537                          ),
2538                          "replace": adk_types.Schema(
2539                              type=adk_types.Type.STRING,
2540                              description="The replacement text. For regex mode, supports $1, $2, etc. Use $$ for literal $.",
2541                          ),
2542                          "is_regexp": adk_types.Schema(
2543                              type=adk_types.Type.BOOLEAN,
2544                              description="If true, 'search' is a regex pattern. If false, literal string.",
2545                          ),
2546                          "regexp_flags": adk_types.Schema(
2547                              type=adk_types.Type.STRING,
2548                              description="Flags for regex: 'g' (global), 'i' (case-insensitive), 'm' (multiline), 's' (dotall). Default: ''.",
2549                              nullable=True,
2550                          ),
2551                      },
2552                      required=["search", "replace", "is_regexp"],
2553                  ),
2554                  description="Optional. Array of replacement operations to perform atomically. Each operation is processed sequentially on the cumulative result. If any operation fails, all changes are rolled back. Do not use with 'search_expression' or 'replace_expression'.",
2555                  nullable=True,
2556              ),
2557          },
2558          required=["filename"],
2559      ),
2560      examples=[],
2561  )
2562  
2563  tool_registry.register(artifact_search_and_replace_regex_tool_def)
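
      # Illustrative call shapes for this tool -- a minimal sketch only. The filenames and
      # patterns below are hypothetical, and the exact invocation path (LLM tool call,
      # framework dispatch) is outside this module; arguments are shown as JSON-style payloads.
      #
      # Single replacement (regex with a capture group, replacing every match via the 'g' flag):
      #   {"filename": "notes.txt", "search_expression": "version ([0-9]+)",
      #    "replace_expression": "release $1", "is_regexp": true, "regexp_flags": "g"}
      #
      # Atomic batch (operations applied sequentially to the cumulative result; nothing is
      # saved if any step fails):
      #   {"filename": "config.yaml", "replacements": [
      #       {"search": "staging", "replace": "production", "is_regexp": false},
      #       {"search": "port: [0-9]+", "replace": "port: 8443", "is_regexp": true, "regexp_flags": "g"}
      #   ]}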