builtin_artifact_tools.py
1 """ 2 Built-in ADK Tools for Artifact Management within the A2A Host. 3 These tools interact with the ADK ArtifactService via the ToolContext and 4 use state_delta for signaling artifact return requests to the host component. 5 Metadata handling is integrated via artifact_helpers. 6 """ 7 8 import logging 9 import uuid 10 import json 11 import re 12 import fnmatch 13 from typing import Any, Dict, List, Optional, Tuple, Union, TYPE_CHECKING 14 from datetime import datetime, timezone 15 from google.adk.tools import ToolContext 16 17 if TYPE_CHECKING: 18 from google.adk.agents.invocation_context import InvocationContext 19 from google.genai import types as adk_types 20 from .tool_definition import BuiltinTool 21 from .tool_result import ToolResult, DataObject, DataDisposition 22 from .artifact_types import Artifact 23 from .registry import tool_registry 24 from ...agent.utils.artifact_helpers import ( 25 save_artifact_with_metadata, 26 decode_and_get_bytes, 27 load_artifact_content_or_metadata, 28 is_filename_safe, 29 METADATA_SUFFIX, 30 DEFAULT_SCHEMA_MAX_KEYS, 31 ) 32 from ...common.utils.embeds import ( 33 evaluate_embed, 34 EMBED_REGEX, 35 EMBED_CHAIN_DELIMITER, 36 ) 37 from ...common.utils.embeds.types import ResolutionMode 38 from ...agent.utils.context_helpers import get_original_session_id 39 from ...agent.adk.models.lite_llm import LiteLlm 40 from google.adk.models import LlmRequest 41 from google.adk.models.registry import LLMRegistry 42 from ...common.utils.mime_helpers import is_text_based_file, is_image_artifact 43 44 log = logging.getLogger(__name__) 45 46 CATEGORY_NAME = "Artifact Management" 47 CATEGORY_DESCRIPTION = "List, read, create, update, and delete artifacts." 48 49 50 async def _internal_create_artifact( 51 filename: str, 52 content: str, 53 mime_type: str, 54 tool_context: ToolContext = None, 55 description: Optional[str] = None, 56 metadata_json: Optional[str] = None, 57 schema_max_keys: Optional[int] = None, 58 tags: Optional[List[str]] = None, 59 ) -> ToolResult: 60 """ 61 Internal helper to create an artifact with its first chunk of content and metadata. 62 This function is not intended to be called directly by the LLM. 63 It is used by callbacks that process fenced artifact blocks. 64 65 Args: 66 filename: The desired name for the artifact. 67 content: The first chunk of the artifact content, as a string. 68 If the mime_type suggests binary data, this string is expected 69 to be base64 encoded. 70 mime_type: The MIME type of the content. 71 tool_context: The ADK ToolContext, required for accessing services. 72 description (str, optional): A description for the artifact. 73 metadata_json (str, optional): A JSON string of additional metadata. 74 schema_max_keys (int, optional): Max keys for schema inference. 75 tags (List[str], optional): Tags for categorization (e.g., ["__working"]). 76 77 Returns: 78 A ToolResult indicating the result of the save operation. 
79 """ 80 if not tool_context: 81 return ToolResult.error( 82 "ToolContext is missing, cannot save artifact.", 83 data={"filename": filename} 84 ) 85 86 if not is_filename_safe(filename): 87 return ToolResult.error( 88 "Filename is invalid or contains disallowed characters (e.g., '/', '..').", 89 data={"filename": filename} 90 ) 91 92 log_identifier = f"[BuiltinArtifactTool:_internal_create_artifact:{filename}]" 93 94 final_metadata = {} 95 if description: 96 final_metadata["description"] = description 97 if metadata_json: 98 try: 99 final_metadata.update(json.loads(metadata_json)) 100 except (json.JSONDecodeError, TypeError): 101 log.warning( 102 "%s Invalid JSON in metadata_json attribute: %s", 103 log_identifier, 104 metadata_json, 105 ) 106 107 final_metadata["metadata_parsing_error"] = ( 108 f"Invalid JSON provided: {metadata_json}" 109 ) 110 111 log.debug("%s Processing request with metadata: %s", log_identifier, final_metadata) 112 113 try: 114 inv_context = tool_context._invocation_context 115 artifact_bytes, final_mime_type = decode_and_get_bytes( 116 content, mime_type, log_identifier 117 ) 118 max_keys_to_use = ( 119 schema_max_keys if schema_max_keys is not None else DEFAULT_SCHEMA_MAX_KEYS 120 ) 121 if schema_max_keys is not None: 122 log.debug( 123 "%s Using schema_max_keys provided by LLM: %d", 124 log_identifier, 125 schema_max_keys, 126 ) 127 else: 128 log.debug( 129 "%s Using default schema_max_keys: %d", 130 log_identifier, 131 DEFAULT_SCHEMA_MAX_KEYS, 132 ) 133 134 artifact_service = inv_context.artifact_service 135 if not artifact_service: 136 raise ValueError("ArtifactService is not available in the context.") 137 session_last_update_time = inv_context.session.last_update_time 138 timestamp_for_artifact: datetime 139 if isinstance(session_last_update_time, datetime): 140 timestamp_for_artifact = session_last_update_time 141 elif isinstance(session_last_update_time, (int, float)): 142 log.debug( 143 "%s Converting numeric session.last_update_time (%s) to datetime.", 144 log_identifier, 145 session_last_update_time, 146 ) 147 try: 148 timestamp_for_artifact = datetime.fromtimestamp( 149 session_last_update_time, timezone.utc 150 ) 151 except Exception as e: 152 log.warning( 153 "%s Failed to convert numeric timestamp %s to datetime: %s. Using current time.", 154 log_identifier, 155 session_last_update_time, 156 e, 157 ) 158 timestamp_for_artifact = datetime.now(timezone.utc) 159 else: 160 if session_last_update_time is not None: 161 log.warning( 162 "%s Unexpected type for session.last_update_time: %s. 
Using current time.", 163 log_identifier, 164 type(session_last_update_time), 165 ) 166 timestamp_for_artifact = datetime.now(timezone.utc) 167 result = await save_artifact_with_metadata( 168 artifact_service=artifact_service, 169 app_name=inv_context.app_name, 170 user_id=inv_context.user_id, 171 session_id=get_original_session_id(inv_context), 172 filename=filename, 173 content_bytes=artifact_bytes, 174 mime_type=final_mime_type, 175 metadata_dict=final_metadata, 176 timestamp=timestamp_for_artifact, 177 schema_max_keys=max_keys_to_use, 178 tags=tags, 179 tool_context=tool_context, 180 suppress_visualization_signal=True, # Fenced blocks handle their own visualization signals 181 ) 182 log.info( 183 "%s Result from save_artifact_with_metadata: %s", log_identifier, result 184 ) 185 # Convert helper dict result to ToolResult 186 status = result.pop("status", "success") 187 if status == "error": 188 message = result.pop("message", "Unknown error") 189 return ToolResult.error(message, data=result) 190 message = result.pop("message", f"Created artifact {filename}") 191 return ToolResult.ok(message, data=result) 192 except Exception as e: 193 log.exception( 194 "%s Error creating artifact '%s': %s", log_identifier, filename, e 195 ) 196 return ToolResult.error( 197 f"Failed to create artifact: {e}", 198 data={"filename": filename} 199 ) 200 201 202 async def list_artifacts(tool_context: ToolContext = None) -> ToolResult: 203 """ 204 Lists all available data artifact filenames and their versions for the current session. 205 Includes a summary of the latest version's metadata for each artifact. 206 207 Args: 208 tool_context: The context provided by the ADK framework. 209 210 Returns: 211 A ToolResult containing the list of artifacts with metadata summaries or an error. 212 """ 213 if not tool_context: 214 return ToolResult.error("ToolContext is missing.") 215 log_identifier = "[BuiltinArtifactTool:list_artifacts]" 216 log.debug("%s Processing request.", log_identifier) 217 try: 218 artifact_service = tool_context._invocation_context.artifact_service 219 if not artifact_service: 220 raise ValueError("ArtifactService is not available in the context.") 221 app_name = tool_context._invocation_context.app_name 222 user_id = tool_context._invocation_context.user_id 223 session_id = get_original_session_id(tool_context._invocation_context) 224 list_keys_method = getattr(artifact_service, "list_artifact_keys") 225 all_keys = await list_keys_method( 226 app_name=app_name, user_id=user_id, session_id=session_id 227 ) 228 response_files = [] 229 processed_data_files = set() 230 for key in all_keys: 231 if key.endswith(METADATA_SUFFIX): 232 continue # Skip metadata files initially 233 234 if key in processed_data_files: 235 continue # Already processed this data file 236 237 filename = key 238 metadata_summary = None 239 versions = [] 240 try: 241 versions = await artifact_service.list_versions( 242 app_name=app_name, 243 user_id=user_id, 244 session_id=session_id, 245 filename=filename, 246 ) 247 if not versions: 248 log.warning( 249 "%s Found artifact key '%s' but no versions listed. 
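
# Illustrative sketch (hedged): how a fenced-block callback might invoke the
# helper above. The callback name and the way the fenced block is parsed are
# assumptions for illustration only; _internal_create_artifact and ToolContext
# are the only names taken from this module.
#
#     async def on_fenced_artifact_block(block, tool_context: ToolContext):
#         return await _internal_create_artifact(
#             filename=block.filename,      # e.g. "report.md"
#             content=block.body,           # base64 if the MIME type is binary
#             mime_type=block.mime_type,    # e.g. "text/markdown"
#             tool_context=tool_context,
#             description="Artifact emitted via a fenced block.",
#         )
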
Skipping.", 250 log_identifier, 251 filename, 252 ) 253 continue 254 latest_version = max(versions) 255 metadata_filename = f"{filename}{METADATA_SUFFIX}" 256 if metadata_filename in all_keys: 257 try: 258 metadata_part = await artifact_service.load_artifact( 259 app_name=app_name, 260 user_id=user_id, 261 session_id=session_id, 262 filename=metadata_filename, 263 version=latest_version, 264 ) 265 if metadata_part and metadata_part.inline_data: 266 try: 267 metadata_dict = json.loads( 268 metadata_part.inline_data.data.decode("utf-8") 269 ) 270 schema = metadata_dict.get("schema", {}) 271 metadata_summary = { 272 "description": metadata_dict.get("description"), 273 "source": metadata_dict.get("source"), 274 "type": metadata_dict.get("mime_type"), 275 "size": metadata_dict.get("size_bytes"), 276 "schema_type": schema.get( 277 "type", metadata_dict.get("mime_type") 278 ), 279 "schema_inferred": schema.get("inferred"), 280 } 281 metadata_summary = { 282 k: v 283 for k, v in metadata_summary.items() 284 if v is not None 285 } 286 log.debug( 287 "%s Loaded metadata summary for '%s' v%d.", 288 log_identifier, 289 filename, 290 latest_version, 291 ) 292 except json.JSONDecodeError as json_err: 293 log.warning( 294 "%s Failed to parse metadata JSON for '%s' v%d: %s", 295 log_identifier, 296 metadata_filename, 297 latest_version, 298 json_err, 299 ) 300 metadata_summary = {"error": "Failed to parse metadata"} 301 except Exception as fmt_err: 302 log.warning( 303 "%s Failed to format metadata summary for '%s' v%d: %s", 304 log_identifier, 305 metadata_filename, 306 latest_version, 307 fmt_err, 308 ) 309 metadata_summary = { 310 "error": "Failed to format metadata" 311 } 312 else: 313 log.warning( 314 "%s Metadata file '%s' v%d found but empty or unreadable.", 315 log_identifier, 316 metadata_filename, 317 latest_version, 318 ) 319 metadata_summary = { 320 "error": "Metadata file empty or unreadable" 321 } 322 except Exception as load_err: 323 log.warning( 324 "%s Failed to load metadata file '%s' v%d: %s", 325 log_identifier, 326 metadata_filename, 327 latest_version, 328 load_err, 329 ) 330 metadata_summary = { 331 "error": f"Failed to load metadata: {load_err}" 332 } 333 else: 334 log.debug( 335 "%s No companion metadata file found for '%s'.", 336 log_identifier, 337 filename, 338 ) 339 metadata_summary = {"info": "No metadata file found"} 340 except Exception as version_err: 341 log.warning( 342 "%s Failed to list versions or process metadata for file '%s': %s. Skipping file.", 343 log_identifier, 344 filename, 345 version_err, 346 ) 347 continue 348 response_files.append( 349 { 350 "filename": filename, 351 "versions": versions, 352 "metadata_summary": metadata_summary, 353 } 354 ) 355 processed_data_files.add(filename) 356 log.info( 357 "%s Found %d data artifacts for session %s.", 358 log_identifier, 359 len(response_files), 360 session_id, 361 ) 362 return ToolResult.ok( 363 f"Found {len(response_files)} artifacts.", 364 data={"artifacts": response_files} 365 ) 366 except Exception as e: 367 log.exception("%s Error listing artifacts: %s", log_identifier, e) 368 return ToolResult.error(f"Failed to list artifacts: {e}") 369 370 371 async def load_artifact( 372 filename: str, 373 version: int, 374 load_metadata_only: bool = False, 375 max_content_length: Optional[int] = None, 376 include_line_numbers: bool = False, 377 tool_context: ToolContext = None, 378 ) -> ToolResult: 379 """ 380 Loads the content or metadata of a specific artifact version. 
async def load_artifact(
    filename: str,
    version: int,
    load_metadata_only: bool = False,
    max_content_length: Optional[int] = None,
    include_line_numbers: bool = False,
    tool_context: ToolContext = None,
) -> ToolResult:
    """
    Loads the content or metadata of a specific artifact version.
    Early-stage embeds in the filename argument are resolved.

    If load_metadata_only is True, loads the full metadata dictionary.
    Otherwise, loads text content (potentially truncated) or a binary metadata summary.

    Args:
        filename: The name of the artifact to load. May contain embeds.
        version: The specific version number to load. Must be explicitly provided.
            Versions are 0-indexed.
        load_metadata_only (bool): If True, load only the metadata JSON. Default False.
        max_content_length (Optional[int]): Maximum character length for text content.
            If None, uses app configuration. Range: 100-100,000.
        include_line_numbers (bool): If True, prefix each line with its 1-based line
            number followed by a TAB character for LLM viewing. Line numbers
            are not stored in the artifact. Default False.
        tool_context: The context provided by the ADK framework.

    Returns:
        A ToolResult containing the artifact details and content/metadata or an error.
    """
    if not tool_context:
        return ToolResult.error(
            "ToolContext is missing.",
            data={"filename": filename, "version": version},
        )
    log_identifier = f"[BuiltinArtifactTool:load_artifact:{filename}:{version}]"
    log.debug(
        "%s Processing request (load_metadata_only=%s).",
        log_identifier,
        load_metadata_only,
    )
    if version is None:
        version = "latest"
    try:
        artifact_service = tool_context._invocation_context.artifact_service
        if not artifact_service:
            raise ValueError("ArtifactService is not available in the context.")
        app_name = tool_context._invocation_context.app_name
        user_id = tool_context._invocation_context.user_id
        session_id = get_original_session_id(tool_context._invocation_context)
        agent = getattr(tool_context._invocation_context, "agent", None)
        host_component = getattr(agent, "host_component", None) if agent else None

        # Check if inline vision is enabled and this is an image artifact
        enable_inline_vision = (
            getattr(host_component, "enable_inline_vision", False)
            if host_component
            else False
        )
        is_image = is_image_artifact(filename, None)

        if enable_inline_vision and is_image and not load_metadata_only:
            # Load raw bytes for image artifacts so the LLM can see them
            result = await load_artifact_content_or_metadata(
                artifact_service=artifact_service,
                app_name=app_name,
                user_id=user_id,
                session_id=session_id,
                filename=filename,
                version=version,
                load_metadata_only=False,
                return_raw_bytes=True,
                log_identifier_prefix="[BuiltinArtifactTool:load_artifact:vision]",
            )
            status = result.pop("status", "success")
            if status in ("error", "not_found"):
                message = result.pop("message", "Unknown error")
                return ToolResult.error(message, data=result)

            raw_bytes = result.get("raw_bytes")
            mime_type = result.get("mime_type", "image/png")
            if raw_bytes:
                import base64 as b64

                b64_data = b64.b64encode(raw_bytes).decode("utf-8")
                data_url = f"data:{mime_type};base64,{b64_data}"
                log.info(
                    "%s Inline vision: returning image '%s' (%d bytes) as data URL for LLM viewing.",
                    log_identifier,
                    filename,
                    len(raw_bytes),
                )
                return ToolResult.ok(
                    f"Image '{filename}' loaded for viewing. The image is included inline below.",
                    data={
                        "filename": result.get("filename", filename),
                        "version": result.get("version", version),
                        "mime_type": mime_type,
                        "size_bytes": len(raw_bytes),
                        "_vision_image_data_url": data_url,
                    },
                )

        result = await load_artifact_content_or_metadata(
            artifact_service=artifact_service,
            app_name=app_name,
            user_id=user_id,
            session_id=session_id,
            filename=filename,
            version=version,
            load_metadata_only=load_metadata_only,
            max_content_length=max_content_length,
            include_line_numbers=include_line_numbers,
            component=host_component,
            log_identifier_prefix="[BuiltinArtifactTool:load_artifact]",
        )
        # Convert helper dict result to ToolResult
        status = result.pop("status", "success")
        if status in ("error", "not_found"):
            message = result.pop("message", "Unknown error")
            return ToolResult.error(message, data=result)
        message = result.pop("message", f"Loaded artifact {filename}")
        return ToolResult.ok(message, data=result)
    except FileNotFoundError as fnf_err:
        log.warning(
            "%s Artifact not found (reported by helper): %s", log_identifier, fnf_err
        )
        return ToolResult.error(
            str(fnf_err),
            data={"filename": filename, "version": version},
        )
    except ValueError as val_err:
        log.warning(
            "%s Value error during load (reported by helper): %s",
            log_identifier,
            val_err,
        )
        return ToolResult.error(
            str(val_err),
            data={"filename": filename, "version": version},
        )
    except Exception as e:
        log.exception(
            "%s Unexpected error in load_artifact tool: %s", log_identifier, e
        )
        return ToolResult.error(
            f"Unexpected error processing load request: {e}",
            data={"filename": filename, "version": version},
        )
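
# Illustrative sketch (hedged): a typical call as the LLM would issue it
# through the tool schema defined at the bottom of this module (tool_context
# is injected by ADK, not supplied by the caller). The filename and version
# are invented.
#
#     result = await load_artifact(
#         filename="sales.csv",
#         version=0,                    # versions are 0-indexed
#         include_line_numbers=True,    # "1<TAB>..." prefixes for precise line refs
#         tool_context=tool_context,
#     )
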
async def apply_embed_and_create_artifact(
    output_filename: str,
    embed_directive: str,
    output_metadata: Optional[Dict[str, Any]] = None,
    tool_context: ToolContext = None,
) -> ToolResult:
    """
    Resolves an 'artifact_content' embed directive (including modifiers and formatting)
    and saves the resulting content as a new artifact. The entire embed directive
    must be provided as a string in the embed_directive argument.

    Args:
        output_filename: The desired name for the new artifact.
        embed_directive: The full '«artifact_content:...>>>...>>>format:...»' string.
        output_metadata (dict, optional): Metadata for the new artifact.
        tool_context: The context provided by the ADK framework.

    Returns:
        A ToolResult indicating the result, including the new filename and version.
    """
    if not tool_context:
        return ToolResult.error("ToolContext is missing.")

    log_identifier = f"[BuiltinArtifactTool:apply_embed:{output_filename}]"
    log.info(
        "%s Processing request with directive: %s", log_identifier, embed_directive
    )

    match = EMBED_REGEX.fullmatch(embed_directive)
    if not match:
        return ToolResult.error(f"Invalid embed directive format: {embed_directive}")

    embed_type = match.group(1)
    expression = match.group(2)
    format_spec = match.group(3)

    if embed_type != "artifact_content":
        return ToolResult.error(
            f"This tool only supports 'artifact_content' embeds, got '{embed_type}'."
        )

    try:
        inv_context = tool_context._invocation_context
        artifact_service = inv_context.artifact_service
        if not artifact_service:
            raise ValueError("ArtifactService not available.")

        host_component = getattr(inv_context.agent, "host_component", None)
        if not host_component:
            log.warning(
                "%s Could not access host component config for limits. Proceeding without them.",
                log_identifier,
            )
            embed_config = {}
        else:
            embed_config = {
                "gateway_artifact_content_limit_bytes": host_component.get_config(
                    "gateway_artifact_content_limit_bytes", -1
                ),
                "gateway_recursive_embed_depth": host_component.get_config(
                    "gateway_recursive_embed_depth", 3
                ),
            }

        gateway_context = {
            "artifact_service": artifact_service,
            "session_context": {
                "app_name": inv_context.app_name,
                "user_id": inv_context.user_id,
                "session_id": get_original_session_id(inv_context),
            },
        }
    except Exception as ctx_err:
        log.error(
            "%s Failed to prepare context/config for embed evaluation: %s",
            log_identifier,
            ctx_err,
        )
        return ToolResult.error(f"Internal error preparing context: {ctx_err}")

    resolved_content_str, error_msg_from_eval, _ = await evaluate_embed(
        embed_type=embed_type,
        expression=expression,
        format_spec=format_spec,
        context=gateway_context,
        log_identifier=log_identifier,
        resolution_mode=ResolutionMode.TOOL_PARAMETER,
        config=embed_config,
    )

    if error_msg_from_eval or (
        resolved_content_str and resolved_content_str.startswith("[Error:")
    ):
        error_to_report = error_msg_from_eval or resolved_content_str
        log.error("%s Embed resolution failed: %s", log_identifier, error_to_report)
        return ToolResult.error(f"Embed resolution failed: {error_to_report}")

    output_mime_type = "text/plain"
    final_format = None
    chain_parts = expression.split(EMBED_CHAIN_DELIMITER)
    if len(chain_parts) > 1:
        last_part = chain_parts[-1].strip()
        format_match = re.match(r"format:(.*)", last_part, re.DOTALL)
        if format_match:
            final_format = format_match.group(1).strip().lower()
    elif format_spec:
        final_format = format_spec.strip().lower()

    if final_format:
        if final_format == "html":
            output_mime_type = "text/html"
        elif final_format in ("json", "json_pretty"):
            output_mime_type = "application/json"
        elif final_format == "csv":
            output_mime_type = "text/csv"
        elif final_format == "datauri":
            output_mime_type = "text/plain"
            log.warning(
                "%s Embed resolved to data URI; saving new artifact as text/plain.",
                log_identifier,
            )

    log.debug("%s Determined output MIME type as: %s", log_identifier, output_mime_type)

    try:
        resolved_bytes = resolved_content_str.encode("utf-8")
        inv_context = tool_context._invocation_context
        artifact_service = inv_context.artifact_service
        if not artifact_service:
            raise ValueError("ArtifactService is not available in the context.")

        save_result = await save_artifact_with_metadata(
            artifact_service=artifact_service,
            app_name=inv_context.app_name,
            user_id=inv_context.user_id,
            session_id=get_original_session_id(inv_context),
            filename=output_filename,
            content_bytes=resolved_bytes,
            mime_type=output_mime_type,
            metadata_dict={
                "source_directive": embed_directive,
                **(output_metadata or {}),
            },
            timestamp=inv_context.session.last_update_time
            or datetime.now(timezone.utc),
            schema_max_keys=(
                host_component.get_config("schema_max_keys", DEFAULT_SCHEMA_MAX_KEYS)
                if host_component
                else DEFAULT_SCHEMA_MAX_KEYS
            ),
            tool_context=tool_context,
        )

        log.info(
            "%s Successfully applied embed and saved new artifact '%s' (v%s).",
            log_identifier,
            output_filename,
            save_result.get("data_version"),
        )
        return ToolResult.ok(
            f"Successfully created artifact '{output_filename}' v{save_result.get('data_version')} from embed directive.",
            data={
                "output_filename": output_filename,
                "output_version": save_result.get("data_version"),
                "output_mime_type": output_mime_type,
            },
        )

    except Exception as save_err:
        log.exception(
            "%s Failed to save resolved content as artifact '%s': %s",
            log_identifier,
            output_filename,
            save_err,
        )
        return ToolResult.error(f"Failed to save new artifact: {save_err}")
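
# Illustrative sketch (hedged): what a call might look like. The directive
# syntax (artifact_content plus a chained format modifier) follows the
# docstring above; the concrete artifact name is invented.
#
#     result = await apply_embed_and_create_artifact(
#         output_filename="sales_preview.json",
#         embed_directive="«artifact_content:sales.csv>>>format:json»",
#         tool_context=tool_context,
#     )
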
async def extract_content_from_artifact(
    filename: str,
    extraction_goal: str,
    version: Optional[Union[int, str]] = "latest",
    output_filename_base: Optional[str] = None,
    tool_context: ToolContext = None,
) -> ToolResult:
    """
    Loads an existing artifact, uses an internal LLM to process its content
    based on an "extraction_goal", and manages the output by returning it
    or saving it as a new artifact.

    The tool's description for the LLM might dynamically update based on
    the 'supported_binary_mime_types' configuration of the agent, indicating
    which binary types it can attempt to process.

    Args:
        filename (str): Name of the source artifact. May contain embeds.
        extraction_goal (str): Natural language instruction for the LLM on what
            to extract or how to transform the content. May contain embeds.
        version (Optional[Union[int, str]]): Version of the source artifact.
            Can be an integer or "latest". Defaults to "latest".
            May contain embeds.
        output_filename_base (Optional[str]): Optional base name for the new
            artifact if the extracted content is saved. May contain embeds.
        tool_context (ToolContext): Provided by the ADK framework.

    Returns:
        ToolResult: A ToolResult containing the status of the operation,
            a message for the LLM, and potentially the extracted
            data or details of a newly saved artifact.
    """
    log_identifier = f"[BuiltinArtifactTool:extract_content:{filename}:{version}]"
    log.debug(
        "%s Processing request. Goal: '%s', Output base: '%s'",
        log_identifier,
        extraction_goal,
        output_filename_base,
    )

    if not tool_context:
        return ToolResult.error(
            "Tool execution failed: ToolContext is missing.",
            data={"filename": filename, "version_requested": str(version)},
        )
    if not filename:
        return ToolResult.error(
            "Tool execution failed: 'filename' parameter is required.",
            data={"version_requested": str(version)},
        )
    if not extraction_goal:
        return ToolResult.error(
            "Tool execution failed: 'extraction_goal' parameter is required.",
            data={"filename": filename, "version_requested": str(version)},
        )

    inv_context = tool_context._invocation_context
    host_component = getattr(inv_context.agent, "host_component", None)
    if not host_component:
        log.error(
            "%s Host component not found on agent. Cannot retrieve config.",
            log_identifier,
        )
        return ToolResult.error(
            "Tool configuration error: Host component not accessible.",
            data={"filename": filename, "version_requested": str(version)},
        )

    try:
        save_threshold = host_component.get_config(
            "tool_output_save_threshold_bytes", 2048
        )
        llm_max_bytes = host_component.get_config(
            "tool_output_llm_return_max_bytes", 4096
        )
        extraction_config = host_component.get_config(
            "extract_content_from_artifact_config", {}
        )
        supported_binary_mime_types = extraction_config.get(
            "supported_binary_mime_types", []
        )
        model_config_for_extraction = extraction_config.get("model")
    except Exception as e:
        log.exception("%s Error retrieving tool configuration: %s", log_identifier, e)
        return ToolResult.error(
            f"Tool configuration error: {e}",
            data={"filename": filename, "version_requested": str(version)},
        )

    source_artifact_data = None
    processed_version: Union[int, str]

    if version is None or (
        isinstance(version, str) and version.strip().lower() == "latest"
    ):
        processed_version = "latest"
    else:
        try:
            processed_version = int(version)
        except ValueError:
            log.warning(
                "%s Invalid version string: '%s'. Must be an integer or 'latest'.",
                log_identifier,
                version,
            )
            return ToolResult.error(
                f"Invalid version format '{version}'. Version must be an integer or 'latest'.",
                data={"filename": filename, "version_requested": str(version)},
            )
    try:
        log.debug(
            "%s Loading source artifact '%s' version '%s' (processed as: %s)",
            log_identifier,
            filename,
            version,
            processed_version,
        )
        source_artifact_data = await load_artifact_content_or_metadata(
            artifact_service=inv_context.artifact_service,
            app_name=inv_context.app_name,
            user_id=inv_context.user_id,
            session_id=get_original_session_id(inv_context),
            filename=filename,
            version=processed_version,
            return_raw_bytes=True,
            log_identifier_prefix=log_identifier,
        )
        if source_artifact_data.get("status") != "success":
            raise FileNotFoundError(
                source_artifact_data.get("message", "Failed to load artifact")
            )
        log.info(
            "%s Successfully loaded source artifact '%s' version %s (actual: v%s)",
            log_identifier,
            filename,
            version,
            source_artifact_data.get("version"),
        )
    except FileNotFoundError as e:
        log.warning("%s Source artifact not found: %s", log_identifier, e)
        return ToolResult.error(
            f"Could not extract content. Source artifact '{filename}' (version {version}) was not found: {e}",
            data={"filename": filename, "version_requested": str(version)},
        )
    except Exception as e:
        log.exception("%s Error loading source artifact: %s", log_identifier, e)
        return ToolResult.error(
            f"Error loading source artifact '{filename}': {e}",
            data={"filename": filename, "version_requested": str(version)},
        )

    source_artifact_content_bytes = source_artifact_data.get("raw_bytes")
    source_mime_type = source_artifact_data.get("mime_type", "application/octet-stream")
    actual_source_version = source_artifact_data.get("version", "unknown")
    chosen_llm = None
    try:
        if model_config_for_extraction:
            if isinstance(model_config_for_extraction, str):
                chosen_llm = LLMRegistry.new_llm(model_config_for_extraction)
                log.info(
                    "%s Using tool-specific LLM (string): %s",
                    log_identifier,
                    model_config_for_extraction,
                )
            elif isinstance(model_config_for_extraction, dict):
                chosen_llm = LiteLlm(**model_config_for_extraction)
                log.info(
                    "%s Using tool-specific LLM (dict): %s",
                    log_identifier,
                    model_config_for_extraction.get("model"),
                )
            else:
                log.warning(
                    "%s Invalid 'model' config for extraction tool. Falling back to agent default.",
                    log_identifier,
                )
                chosen_llm = host_component.get_lite_llm_model()
        else:
            chosen_llm = host_component.get_lite_llm_model()
            log.info(
                "%s Using agent's default LLM: %s", log_identifier, chosen_llm.model
            )
    except Exception as e:
        log.exception("%s Error initializing LLM for extraction: %s", log_identifier, e)
        return ToolResult.error(
            f"Failed to set up LLM for extraction: {e}",
            data={"filename": filename, "version_requested": str(version)},
        )

    llm_parts = []
    is_binary_supported = False

    normalized_source_mime_type = source_mime_type.lower() if source_mime_type else ""

    is_text_based = is_text_based_file(
        mime_type=normalized_source_mime_type,
        content_bytes=source_artifact_content_bytes,
    )

    if is_text_based:
        # Try multiple encodings to handle files from different sources (e.g., Windows
        # Excel exports). Includes common Windows encodings like CP1252 and UTF-16.
        artifact_text_content = None
        encoding_used = None
        encodings_to_try = ["utf-8", "utf-16", "cp1252", "latin-1"]
        decode_errors = []

        for encoding in encodings_to_try:
            try:
                artifact_text_content = source_artifact_content_bytes.decode(encoding)
                encoding_used = encoding
                log.debug(
                    "%s Successfully decoded artifact using %s encoding.",
                    log_identifier,
                    encoding,
                )
                break
            except UnicodeDecodeError as e:
                decode_errors.append(f"{encoding}: {e}")
                continue

        if artifact_text_content is not None:
            llm_parts.append(
                adk_types.Part(
                    text=f"Artifact Content (MIME type: {source_mime_type}, encoding: {encoding_used}):\n```\n{artifact_text_content}\n```"
                )
            )
            log.debug("%s Prepared text content for LLM.", log_identifier)
        else:
            # All encoding attempts failed - return an error to the calling agent
            # instead of passing a misleading message to the internal LLM
            log.error(
                "%s Failed to decode text artifact with any supported encoding. Errors: %s",
                log_identifier,
                "; ".join(decode_errors),
            )
            return ToolResult.error(
                f"Could not extract content from artifact '{filename}' (v{actual_source_version}). "
                f"The file appears to be a text file (MIME type: {source_mime_type}) but could not be decoded "
                f"with any supported encoding (UTF-8, UTF-16, CP1252, Latin-1). The file may be corrupted or use an "
                f"unsupported encoding. Please inform the user that the file cannot be processed.",
                data={
                    "filename": filename,
                    "version_requested": str(version),
                    "encoding_errors": decode_errors,
                },
            )
    else:  # Binary
        for supported_pattern in supported_binary_mime_types:
            if fnmatch.fnmatch(source_mime_type, supported_pattern):
                is_binary_supported = True
                break
        if is_binary_supported:
            llm_parts.append(
                adk_types.Part(
                    inline_data=adk_types.Blob(
                        mime_type=source_mime_type, data=source_artifact_content_bytes
                    )
                )
            )
            llm_parts.append(
                adk_types.Part(
                    text=f"The above is the content of artifact '{filename}' (MIME type: {source_mime_type})."
                )
            )
            log.debug(
                "%s Prepared supported binary content (MIME: %s) for LLM.",
                log_identifier,
                source_mime_type,
            )
        else:
            llm_parts.append(
                adk_types.Part(
                    text=f"The artifact '{filename}' is a binary file of type '{source_mime_type}'. Direct content processing is not supported by this tool's current configuration. Perform the extraction goal based on its filename and type if possible, or state that the content cannot be analyzed."
                )
            )
            log.debug(
                "%s Prepared message for unsupported binary content (MIME: %s) for LLM.",
                log_identifier,
                source_mime_type,
            )

    # System instruction to ensure the LLM directly analyzes data rather than generating code
    system_instruction = """You are a data extraction and analysis assistant. Your task is to directly analyze the provided artifact content and return the requested information.

CRITICAL RULES:
1. DIRECTLY ANALYZE the data provided - do NOT write code (Python, SQL, or any other language) to analyze it
2. The artifact content is already loaded and provided to you - you must work with it directly
3. Provide actual results, counts, summaries, or extracted data based on what you find in the content
4. If you cannot find the requested information, clearly state what you found instead
5. Format your response as plain text or structured data (JSON, markdown tables) - NOT as code
6. You do NOT have access to execute code - any code you write will NOT be run

Example of WRONG response (do not do this):
```python
import pandas as pd
df = pd.read_csv('file.csv')
print(df['column'].count())
```

Example of CORRECT response:
Based on analyzing the CSV data, I found 102 records containing 'Employee' in the 'Type' column. Here's the breakdown:
- Employee-A: 65 records
- Employee-B: 37 records"""

    internal_llm_contents = [
        adk_types.Content(
            role="user", parts=[adk_types.Part(text=extraction_goal)] + llm_parts
        )
    ]
    internal_llm_request = LlmRequest(
        model=chosen_llm.model,
        contents=internal_llm_contents,
        config=adk_types.GenerateContentConfig(
            temperature=0.1,
            system_instruction=system_instruction,
        ),
    )

    extracted_content_str = ""
    try:
        log.info(
            "%s Executing internal LLM call for extraction. Goal: %s",
            log_identifier,
            extraction_goal,
        )
        if hasattr(chosen_llm, "generate_content") and not hasattr(
            chosen_llm, "generate_content_async"
        ):
            llm_response = chosen_llm.generate_content(request=internal_llm_request)
            if llm_response.parts:
                extracted_content_str = llm_response.parts[0].text or ""
            else:
                extracted_content_str = ""
        elif hasattr(chosen_llm, "generate_content_async"):
            log.debug(
                "%s Calling LLM's generate_content_async (non-streaming) for extraction.",
                log_identifier,
            )
            try:
                llm_response_obj = None
                async for response_event in chosen_llm.generate_content_async(
                    internal_llm_request
                ):
                    llm_response_obj = response_event
                    break
                if (
                    llm_response_obj
                    and hasattr(llm_response_obj, "text")
                    and llm_response_obj.text
                ):
                    extracted_content_str = llm_response_obj.text
                elif (
                    llm_response_obj
                    and hasattr(llm_response_obj, "parts")
                    and llm_response_obj.parts
                ):
                    extracted_content_str = "".join(
                        [
                            part.text
                            for part in llm_response_obj.parts
                            if hasattr(part, "text") and part.text
                        ]
                    )
                elif (
                    llm_response_obj
                    and hasattr(llm_response_obj, "content")
                    and hasattr(llm_response_obj.content, "parts")
                    and llm_response_obj.content.parts
                ):
                    extracted_content_str = "".join(
                        [
                            part.text
                            for part in llm_response_obj.content.parts
                            if hasattr(part, "text") and part.text
                        ]
                    )
                else:
                    extracted_content_str = ""
                    log.warning(
                        "%s LLM response object or its text/parts were not found or empty after non-streaming call.",
                        log_identifier,
                    )

            except Exception as llm_async_err:
                log.exception(
                    "%s Asynchronous LLM call for extraction failed: %s",
                    log_identifier,
                    llm_async_err,
                )
                # Return an error instead of continuing with the error message as "extracted data"
                return ToolResult.error(
                    f"The internal LLM call failed while processing artifact '{filename}' for your goal '{extraction_goal}'. "
                    f"Error: {llm_async_err}. Please inform the user that the extraction could not be completed.",
                    data={
                        "filename": filename,
                        "version_requested": str(version),
                        "error_details": str(llm_async_err),
                    },
                )
        else:
            log.error(
                "%s LLM does not have a known generate_content or generate_content_async method.",
                log_identifier,
            )
            return ToolResult.error(
                "The LLM configured for extraction does not have a supported generation method. "
                "Please inform the user that the extraction tool is misconfigured.",
                data={"filename": filename, "version_requested": str(version)},
            )

        log.info(
            "%s Internal LLM call completed. Extracted content length: %d chars",
            log_identifier,
            len(extracted_content_str),
        )
        if not extracted_content_str.strip():
            log.warning(
                "%s Internal LLM produced empty or whitespace-only content for extraction goal.",
                log_identifier,
            )

        # Check if the LLM generated code instead of actual analysis results.
        # This is a safety check to prevent hallucinated code from being saved as "extracted data".
        code_indicators = [
            "```python",
            "```sql",
            "import pandas",
            "import csv",
            "pd.read_csv",
            "df = pd.",
            "SELECT * FROM",
            "def analyze(",
            "def extract(",
        ]
        content_lower = extracted_content_str.lower()
        detected_code = [
            indicator
            for indicator in code_indicators
            if indicator.lower() in content_lower
        ]

        if detected_code and len(extracted_content_str) > 100:
            # Check if the response is primarily code (more than 30% of content is in code blocks)
            code_block_pattern = r"```[\s\S]*?```"
            code_blocks = re.findall(code_block_pattern, extracted_content_str)
            code_content_length = sum(len(block) for block in code_blocks)

            if code_content_length > len(extracted_content_str) * 0.3:
                log.warning(
                    "%s Internal LLM generated code instead of analyzing data. Detected indicators: %s. "
                    "Code blocks comprise %.1f%% of response.",
                    log_identifier,
                    detected_code,
                    (code_content_length / len(extracted_content_str)) * 100,
                )
                return ToolResult.error(
                    "The extraction tool's internal LLM generated code instead of analyzing the data directly. "
                    "This tool cannot execute code. For CSV data analysis, please use the 'query_data_with_sql' tool "
                    "from the Data Analysis tools instead, if available, which can execute SQL queries on CSV files. "
                    "Alternatively, use 'load_artifact' to view the raw content and analyze it yourself.",
                    data={
                        "filename": filename,
                        "version_requested": str(version),
                        "detected_code_indicators": detected_code,
                    },
                )

    except Exception as e:
        log.exception(
            "%s Internal LLM call for extraction failed: %s", log_identifier, e
        )
        return ToolResult.error(
            f"The LLM failed to process the artifact content for your goal '{extraction_goal}'. Error: {e}",
            data={"filename": filename, "version_requested": str(version)},
        )

    extracted_content_bytes = extracted_content_str.encode("utf-8")
    extracted_content_size_bytes = len(extracted_content_bytes)
    output_mime_type = "text/plain"
    try:
        json.loads(extracted_content_str)
        output_mime_type = "application/json"
        log.debug(
            "%s Extracted content appears to be valid JSON. Setting output MIME to application/json.",
            log_identifier,
        )
    except json.JSONDecodeError:
        log.debug(
            "%s Extracted content is not JSON. Using output MIME text/plain.",
            log_identifier,
        )

    response_for_llm_str = extracted_content_str
    saved_extracted_artifact_details = None
    final_status = "success"
    message_to_llm_parts = [
        f"Successfully extracted content from '{filename}' (v{actual_source_version}) based on your goal: '{extraction_goal}'."
    ]
    was_saved = False
    was_truncated = False

    if extracted_content_size_bytes > save_threshold:
        log.info(
            "%s Extracted content size (%d bytes) exceeds save threshold (%d bytes). Saving as new artifact.",
            log_identifier,
            extracted_content_size_bytes,
            save_threshold,
        )
        saved_extracted_artifact_details = await _save_extracted_artifact(
            tool_context,
            host_component,
            extracted_content_bytes,
            filename,
            actual_source_version,
            extraction_goal,
            output_filename_base,
            output_mime_type,
        )
        if saved_extracted_artifact_details.get("status") == "success":
            was_saved = True
            message_to_llm_parts.append(
                f"The full extracted content was saved as artifact '{saved_extracted_artifact_details.get('data_filename')}' "
                f"(version {saved_extracted_artifact_details.get('data_version')}). "
                f"You can retrieve it using 'load_artifact' or perform further extractions on it using 'extract_content_from_artifact' "
                f"with this new filename and version."
            )
        else:
            message_to_llm_parts.append(
                f"Attempted to save the large extracted content, but failed: {saved_extracted_artifact_details.get('message')}"
            )

    if extracted_content_size_bytes > llm_max_bytes:
        was_truncated = True
        log.info(
            "%s Original extracted content (%d bytes) exceeds LLM return max bytes (%d bytes). Truncating for LLM response.",
            log_identifier,
            extracted_content_size_bytes,
            llm_max_bytes,
        )

        if not was_saved:
            log.info(
                "%s Saving extracted content now because it needs truncation for LLM response and wasn't saved previously.",
                log_identifier,
            )
            saved_extracted_artifact_details = await _save_extracted_artifact(
                tool_context,
                host_component,
                extracted_content_bytes,
                filename,
                actual_source_version,
                extraction_goal,
                output_filename_base,
                output_mime_type,
            )
            if saved_extracted_artifact_details.get("status") == "success":
                was_saved = True
                message_to_llm_parts.append(
                    f"The full extracted content (which is being truncated for this response) was saved as artifact "
                    f"'{saved_extracted_artifact_details.get('data_filename')}' (version {saved_extracted_artifact_details.get('data_version')}). "
                    f"You can retrieve the full content using 'load_artifact' or perform further extractions on it."
                )
            else:
                message_to_llm_parts.append(
                    f"Attempted to save the extracted content before truncation, but failed: {saved_extracted_artifact_details.get('message')}"
                )

        truncation_suffix = "... [Content truncated]"
        adjusted_max_bytes = llm_max_bytes - len(truncation_suffix.encode("utf-8"))
        if adjusted_max_bytes < 0:
            adjusted_max_bytes = 0

        truncated_bytes = extracted_content_bytes[:adjusted_max_bytes]
        response_for_llm_str = (
            truncated_bytes.decode("utf-8", "ignore") + truncation_suffix
        )

        message_to_llm_parts.append(
            "The extracted content provided in 'extracted_data_preview' has been truncated due to size limits. "
            "If saved, the full version is available in the specified artifact."
        )

    if was_saved and was_truncated:
        final_status = "success_full_content_saved_preview_returned"
    elif was_saved:
        final_status = "success_full_content_saved_and_returned"
    elif was_truncated:
        final_status = "success_content_returned_truncated"
    else:
        final_status = "success_content_returned"

    final_response_dict = {
        "status": final_status,
        "message_to_llm": " ".join(list(dict.fromkeys(message_to_llm_parts))),
        "source_filename": filename,
        "source_version_processed": actual_source_version,
        "extraction_goal_used": extraction_goal,
    }

    if was_truncated:
        final_response_dict["extracted_data_preview"] = response_for_llm_str
    else:
        final_response_dict["extracted_data"] = response_for_llm_str

    if (
        saved_extracted_artifact_details
        and saved_extracted_artifact_details.get("status") == "success"
    ):
        final_response_dict["saved_extracted_artifact_details"] = (
            saved_extracted_artifact_details
        )
    elif saved_extracted_artifact_details:
        final_response_dict["saved_extracted_artifact_attempt_details"] = (
            saved_extracted_artifact_details
        )

    log.info(
        "%s Tool execution finished. Final status: %s. Response preview: %s",
        log_identifier,
        final_status,
        final_response_dict,
    )
    # Convert to ToolResult - this is a success path
    message = final_response_dict.pop("message_to_llm", "Extraction completed.")
    return ToolResult.ok(message, data=final_response_dict)
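
# Illustrative sketch (hedged): a typical invocation. The artifact name and
# goal are invented; the save/truncation thresholds and the internal model
# come from host configuration as described in the function above.
#
#     result = await extract_content_from_artifact(
#         filename="meeting_notes.txt",
#         extraction_goal="List all action items with their owners as JSON.",
#         version="latest",
#         output_filename_base="action_items",
#         tool_context=tool_context,
#     )
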
async def append_to_artifact(
    filename: str,
    content_chunk: str,
    mime_type: str,
    tool_context: ToolContext = None,
) -> ToolResult:
    """
    Appends a chunk of content to an existing artifact. This operation will
    create a new version of the artifact. The content_chunk should be a string,
    potentially base64 encoded if it represents binary data (indicated by mime_type).
    The chunk size should be limited (e.g., max 3KB) by the LLM.

    Args:
        filename: The name of the artifact to append to. May contain embeds.
        content_chunk: The chunk of content to append (max approx. 3KB).
            If mime_type suggests binary, this should be base64 encoded.
            May contain embeds.
        mime_type: The MIME type of the content_chunk. This helps determine if
            base64 decoding is needed for the chunk. The overall artifact's
            MIME type will be preserved from its latest version.
            May contain embeds.
        tool_context: The context provided by the ADK framework.

    Returns:
        A ToolResult indicating the result, including the new version of the artifact.
    """
    if not tool_context:
        return ToolResult.error(
            "ToolContext is missing, cannot append to artifact.",
            data={"filename": filename},
        )

    log_identifier = f"[BuiltinArtifactTool:append_to_artifact:{filename}]"
    log.debug("%s Processing request to append chunk.", log_identifier)

    try:
        inv_context = tool_context._invocation_context
        artifact_service = inv_context.artifact_service
        if not artifact_service:
            raise ValueError("ArtifactService is not available in the context.")

        app_name = inv_context.app_name
        user_id = inv_context.user_id
        session_id = get_original_session_id(inv_context)
        host_component = getattr(inv_context.agent, "host_component", None)

        log.debug(
            "%s Loading latest version of artifact '%s' content to append to.",
            log_identifier,
            filename,
        )
        content_load_result = await load_artifact_content_or_metadata(
            artifact_service=artifact_service,
            app_name=app_name,
            user_id=user_id,
            session_id=session_id,
            filename=filename,
            version="latest",
            load_metadata_only=False,
            return_raw_bytes=True,
            component=host_component,
            log_identifier_prefix=f"{log_identifier}[LoadOriginalContent]",
        )

        if content_load_result.get("status") != "success":
            log.error(
                "%s Failed to load original artifact content '%s': %s",
                log_identifier,
                filename,
                content_load_result.get("message"),
            )
            return ToolResult.error(
                f"Failed to load original artifact content to append to: {content_load_result.get('message')}",
                data={"filename": filename},
            )

        original_artifact_bytes = content_load_result.get("raw_bytes", b"")
        original_mime_type = content_load_result.get(
            "mime_type", "application/octet-stream"
        )
        original_version_loaded = content_load_result.get("version", "unknown")
        log.info(
            "%s Loaded original artifact content '%s' v%s, type: %s, size: %d bytes.",
            log_identifier,
            filename,
            original_version_loaded,
            original_mime_type,
            len(original_artifact_bytes),
        )

        log.debug(
            "%s Loading latest version of artifact '%s' metadata.",
            log_identifier,
            filename,
        )
        metadata_load_result = await load_artifact_content_or_metadata(
            artifact_service=artifact_service,
            app_name=app_name,
            user_id=user_id,
            session_id=session_id,
            filename=filename,
            version="latest",
            load_metadata_only=True,
            component=host_component,
            log_identifier_prefix=f"{log_identifier}[LoadOriginalMetadata]",
        )
        original_metadata_dict = {}
        if metadata_load_result.get("status") == "success":
            original_metadata_dict = metadata_load_result.get("metadata", {})
            log.info(
                "%s Loaded original artifact metadata for '%s' v%s.",
                log_identifier,
                filename,
                metadata_load_result.get("version", "unknown"),
            )
        else:
            log.warning(
                "%s Failed to load original artifact metadata for '%s': %s. Proceeding with minimal metadata.",
                log_identifier,
                filename,
                metadata_load_result.get("message"),
            )

        chunk_bytes, _ = decode_and_get_bytes(
            content_chunk, mime_type, f"{log_identifier}[DecodeChunk]"
        )
        log.debug(
            "%s Decoded content_chunk (declared type: %s) to %d bytes.",
            log_identifier,
            mime_type,
            len(chunk_bytes),
        )

        combined_bytes = original_artifact_bytes + chunk_bytes
        log.debug(
            "%s Appended chunk. New total size: %d bytes.",
            log_identifier,
            len(combined_bytes),
        )

        new_metadata_for_save = {
            key: value
            for key, value in original_metadata_dict.items()
            if key
            not in [
                "filename",
                "mime_type",
                "size_bytes",
                "timestamp_utc",
                "schema",
                "version",
            ]
        }
        new_metadata_for_save["appended_from_version"] = original_version_loaded
        new_metadata_for_save["appended_chunk_declared_mime_type"] = mime_type

        schema_max_keys = (
            host_component.get_config("schema_max_keys", DEFAULT_SCHEMA_MAX_KEYS)
            if host_component
            else DEFAULT_SCHEMA_MAX_KEYS
        )

        save_result = await save_artifact_with_metadata(
            artifact_service=artifact_service,
            app_name=app_name,
            user_id=user_id,
            session_id=session_id,
            filename=filename,
            content_bytes=combined_bytes,
            mime_type=original_mime_type,
            metadata_dict=new_metadata_for_save,
            timestamp=datetime.now(timezone.utc),
            schema_max_keys=schema_max_keys,
            tool_context=tool_context,
        )

        log.info(
            "%s Result from save_artifact_with_metadata after append: %s",
            log_identifier,
            save_result,
        )

        if save_result.get("status") == "error":
            raise IOError(
                f"Failed to save appended artifact: {save_result.get('message', 'Unknown error')}"
            )

        return ToolResult.ok(
            f"Chunk appended to '{filename}'. New version is {save_result.get('data_version')} with total size {len(combined_bytes)} bytes.",
            data={
                "filename": filename,
                "new_version": save_result.get("data_version"),
                "total_size_bytes": len(combined_bytes),
            },
        )

    except FileNotFoundError as e:
        log.warning("%s Original artifact not found for append: %s", log_identifier, e)
        return ToolResult.error(
            f"Original artifact '{filename}' not found: {e}",
            data={"filename": filename},
        )
    except ValueError as e:
        log.warning("%s Value error during append: %s", log_identifier, e)
        return ToolResult.error(str(e), data={"filename": filename})
    except IOError as e:
        log.warning("%s IO error during append: %s", log_identifier, e)
        return ToolResult.error(str(e), data={"filename": filename})
    except Exception as e:
        log.exception(
            "%s Unexpected error appending to artifact '%s': %s",
            log_identifier,
            filename,
            e,
        )
        return ToolResult.error(
            f"Failed to append to artifact: {e}",
            data={"filename": filename},
        )
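
# Illustrative sketch (hedged): appending a large text in chunks, following
# the ~3KB guidance in the docstring above. The variable big_text and the
# artifact name are invented for illustration.
#
#     CHUNK_SIZE = 3000
#     for i in range(0, len(big_text), CHUNK_SIZE):
#         result = await append_to_artifact(
#             filename="log.txt",
#             content_chunk=big_text[i : i + CHUNK_SIZE],
#             mime_type="text/plain",
#             tool_context=tool_context,
#         )
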
1557 host_component: The A2A_ADK_HostComponent instance for accessing config and services. 1558 extracted_content_bytes: The raw byte content of the extracted data. 1559 source_artifact_filename: The filename of the original artifact. 1560 source_artifact_version: The version of the original artifact. 1561 extraction_goal: The natural language goal used for extraction. 1562 output_filename_base: Optional base for the new artifact's filename. 1563 output_mime_type: The MIME type of the extracted content. 1564 1565 Returns: 1566 A dictionary containing details of the saved artifact, as returned by 1567 `save_artifact_with_metadata`. 1568 """ 1569 log_identifier = f"[BuiltinArtifactTool:_save_extracted_artifact]" 1570 log.debug("%s Saving extracted content...", log_identifier) 1571 1572 try: 1573 base_name = output_filename_base or f"{source_artifact_filename}_extracted" 1574 base_name_sanitized = re.sub(r'[<>:"/\\|?*\s]+', "_", base_name) 1575 base_name_sanitized = base_name_sanitized.strip("_") 1576 1577 suffix = uuid.uuid4().hex[:8] 1578 extension_map = { 1579 "text/plain": ".txt", 1580 "application/json": ".json", 1581 "text/csv": ".csv", 1582 "text/html": ".html", 1583 "image/png": ".png", 1584 "image/jpeg": ".jpg", 1585 "application/pdf": ".pdf", 1586 } 1587 ext = extension_map.get(output_mime_type.lower(), ".dat") 1588 filename = f"{base_name_sanitized}_{suffix}{ext}" 1589 log.debug("%s Generated output filename: %s", log_identifier, filename) 1590 1591 timestamp = datetime.now(timezone.utc) 1592 metadata_for_saving = { 1593 "description": f"Content extracted/transformed from artifact '{source_artifact_filename}' (version {source_artifact_version}) using goal: '{extraction_goal}'.", 1594 "source_artifact_filename": source_artifact_filename, 1595 "source_artifact_version": source_artifact_version, 1596 "extraction_goal_used": extraction_goal, 1597 } 1598 log.debug( 1599 "%s Prepared metadata for saving: %s", log_identifier, metadata_for_saving 1600 ) 1601 1602 inv_context = tool_context._invocation_context 1603 artifact_service = inv_context.artifact_service 1604 if not artifact_service: 1605 raise ValueError("ArtifactService is not available in the context.") 1606 1607 app_name = inv_context.app_name 1608 user_id = inv_context.user_id 1609 session_id = get_original_session_id(inv_context) 1610 schema_max_keys = host_component.get_config( 1611 "schema_max_keys", DEFAULT_SCHEMA_MAX_KEYS 1612 ) 1613 1614 log.debug( 1615 "%s Calling save_artifact_with_metadata for '%s' (app: %s, user: %s, session: %s, schema_keys: %d)", 1616 log_identifier, 1617 filename, 1618 app_name, 1619 user_id, 1620 session_id, 1621 schema_max_keys, 1622 ) 1623 1624 save_result = await save_artifact_with_metadata( 1625 artifact_service=artifact_service, 1626 app_name=app_name, 1627 user_id=user_id, 1628 session_id=session_id, 1629 filename=filename, 1630 content_bytes=extracted_content_bytes, 1631 mime_type=output_mime_type, 1632 metadata_dict=metadata_for_saving, 1633 timestamp=timestamp, 1634 schema_max_keys=schema_max_keys, 1635 tool_context=tool_context, 1636 ) 1637 1638 log.info( 1639 "%s Extracted content saved as artifact '%s' (version %s). 
Result: %s", 1640 log_identifier, 1641 save_result.get("data_filename", filename), 1642 save_result.get("data_version", "N/A"), 1643 save_result.get("status"), 1644 ) 1645 return save_result 1646 1647 except Exception as e: 1648 log.exception( 1649 "%s Error in _save_extracted_artifact for source '%s': %s", 1650 log_identifier, 1651 source_artifact_filename, 1652 e, 1653 ) 1654 return { 1655 "status": "error", 1656 "data_filename": filename if "filename" in locals() else "unknown_filename", 1657 "message": f"Failed to save extracted content as artifact: {e}", 1658 } 1659 1660 1661 async def _notify_artifact_save( 1662 filename: str, 1663 version: int, 1664 status: str, 1665 tool_context: ToolContext = None, # Keep tool_context for signature consistency 1666 ) -> Dict[str, Any]: 1667 """ 1668 CRITICAL: _notify_artifact_save is automatically invoked by the system as a side-effect when you create artifacts. You should NEVER call this tool yourself. The system will call it for you and provide the results in your next turn. If you manually invoke it, you are making an error. 1669 """ 1670 return { 1671 "filename": filename, 1672 "version": version, 1673 "status": status, 1674 "message": "Artifact has been created and provided to the requester", 1675 } 1676 1677 1678 _notify_artifact_save_tool_def = BuiltinTool( 1679 name="_notify_artifact_save", 1680 implementation=_notify_artifact_save, 1681 description="CRITICAL: _notify_artifact_save is automatically invoked by the system as a side-effect when you create artifacts. You should NEVER call this tool yourself. The system will call it for you and provide the results in your next turn. If you manually invoke it, you are making an error.", 1682 category="internal", 1683 required_scopes=[], # No scopes needed for an internal notification tool 1684 parameters=adk_types.Schema( 1685 type=adk_types.Type.OBJECT, 1686 properties={ 1687 "filename": adk_types.Schema( 1688 type=adk_types.Type.STRING, 1689 description="The name of the artifact that was saved.", 1690 ), 1691 "version": adk_types.Schema( 1692 type=adk_types.Type.INTEGER, 1693 description="The version number of the saved artifact.", 1694 ), 1695 "status": adk_types.Schema( 1696 type=adk_types.Type.STRING, 1697 description="The status of the save operation.", 1698 ), 1699 }, 1700 required=["filename", "version", "status"], 1701 ), 1702 examples=[], 1703 ) 1704 1705 append_to_artifact_tool_def = BuiltinTool( 1706 name="append_to_artifact", 1707 implementation=append_to_artifact, 1708 description="Appends a chunk of content to an existing artifact. This operation will create a new version of the artifact. The content_chunk should be a string, potentially base64 encoded if it represents binary data (indicated by mime_type). Each chunk should be kept small (max approx. 3KB).", 1709 category="artifact_management", 1710 category_name=CATEGORY_NAME, 1711 category_description=CATEGORY_DESCRIPTION, 1712 required_scopes=["tool:artifact:append"], 1713 parameters=adk_types.Schema( 1714 type=adk_types.Type.OBJECT, 1715 properties={ 1716 "filename": adk_types.Schema( 1717 type=adk_types.Type.STRING, 1718 description="The name of the artifact to append to. May contain embeds.", 1719 ), 1720 "content_chunk": adk_types.Schema( 1721 type=adk_types.Type.STRING, 1722 description="The chunk of content to append (max approx. 3KB). If mime_type suggests binary, this should be base64 encoded. 
May contain embeds.", 1723 ), 1724 "mime_type": adk_types.Schema( 1725 type=adk_types.Type.STRING, 1726 description="The MIME type of the content_chunk. This helps determine if base64 decoding is needed for the chunk. The overall artifact's MIME type will be preserved from its latest version. May contain embeds.", 1727 ), 1728 }, 1729 required=["filename", "content_chunk", "mime_type"], 1730 ), 1731 examples=[], 1732 ) 1733 1734 list_artifacts_tool_def = BuiltinTool( 1735 name="list_artifacts", 1736 implementation=list_artifacts, 1737 description="Lists all available data artifact filenames and their versions for the current session. Includes a summary of the latest version's metadata for each artifact.", 1738 category="artifact_management", 1739 category_name=CATEGORY_NAME, 1740 category_description=CATEGORY_DESCRIPTION, 1741 required_scopes=["tool:artifact:list"], 1742 parameters=adk_types.Schema( 1743 type=adk_types.Type.OBJECT, 1744 properties={}, 1745 required=[], 1746 ), 1747 examples=[], 1748 ) 1749 1750 load_artifact_tool_def = BuiltinTool( 1751 name="load_artifact", 1752 implementation=load_artifact, 1753 description="Loads the content or metadata of a specific artifact version. If load_metadata_only is True, loads the full metadata dictionary. Otherwise, loads text content (potentially truncated) or a summary for binary types. For image artifacts (PNG, JPG, etc.) on vision-enabled agents, the image is returned inline so you can see and analyze it directly. Use this to view images created by tools or uploaded by users. Line numbers can be optionally included for precise line range identification.", 1754 category="artifact_management", 1755 category_name=CATEGORY_NAME, 1756 category_description=CATEGORY_DESCRIPTION, 1757 required_scopes=["tool:artifact:load"], 1758 parameters=adk_types.Schema( 1759 type=adk_types.Type.OBJECT, 1760 properties={ 1761 "filename": adk_types.Schema( 1762 type=adk_types.Type.STRING, 1763 description="The name of the artifact to load. May contain embeds.", 1764 ), 1765 "version": adk_types.Schema( 1766 type=adk_types.Type.INTEGER, 1767 description="The specific version number to load. Must be explicitly provided.", 1768 ), 1769 "load_metadata_only": adk_types.Schema( 1770 type=adk_types.Type.BOOLEAN, 1771 description="If True, load only the metadata JSON. Default False.", 1772 nullable=True, 1773 ), 1774 "max_content_length": adk_types.Schema( 1775 type=adk_types.Type.INTEGER, 1776 description="Optional. Maximum character length for text content. If None, uses app configuration. Range: 100-100,000.", 1777 nullable=True, 1778 ), 1779 "include_line_numbers": adk_types.Schema( 1780 type=adk_types.Type.BOOLEAN, 1781 description="If True, prefix each line with its 1-based line number followed by a TAB character. Line numbers are for LLM viewing only and are not stored in the artifact. Default False.", 1782 nullable=True, 1783 ), 1784 }, 1785 required=["filename", "version"], 1786 ), 1787 examples=[], 1788 ) 1789 1790 apply_embed_and_create_artifact_tool_def = BuiltinTool( 1791 name="apply_embed_and_create_artifact", 1792 implementation=apply_embed_and_create_artifact, 1793 description="Resolves an 'artifact_content' embed directive (including modifiers and formatting) and saves the resulting content as a new artifact. 
The entire embed directive must be provided as a string.", 1794 category="artifact_management", 1795 category_name=CATEGORY_NAME, 1796 category_description=CATEGORY_DESCRIPTION, 1797 required_scopes=["tool:artifact:create", "tool:artifact:load"], 1798 parameters=adk_types.Schema( 1799 type=adk_types.Type.OBJECT, 1800 properties={ 1801 "output_filename": adk_types.Schema( 1802 type=adk_types.Type.STRING, 1803 description="The desired name for the new artifact.", 1804 ), 1805 "embed_directive": adk_types.Schema( 1806 type=adk_types.Type.STRING, 1807 description="The full '«artifact_content:...>>>...>>>format:...»' string.", 1808 ), 1809 "output_metadata": adk_types.Schema( 1810 type=adk_types.Type.OBJECT, 1811 description="Optional metadata for the new artifact.", 1812 nullable=True, 1813 ), 1814 }, 1815 required=["output_filename", "embed_directive"], 1816 ), 1817 raw_string_args=["embed_directive"], 1818 examples=[], 1819 ) 1820 1821 extract_content_from_artifact_tool_def = BuiltinTool( 1822 name="extract_content_from_artifact", 1823 implementation=extract_content_from_artifact, 1824 description="Loads an existing artifact, uses an internal LLM to process its content based on an 'extraction_goal,' and manages the output by returning it or saving it as a new artifact. IMPORTANT: If the tool returns an error status (e.g., 'error_encoding_failed', 'error_artifact_not_found'), you MUST relay this error to the user - do NOT attempt to generate or fabricate data. The tool will return a 'message_to_llm' field explaining the error.", 1825 category="artifact_management", 1826 category_name=CATEGORY_NAME, 1827 category_description=CATEGORY_DESCRIPTION, 1828 required_scopes=["tool:artifact:load", "tool:artifact:create"], 1829 parameters=adk_types.Schema( 1830 type=adk_types.Type.OBJECT, 1831 properties={ 1832 "filename": adk_types.Schema( 1833 type=adk_types.Type.STRING, 1834 description="Name of the source artifact. May contain embeds.", 1835 ), 1836 "extraction_goal": adk_types.Schema( 1837 type=adk_types.Type.STRING, 1838 description="Natural language instruction for the LLM on what to extract or how to transform the content. May contain embeds.", 1839 ), 1840 "version": adk_types.Schema( 1841 type=adk_types.Type.STRING, 1842 description="Version of the source artifact. Can be an integer or 'latest'. Defaults to 'latest'. May contain embeds.", 1843 nullable=True, 1844 ), 1845 "output_filename_base": adk_types.Schema( 1846 type=adk_types.Type.STRING, 1847 description="Optional base name for the new artifact if the extracted content is saved. May contain embeds.", 1848 nullable=True, 1849 ), 1850 }, 1851 required=["filename", "extraction_goal"], 1852 ), 1853 examples=[], 1854 ) 1855 1856 tool_registry.register(_notify_artifact_save_tool_def) 1857 tool_registry.register(append_to_artifact_tool_def) 1858 tool_registry.register(list_artifacts_tool_def) 1859 tool_registry.register(load_artifact_tool_def) 1860 tool_registry.register(apply_embed_and_create_artifact_tool_def) 1861 tool_registry.register(extract_content_from_artifact_tool_def) 1862 1863 1864 async def delete_artifact( 1865 filename: str, 1866 version: Optional[int] = None, 1867 confirm_delete: bool = False, 1868 tool_context: ToolContext = None, 1869 ) -> ToolResult: 1870 """ 1871 Deletes all versions of an artifact. Version-specific deletion is not currently supported. 1872 1873 Args: 1874 filename: The name of the artifact to delete. 1875 version: Reserved for future use. Currently not supported - returns error if specified. 
confirm_delete: Must be set to True to confirm deletion. If False, returns confirmation prompt. 1877 tool_context: The context provided by the ADK framework. 1878 1879 Returns: 1880 A ToolResult indicating the result of the deletion or requesting confirmation. 1881 """ 1882 if not tool_context: 1883 return ToolResult.error( 1884 "ToolContext is missing, cannot delete artifact.", 1885 data={"filename": filename} 1886 ) 1887 1888 log_identifier = f"[BuiltinArtifactTool:delete_artifact:{filename}]" 1889 log.debug("%s Processing request.", log_identifier) 1890 1891 try: 1892 inv_context = tool_context._invocation_context 1893 artifact_service = inv_context.artifact_service 1894 if not artifact_service: 1895 raise ValueError("ArtifactService is not available in the context.") 1896 1897 app_name = inv_context.app_name 1898 user_id = inv_context.user_id 1899 session_id = get_original_session_id(inv_context) 1900 1901 if not hasattr(artifact_service, "delete_artifact"): 1902 raise NotImplementedError( 1903 "ArtifactService does not support deleting artifacts." 1904 ) 1905 1906 # Error if version-specific deletion requested (not currently supported) 1907 if version is not None: 1908 return ToolResult.error( 1909 f"Deleting a specific version ({version}) is not currently supported. Only deletion of ALL versions is supported. To delete all versions, omit 'version' and set confirm_delete=True.", 1910 data={"filename": filename, "version_requested": version} 1911 ) 1912 1913 # Get version list for confirmation message 1914 versions = await artifact_service.list_versions( 1915 app_name=app_name, user_id=user_id, session_id=session_id, filename=filename 1916 ) 1917 1918 # Require confirmation before deleting 1919 if not confirm_delete: 1920 count = len(versions) if versions else "unknown" 1921 return ToolResult.partial( 1922 f"WARNING: This operation is irreversible and will permanently delete artifact '{filename}' and ALL its versions (count: {count}). To proceed, call this tool again with confirm_delete=True.", 1923 data={ 1924 "filename": filename, 1925 "version_count": len(versions) if versions else None, 1926 "versions": versions, 1927 "confirmation_required": True, 1928 } 1929 ) 1930 1931 # Proceed with deletion 1932 await artifact_service.delete_artifact( 1933 app_name=app_name, 1934 user_id=user_id, 1935 session_id=session_id, 1936 filename=filename, 1937 ) 1938 1939 log.info("%s Successfully deleted artifact '%s'.", log_identifier, filename) 1940 return ToolResult.ok( 1941 f"Artifact '{filename}' deleted successfully.", 1942 data={ 1943 "filename": filename, 1944 "versions_deleted": len(versions) if versions else None, 1945 } 1946 ) 1947 1948 except FileNotFoundError as e: 1949 log.warning("%s Artifact not found for deletion: %s", log_identifier, e) 1950 return ToolResult.error( 1951 f"Artifact '{filename}' not found.", 1952 data={"filename": filename} 1953 ) 1954 except Exception as e: 1955 log.exception( 1956 "%s Error deleting artifact '%s': %s", log_identifier, filename, e 1957 ) 1958 return ToolResult.error( 1959 f"Failed to delete artifact: {e}", 1960 data={"filename": filename} 1961 ) 1962 1963 1964 delete_artifact_tool_def = BuiltinTool( 1965 name="delete_artifact", 1966 implementation=delete_artifact, 1967 description="Deletes all versions of an artifact. IMPORTANT: Requires explicit confirmation via confirm_delete=True parameter. 
The first call without confirmation will return details about what will be deleted.", 1968 category="artifact_management", 1969 category_name=CATEGORY_NAME, 1970 category_description=CATEGORY_DESCRIPTION, 1971 required_scopes=["tool:artifact:delete"], 1972 parameters=adk_types.Schema( 1973 type=adk_types.Type.OBJECT, 1974 properties={ 1975 "filename": adk_types.Schema( 1976 type=adk_types.Type.STRING, 1977 description="The name of the artifact to delete.", 1978 ), 1979 "version": adk_types.Schema( 1980 type=adk_types.Type.INTEGER, 1981 description="Reserved for future use. Version-specific deletion is not currently supported - will return error if specified.", 1982 nullable=True, 1983 ), 1984 "confirm_delete": adk_types.Schema( 1985 type=adk_types.Type.BOOLEAN, 1986 description="Must be set to True to actually perform the deletion. If False or omitted, returns a confirmation prompt with details about what will be deleted (including version count).", 1987 nullable=True, 1988 ), 1989 }, 1990 required=["filename"], 1991 ), 1992 examples=[], 1993 ) 1994 1995 tool_registry.register(delete_artifact_tool_def) 1996 1997 1998 def _perform_single_replacement( 1999 content: str, 2000 search_expr: str, 2001 replace_expr: str, 2002 is_regex: bool, 2003 regex_flags: str, 2004 log_identifier: str, 2005 strict_match_validation: bool = False, 2006 ) -> Tuple[str, int, Optional[str]]: 2007 """ 2008 Performs a single search-and-replace operation. 2009 2010 Args: 2011 content: The text content to search/replace in 2012 search_expr: The search pattern (literal or regex) 2013 replace_expr: The replacement text 2014 is_regex: If True, search_expr is treated as regex 2015 regex_flags: Flags for regex behavior ('g', 'i', 'm', 's') 2016 log_identifier: Logging prefix 2017 strict_match_validation: If True, error on multiple matches without 'g' flag (for batch mode) 2018 2019 Returns: 2020 tuple: (new_content, match_count, error_message) 2021 error_message is None on success 2022 """ 2023 match_count = 0 2024 new_content = content 2025 2026 if is_regex: 2027 # Parse regex flags 2028 flags_value = 0 2029 global_replace = False 2030 2031 if regex_flags: 2032 for flag_char in regex_flags.lower(): 2033 if flag_char == "g": 2034 global_replace = True 2035 elif flag_char == "i": 2036 flags_value |= re.IGNORECASE 2037 elif flag_char == "m": 2038 flags_value |= re.MULTILINE 2039 elif flag_char == "s": 2040 flags_value |= re.DOTALL 2041 else: 2042 log.warning( 2043 "%s Ignoring unrecognized regexp flag: '%s'", 2044 log_identifier, 2045 flag_char, 2046 ) 2047 2048 # Convert JavaScript-style capture groups ($1, $2) to Python style (\1, \2) 2049 # Also handle escaped dollar signs ($$) -> literal $ 2050 python_replace_expr = replace_expr 2051 # First, protect escaped dollars: $$ -> a placeholder 2052 python_replace_expr = python_replace_expr.replace("$$", "\x00DOLLAR\x00") 2053 # Convert capture groups: $1 -> \1 2054 python_replace_expr = re.sub(r"\$(\d+)", r"\\\1", python_replace_expr) 2055 # Restore escaped dollars: placeholder -> $ 2056 python_replace_expr = python_replace_expr.replace("\x00DOLLAR\x00", "$") 2057 2058 try: 2059 # Compile the regex pattern 2060 pattern = re.compile(search_expr, flags_value) 2061 2062 # Count matches first 2063 match_count = len(pattern.findall(content)) 2064 2065 if match_count == 0: 2066 return content, 0, f"No matches found" 2067 2068 # Check for multiple matches without global flag (only in strict mode for batch operations) 2069 if strict_match_validation and match_count > 1 and not 
global_replace: 2070 return ( 2071 content, 2072 match_count, 2073 f"Multiple matches found ({match_count}) but global flag 'g' not set", 2074 ) 2075 2076 # Perform replacement 2077 count_limit = 0 if global_replace else 1 2078 new_content = pattern.sub(python_replace_expr, content, count=count_limit) 2079 2080 return new_content, match_count, None 2081 2082 except re.error as regex_err: 2083 return content, 0, f"Invalid regular expression: {regex_err}" 2084 2085 else: 2086 # Literal string replacement 2087 match_count = content.count(search_expr) 2088 2089 if match_count == 0: 2090 return content, 0, f"No matches found" 2091 2092 # Replace all occurrences for literal mode 2093 new_content = content.replace(search_expr, replace_expr) 2094 return new_content, match_count, None 2095 2096 2097 async def artifact_search_and_replace_regex( 2098 filename: Artifact, 2099 search_expression: Optional[str] = None, 2100 replace_expression: Optional[str] = None, 2101 is_regexp: bool = False, 2102 regexp_flags: Optional[str] = "", 2103 new_filename: Optional[str] = None, 2104 new_description: Optional[str] = None, 2105 replacements: Optional[List[Dict[str, Any]]] = None, 2106 tool_context: ToolContext = None, 2107 ) -> ToolResult: 2108 """ 2109 Performs search and replace on an artifact's text content using either 2110 literal string matching or regular expressions. In regex mode, only the first match is replaced by default; 2111 set the 'g' flag in regexp_flags to replace all matches. Literal mode always replaces every occurrence. 2112 2113 Handling Multi-line Search and Replace: 2114 2115 When searching for or replacing text that spans multiple lines: 2116 2117 - In literal mode (is_regexp=false): Include actual newline characters directly in your search_expression 2118 and replace_expression parameters. Do NOT use escape sequences like \n - the tool will search for those 2119 literal characters. Multi-line parameter values are fully supported in the XML parameter format. 2120 2121 - In regex mode (is_regexp=true): Use the regex pattern \n to match newline characters in your pattern. 2122 2123 For multiple independent replacements: 2124 2125 Use the replacements array parameter to perform all replacements atomically in a single tool call, which is more efficient than multiple sequential calls. 2126 2127 Args: 2128 filename: The artifact to search/replace in (pre-loaded by the framework). 2129 search_expression: The pattern to search for (regex if is_regexp=true, literal otherwise). 2130 replace_expression: The replacement text. For regex mode, supports capture groups ($1, $2, etc.). Use $$ to insert a literal dollar sign. 2131 is_regexp: If True, treat search_expression as a regular expression. If False, treat as literal string. 2132 regexp_flags: Flags for regex behavior (only used when is_regexp=true). 2133 String of letters: 'g' (global/replace-all), 'i' (case-insensitive), 'm' (multiline), 's' (dotall). 2134 Defaults to empty string (no flags). 2135 new_filename: Optional. If provided, saves the result as a new artifact with this name. 2136 new_description: Optional. Description for the new/updated artifact. replacements: Optional. A list of {search, replace, is_regexp, regexp_flags} operations applied sequentially and atomically; mutually exclusive with search_expression/replace_expression. tool_context: The context provided by the ADK framework. 2137 2138 Returns: 2139 A ToolResult containing the result status, filename, version, match count, and any error messages. 
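Example (illustrative only; the framework pre-loads 'filename' into an Artifact object before this implementation runs, so these show the LLM-facing call shape rather than a direct Python invocation, and the artifact names are hypothetical):

    # Single regex replacement, all occurrences, case-insensitive:
    artifact_search_and_replace_regex(filename="notes.txt", search_expression=r"colou?r", replace_expression="color", is_regexp=True, regexp_flags="gi")

    # Atomic batch of literal replacements (all-or-nothing):
    artifact_search_and_replace_regex(filename="config.yaml", replacements=[{"search": "v1.0", "replace": "v1.1", "is_regexp": False}, {"search": "debug: true", "replace": "debug: false", "is_regexp": False}])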
2140 """ 2141 if not tool_context: 2142 return ToolResult.error( 2143 "ToolContext is missing, cannot perform search and replace.", 2144 data={"filename": filename.filename if isinstance(filename, Artifact) else filename} 2145 ) 2146 2147 artifact_filename = filename.filename 2148 artifact_version = filename.version 2149 2150 log_identifier = ( 2151 f"[BuiltinArtifactTool:artifact_search_and_replace_regex:{artifact_filename}:{artifact_version}]" 2152 ) 2153 log.debug("%s Processing request.", log_identifier) 2154 2155 # Validate parameter combinations 2156 if replacements is not None and ( 2157 search_expression is not None or replace_expression is not None 2158 ): 2159 return ToolResult.error( 2160 "Cannot provide both 'replacements' array and individual 'search_expression'/'replace_expression'. Use one or the other.", 2161 data={"filename": artifact_filename} 2162 ) 2163 2164 if replacements is None and ( 2165 search_expression is None or replace_expression is None 2166 ): 2167 return ToolResult.error( 2168 "Must provide either 'replacements' array or both 'search_expression' and 'replace_expression'.", 2169 data={"filename": artifact_filename} 2170 ) 2171 2172 if replacements is not None: 2173 if not isinstance(replacements, list) or len(replacements) == 0: 2174 return ToolResult.error( 2175 "replacements must be a non-empty array.", 2176 data={"filename": artifact_filename} 2177 ) 2178 2179 # Validate each replacement entry 2180 for idx, repl in enumerate(replacements): 2181 if not isinstance(repl, dict): 2182 return ToolResult.error( 2183 f"Replacement at index {idx} must be a dictionary.", 2184 data={"filename": artifact_filename} 2185 ) 2186 if "search" not in repl or "replace" not in repl or "is_regexp" not in repl: 2187 return ToolResult.error( 2188 f"Replacement at index {idx} missing required fields: 'search', 'replace', 'is_regexp'.", 2189 data={"filename": artifact_filename} 2190 ) 2191 2192 # Validate inputs for single replacement mode 2193 if replacements is None and not search_expression: 2194 return ToolResult.error( 2195 "search_expression cannot be empty.", 2196 data={"filename": artifact_filename} 2197 ) 2198 2199 # Determine output filename 2200 output_filename = new_filename if new_filename else artifact_filename 2201 2202 if new_filename and not is_filename_safe(new_filename): 2203 return ToolResult.error( 2204 f"Invalid new_filename: '{new_filename}'. Filename must not contain path separators or traversal sequences.", 2205 data={"filename": artifact_filename} 2206 ) 2207 2208 try: 2209 # Use pre-loaded artifact data 2210 source_bytes = filename.as_bytes() 2211 source_mime_type = filename.mime_type or "application/octet-stream" 2212 actual_version = artifact_version 2213 2214 # Verify it's a text-based artifact 2215 if not is_text_based_file(source_mime_type, source_bytes): 2216 return ToolResult.error( 2217 f"Cannot perform search and replace on binary artifact of type '{source_mime_type}'. 
This tool only works with text-based content.", 2218 data={"filename": artifact_filename, "version": actual_version} 2219 ) 2220 2221 # Decode the content - try multiple encodings for Windows-exported files 2222 original_content = None 2223 encoding_used = None 2224 encodings_to_try = ['utf-8', 'utf-16', 'cp1252', 'latin-1'] 2225 decode_errors = [] 2226 2227 for encoding in encodings_to_try: 2228 try: 2229 original_content = source_bytes.decode(encoding) 2230 encoding_used = encoding 2231 if encoding != 'utf-8': 2232 log.info( 2233 "%s Successfully decoded artifact using fallback encoding '%s' (UTF-8 failed)", 2234 log_identifier, 2235 encoding, 2236 ) 2237 break 2238 except UnicodeDecodeError as e: 2239 decode_errors.append(f"{encoding}: {e}") 2240 continue 2241 2242 if original_content is None: 2243 log.error( 2244 "%s Failed to decode artifact content with any supported encoding. Errors: %s", 2245 log_identifier, 2246 "; ".join(decode_errors), 2247 ) 2248 return ToolResult.error( 2249 f"Failed to decode artifact content with any supported encoding: {'; '.join(decode_errors)}", 2250 data={"filename": artifact_filename, "version": actual_version} 2251 ) 2252 2253 # Perform the search and replace 2254 if replacements: 2255 # Batch mode 2256 log.info( 2257 "%s Processing batch of %d replacements.", 2258 log_identifier, 2259 len(replacements), 2260 ) 2261 2262 current_content = original_content 2263 replacement_results = [] 2264 total_matches = 0 2265 2266 for idx, repl in enumerate(replacements): 2267 search_expr = repl["search"] 2268 replace_expr = repl["replace"] 2269 is_regex = repl["is_regexp"] 2270 regex_flags = repl.get("regexp_flags", "") 2271 2272 # Perform replacement on current state (with strict validation for batch mode) 2273 new_content, match_count, error_msg = _perform_single_replacement( 2274 current_content, 2275 search_expr, 2276 replace_expr, 2277 is_regex, 2278 regex_flags, 2279 log_identifier, 2280 strict_match_validation=True, 2281 ) 2282 2283 if error_msg: 2284 # Rollback - return error with details 2285 log.warning( 2286 "%s Batch replacement failed at index %d: %s", 2287 log_identifier, 2288 idx, 2289 error_msg, 2290 ) 2291 2292 # Mark the current replacement as an error 2293 all_results = replacement_results + [ 2294 { 2295 "search": repl["search"], 2296 "match_count": match_count, 2297 "status": "error", 2298 "error": error_msg, 2299 } 2300 ] 2301 # Add remaining as skipped 2302 for i in range(idx + 1, len(replacements)): 2303 all_results.append( 2304 { 2305 "search": replacements[i]["search"], 2306 "match_count": 0, 2307 "status": "skipped", 2308 } 2309 ) 2310 2311 return ToolResult.error( 2312 f"Batch replacement failed: No changes applied due to error in replacement {idx + 1}", 2313 data={ 2314 "filename": artifact_filename, 2315 "version": actual_version, 2316 "replacement_results": all_results, 2317 "failed_replacement": { 2318 "index": idx, 2319 "search": search_expr, 2320 "error": error_msg, 2321 }, 2322 } 2323 ) 2324 2325 # Success - update state and continue 2326 current_content = new_content 2327 total_matches += match_count 2328 replacement_results.append( 2329 { 2330 "search": search_expr, 2331 "match_count": match_count, 2332 "status": "success", 2333 } 2334 ) 2335 2336 log.debug( 2337 "%s Replacement %d/%d succeeded: %d matches", 2338 log_identifier, 2339 idx + 1, 2340 len(replacements), 2341 match_count, 2342 ) 2343 2344 # All replacements succeeded 2345 final_content = current_content 2346 total_replacements = len(replacements) 2347 2348 log.info( 2349 "%s Batch replacement succeeded: %d operations, 
%d total matches", 2350 log_identifier, 2351 total_replacements, 2352 total_matches, 2353 ) 2354 2355 else: 2356 # Single replacement mode (backward compatible) 2357 final_content, match_count, error_msg = _perform_single_replacement( 2358 original_content, 2359 search_expression, 2360 replace_expression, 2361 is_regexp, 2362 regexp_flags, 2363 log_identifier, 2364 ) 2365 2366 if error_msg: 2367 # Check if it's a "no matches" error specifically 2368 if match_count == 0 and "No matches found" in error_msg: 2369 return ToolResult.partial( 2370 f"No matches found for pattern '{search_expression}'. Artifact not modified.", 2371 data={ 2372 "filename": artifact_filename, 2373 "version": actual_version, 2374 "match_count": 0, 2375 "no_matches": True, 2376 } 2377 ) 2378 else: 2379 return ToolResult.error( 2380 error_msg, 2381 data={"filename": artifact_filename, "version": actual_version} 2382 ) 2383 2384 total_replacements = 1 2385 total_matches = match_count 2386 replacement_results = None 2387 2388 # Prepare metadata for the output artifact 2389 if replacements: 2390 new_metadata = { 2391 "source": f"artifact_search_and_replace_regex (batch) from '{artifact_filename}' v{actual_version}", 2392 "total_replacements": total_replacements, 2393 "total_matches": total_matches, 2394 } 2395 else: 2396 new_metadata = { 2397 "source": f"artifact_search_and_replace_regex from '{artifact_filename}' v{actual_version}", 2398 "search_expression": search_expression, 2399 "replace_expression": replace_expression, 2400 "is_regexp": is_regexp, 2401 "match_count": match_count, 2402 } 2403 2404 if regexp_flags and is_regexp: 2405 new_metadata["regexp_flags"] = regexp_flags 2406 2407 # Determine description for the output artifact 2408 artifact_description = new_description 2409 if not artifact_description and not new_filename: 2410 # Preserve original description when updating the same artifact 2411 original_metadata = filename.metadata or {} 2412 artifact_description = original_metadata.get("description") 2413 2414 # Return appropriate response based on mode 2415 new_content_bytes = final_content.encode("utf-8") 2416 2417 if replacements: 2418 return ToolResult.ok( 2419 f"Batch replacement completed: {total_replacements} operations, {total_matches} total matches", 2420 data={ 2421 "source_filename": artifact_filename, 2422 "source_version": actual_version, 2423 "total_replacements": total_replacements, 2424 "replacement_results": replacement_results, 2425 "total_matches": total_matches, 2426 }, 2427 data_objects=[ 2428 DataObject( 2429 name=output_filename, 2430 content=new_content_bytes, 2431 mime_type=source_mime_type, 2432 disposition=DataDisposition.ARTIFACT, 2433 description=artifact_description, 2434 metadata=new_metadata, 2435 ) 2436 ], 2437 ) 2438 else: 2439 # Compute replacements_made 2440 # For literal replacements, all matches are replaced 2441 # For regex without 'g' flag, only first match is replaced 2442 global_replace = "g" in (regexp_flags or "") 2443 replacements_made = ( 2444 match_count if not is_regexp or global_replace else min(match_count, 1) 2445 ) 2446 2447 return ToolResult.ok( 2448 f"Successfully performed {'regex' if is_regexp else 'literal'} search and replace. 
" 2449 f"Found {match_count} match(es).", 2450 data={ 2451 "source_filename": artifact_filename, 2452 "source_version": actual_version, 2453 "match_count": match_count, 2454 "replacements_made": replacements_made, 2455 }, 2456 data_objects=[ 2457 DataObject( 2458 name=output_filename, 2459 content=new_content_bytes, 2460 mime_type=source_mime_type, 2461 disposition=DataDisposition.ARTIFACT, 2462 description=artifact_description, 2463 metadata=new_metadata, 2464 ) 2465 ], 2466 ) 2467 2468 except FileNotFoundError as fnf_err: 2469 log.warning("%s Artifact not found: %s", log_identifier, fnf_err) 2470 return ToolResult.error( 2471 f"Artifact not found: {fnf_err}", 2472 data={"filename": artifact_filename, "version": artifact_version} 2473 ) 2474 except Exception as e: 2475 log.exception( 2476 "%s Unexpected error during search and replace: %s", log_identifier, e 2477 ) 2478 return ToolResult.error( 2479 f"Unexpected error: {e}", 2480 data={"filename": artifact_filename, "version": artifact_version} 2481 ) 2482 2483 2484 artifact_search_and_replace_regex_tool_def = BuiltinTool( 2485 name="artifact_search_and_replace_regex", 2486 implementation=artifact_search_and_replace_regex, 2487 description="Performs search and replace on an artifact's text content using either literal string matching or regular expressions. Supports both single replacements and atomic batch replacements for efficiency.", 2488 category="artifact_management", 2489 category_name=CATEGORY_NAME, 2490 category_description=CATEGORY_DESCRIPTION, 2491 required_scopes=["tool:artifact:load", "tool:artifact:create"], 2492 parameters=adk_types.Schema( 2493 type=adk_types.Type.OBJECT, 2494 properties={ 2495 "filename": adk_types.Schema( 2496 type=adk_types.Type.STRING, 2497 description="The name (and optional :version) of the artifact to search/replace in.", 2498 ), 2499 "search_expression": adk_types.Schema( 2500 type=adk_types.Type.STRING, 2501 description="The pattern to search for (single replacement mode). If is_regexp is true, this is treated as a regular expression. Otherwise, it's a literal string. Do not use if 'replacements' is provided.", 2502 nullable=True, 2503 ), 2504 "replace_expression": adk_types.Schema( 2505 type=adk_types.Type.STRING, 2506 description="The replacement text (single replacement mode). For regex mode, supports capture group references using $1, $2, etc. Use $$ to insert a literal dollar sign. Do not use if 'replacements' is provided.", 2507 nullable=True, 2508 ), 2509 "is_regexp": adk_types.Schema( 2510 type=adk_types.Type.BOOLEAN, 2511 description="If true, treat search_expression as a regular expression. If false, treat as literal string. Only used in single replacement mode.", 2512 nullable=True, 2513 ), 2514 "regexp_flags": adk_types.Schema( 2515 type=adk_types.Type.STRING, 2516 description="Flags for regex behavior (only used when is_regexp=true in single mode). String of letters: 'g' (global/replace all), 'i' (case-insensitive), 'm' (multiline), 's' (dotall). Example: 'gim'. Defaults to empty string.", 2517 nullable=True, 2518 ), 2519 "new_filename": adk_types.Schema( 2520 type=adk_types.Type.STRING, 2521 description="Optional. If provided, saves the result as a new artifact with this name instead of creating a new version of the original.", 2522 nullable=True, 2523 ), 2524 "new_description": adk_types.Schema( 2525 type=adk_types.Type.STRING, 2526 description="Optional. 
Description for the new/updated artifact.", 2527 nullable=True, 2528 ), 2529 "replacements": adk_types.Schema( 2530 type=adk_types.Type.ARRAY, 2531 items=adk_types.Schema( 2532 type=adk_types.Type.OBJECT, 2533 properties={ 2534 "search": adk_types.Schema( 2535 type=adk_types.Type.STRING, 2536 description="The search pattern (literal string or regex).", 2537 ), 2538 "replace": adk_types.Schema( 2539 type=adk_types.Type.STRING, 2540 description="The replacement text. For regex mode, supports $1, $2, etc. Use $$ for literal $.", 2541 ), 2542 "is_regexp": adk_types.Schema( 2543 type=adk_types.Type.BOOLEAN, 2544 description="If true, 'search' is a regex pattern. If false, literal string.", 2545 ), 2546 "regexp_flags": adk_types.Schema( 2547 type=adk_types.Type.STRING, 2548 description="Flags for regex: 'g' (global), 'i' (case-insensitive), 'm' (multiline), 's' (dotall). Default: ''.", 2549 nullable=True, 2550 ), 2551 }, 2552 required=["search", "replace", "is_regexp"], 2553 ), 2554 description="Optional. Array of replacement operations to perform atomically. Each operation is processed sequentially on the cumulative result. If any operation fails, all changes are rolled back. Do not use with 'search_expression' or 'replace_expression'.", 2555 nullable=True, 2556 ), 2557 }, 2558 required=["filename"], 2559 ), 2560 examples=[], 2561 ) 2562 2563 tool_registry.register(artifact_search_and_replace_regex_tool_def)
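# ---------------------------------------------------------------------------
# Illustrative sketch (kept as comments because this module's relative imports
# prevent it from running as a standalone script): the expected behavior of the
# pure helper _perform_single_replacement, including the JavaScript-style
# capture-group conversion ($1 -> \1) and the flag handling described above.
# The input strings below are hypothetical.
#
#   new_text, n_matches, err = _perform_single_replacement(
#       content="Hello world, hello World",
#       search_expr=r"hello (\w+)",
#       replace_expr="hi $1",      # $1 is rewritten to Python's \1 internally
#       is_regex=True,
#       regex_flags="gi",          # 'g' replaces all matches, 'i' ignores case
#       log_identifier="[demo]",
#   )
#   # new_text == "hi world, hi World"; n_matches == 2; err is None
#
#   # Literal mode always replaces every occurrence and needs no flags:
#   new_text, n_matches, err = _perform_single_replacement(
#       content="a,b,a", search_expr="a", replace_expr="x",
#       is_regex=False, regex_flags="", log_identifier="[demo]",
#   )
#   # new_text == "x,b,x"; n_matches == 2; err is None
# ---------------------------------------------------------------------------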