# builtin_data_analysis_tools.py
"""
Built-in ADK Tools for Data Analysis (SQL, JQ, Plotly).
"""

import logging
import json
import os
from typing import Dict, Optional, Literal
from datetime import datetime, timezone

try:
    import yaml

    PYYAML_AVAILABLE = True
except ImportError:
    PYYAML_AVAILABLE = False

try:
    import plotly.io as pio
    import plotly.graph_objects as go

    PLOTLY_AVAILABLE = True
    try:
        # Imported only to probe availability; rendering goes through plotly.io.
        import kaleido

        KALEIDO_AVAILABLE = True
    except ImportError:
        KALEIDO_AVAILABLE = False
except ImportError:
    PLOTLY_AVAILABLE = False
    KALEIDO_AVAILABLE = False


from google.adk.tools import ToolContext
from google.genai import types as adk_types

from ...agent.utils.artifact_helpers import ensure_correct_extension

from .tool_definition import BuiltinTool
from .tool_result import ToolResult, DataObject, DataDisposition
from .registry import tool_registry

log = logging.getLogger(__name__)

CATEGORY_NAME = "Data Analysis"
CATEGORY_DESCRIPTION = "Create static chart images from data in JSON or YAML format."


def _reap_zombie_processes():
    """Reap any zombie child processes to prevent accumulation.

    In containerized environments where PID 1 is the Python process (not init),
    zombie child processes from Kaleido/Chromium are not automatically reaped.
    This function calls os.waitpid() in a non-blocking loop to clean them up.

    The call is best-effort: it is a no-op on platforms without
    ``os.WNOHANG`` and it never raises, because it runs inside a ``finally``
    block where an exception would mask the result of a successful render.

    NOTE(review): ``os.waitpid(-1, ...)`` waits for *any* child of this
    process, so it can also collect exit statuses of children started by
    other code in the same process — confirm that is acceptable here.
    """
    wnohang = getattr(os, "WNOHANG", None)
    if wnohang is None:
        # os.WNOHANG is Unix-only; without it a non-blocking wait is impossible.
        return

    reaped = 0
    while True:
        try:
            pid, _ = os.waitpid(-1, wnohang)
        except ChildProcessError:
            # No child processes exist
            break
        except OSError as wait_err:
            # Any other OS-level failure: give up quietly, this is only cleanup.
            log.debug("Non-critical: waitpid failed while reaping: %s", wait_err)
            break
        if pid == 0:
            # Children exist but none has exited yet.
            break
        reaped += 1
    if reaped > 0:
        log.debug("Reaped %d zombie child process(es).", reaped)


def _cleanup_kaleido_scope():
    """Shut down the Kaleido Chromium subprocess scope to free resources.

    Kaleido 0.2.x maintains a persistent Chromium subprocess (PlotlyScope)
    that spawns child renderer processes.

    The scope object is looked up in several places because its location
    depends on the installed plotly version: ``pio._kaleido`` itself,
    ``pio._kaleido.scope`` and ``pio.kaleido.scope``. For each distinct
    candidate the first available of ``_shutdown`` / ``_shutdown_kaleido`` is
    called; if neither exists but a ``_proc`` handle does, the process is
    killed directly.

    NOTE(review): in plotly 5.x ``pio._kaleido`` is believed to be a module
    and Kaleido 0.2.x is believed to name its shutdown method
    ``_shutdown_kaleido`` — confirm against the pinned versions.

    Any failure is logged at debug level and swallowed; cleanup must never
    turn a successful render into an error.
    """
    try:
        private_mod = getattr(pio, "_kaleido", None)
        public_mod = getattr(pio, "kaleido", None)
        candidates = (
            private_mod,
            getattr(private_mod, "scope", None),
            getattr(public_mod, "scope", None),
        )

        seen_ids = set()
        for scope in candidates:
            # The same scope is usually reachable via more than one path.
            if scope is None or id(scope) in seen_ids:
                continue
            seen_ids.add(id(scope))

            shutdown = None
            for method_name in ("_shutdown", "_shutdown_kaleido"):
                method = getattr(scope, method_name, None)
                if callable(method):
                    shutdown = method
                    break

            if shutdown is not None:
                shutdown()
                log.debug("Kaleido scope shut down successfully.")
            elif getattr(scope, "_proc", None) is not None:
                scope._proc.kill()
                scope._proc.wait()
                scope._proc = None
                log.debug("Kaleido subprocess terminated successfully.")
    except Exception as e:
        log.debug("Non-critical: Failed to clean up Kaleido scope: %s", e)


def _parse_plotly_config(config_content: str, config_format: str) -> Dict:
    """Parse a Plotly configuration string into a dictionary.

    Args:
        config_content: The raw configuration text.
        config_format: Either 'json' or 'yaml'.

    Returns:
        The parsed configuration as a dict.

    Raises:
        ValueError: If the format is unknown, the text cannot be parsed, or
            the parsed value is not a dictionary.
        ImportError: If 'yaml' is requested but PyYAML is not installed.
    """
    if config_format == "json":
        try:
            parsed = json.loads(config_content)
        except json.JSONDecodeError as parse_err:
            raise ValueError(
                f"Failed to parse Plotly config as JSON: {parse_err}"
            ) from parse_err
        # Mirror the YAML branch: a top-level list/scalar/null is not a config.
        if not isinstance(parsed, dict):
            raise ValueError("JSON content did not parse into a dictionary.")
        return parsed

    if config_format == "yaml":
        if not PYYAML_AVAILABLE:
            # Without this guard the reference to `yaml` below is a NameError.
            raise ImportError(
                "The PyYAML library is required to parse YAML configs but it is not installed."
            )
        try:
            parsed = yaml.safe_load(config_content)
        except yaml.YAMLError as parse_err:
            raise ValueError(
                f"Failed to parse Plotly config as YAML: {parse_err}"
            ) from parse_err
        if not isinstance(parsed, dict):
            raise ValueError("YAML content did not parse into a dictionary.")
        return parsed

    raise ValueError(
        f"Invalid config_format: {config_format}. Expected 'json' or 'yaml'."
    )


async def create_chart_from_plotly_config(
    config_content: str,
    config_format: Literal["json", "yaml"],
    output_filename: str,
    output_format: Optional[str] = "png",
    tool_context: ToolContext = None,
) -> ToolResult:
    """
    Generates a static chart image from a Plotly configuration provided as a string.

    The config is parsed, turned into a ``go.Figure`` and rendered with
    ``pio.to_image`` using the Kaleido engine. After every render attempt the
    Kaleido scope is shut down and zombie children are reaped. The image is
    returned as an artifact ``DataObject``; nothing is written by this
    function itself.

    NOTE(review): ``pio.to_image`` is called synchronously inside this
    coroutine, so it occupies the event loop for the duration of the render.

    Args:
        config_content: The Plotly configuration (JSON or YAML) as a string.
        config_format: The format of the config_content ('json' or 'yaml').
        output_filename: The desired filename for the output image artifact.
        output_format: The desired image format ('png', 'jpg', 'svg', 'pdf', etc.).
            Default 'png'; ``None`` or an empty string also means 'png'.
        tool_context: The context provided by the ADK framework.

    Returns:
        ToolResult with output artifact details, or an error ToolResult when
        a dependency is missing, the config is invalid, or rendering fails.
        Exceptions are not propagated to the caller.
    """
    if not tool_context:
        return ToolResult.error("ToolContext is missing.")
    if not PLOTLY_AVAILABLE:
        return ToolResult.error(
            "The plotly library is required for chart generation but it is not installed."
        )
    if not KALEIDO_AVAILABLE:
        return ToolResult.error(
            "The kaleido library is required for chart generation but it is not installed."
        )

    # The tool schema declares output_format as nullable; fall back to the
    # documented default so the later `.lower()` cannot fail on None.
    output_format = output_format or "png"

    log_identifier = f"[DataTool:create_chart:{output_filename}]"
    log.info(
        "%s Processing request to create chart '%s' from %s config.",
        log_identifier,
        output_filename,
        config_format,
    )

    try:
        plotly_config_dict: Dict = _parse_plotly_config(config_content, config_format)

        try:
            fig = go.Figure(plotly_config_dict)
        except Exception as fig_err:
            raise ValueError(
                f"Failed to create Plotly figure from config: {fig_err}"
            ) from fig_err

        try:
            image_bytes = pio.to_image(fig, format=output_format, engine="kaleido")
            log.info(
                "%s Successfully generated %s image bytes using Kaleido.",
                log_identifier,
                output_format,
            )
        except Exception as img_err:
            raise ValueError(
                f"Failed to generate {output_format} image using Plotly/Kaleido: {img_err}. Ensure 'kaleido' package is installed and functional."
            ) from img_err
        finally:
            # Clean up Kaleido's Chromium subprocess to prevent zombie process
            # accumulation. Both helpers swallow their own errors.
            _cleanup_kaleido_scope()
            _reap_zombie_processes()

        mime_map = {
            "png": "image/png",
            "jpg": "image/jpeg",
            "jpeg": "image/jpeg",
            "webp": "image/webp",
            "svg": "image/svg+xml",
            "pdf": "application/pdf",
            "eps": "application/postscript",
        }
        # Unknown formats fall back to a generic image/<format> MIME type.
        output_mime_type = mime_map.get(output_format.lower(), f"image/{output_format}")

        final_output_filename = ensure_correct_extension(output_filename, output_format)

        metadata = {
            "description": f"Chart generated from Plotly config ({config_format})",
            "source_format": config_format,
            "output_format": output_format,
            "generation_tool": "create_chart_from_plotly_config",
            "generation_timestamp": datetime.now(timezone.utc).isoformat(),
        }

        log.info("%s Returning chart as DataObject for artifact storage", log_identifier)

        return ToolResult.ok(
            "Chart image created successfully.",
            data={
                "source_format": config_format,
                "output_format": output_format,
            },
            data_objects=[
                DataObject(
                    name=final_output_filename,
                    content=image_bytes,
                    mime_type=output_mime_type,
                    disposition=DataDisposition.ARTIFACT,
                    description=f"Chart generated from Plotly config ({config_format})",
                    metadata=metadata,
                )
            ],
        )

    except ValueError as e:
        log.warning("%s Value error: %s", log_identifier, e)
        return ToolResult.error(str(e))
    except ImportError as e:
        log.warning("%s Missing library error: %s", log_identifier, e)
        return ToolResult.error(str(e))
    except Exception as e:
        log.exception(
            "%s Unexpected error in create_chart_from_plotly_config: %s",
            log_identifier,
            e,
        )
        return ToolResult.error(f"An unexpected error occurred: {e}")


# Tool metadata and parameter schema exposed to the ADK tool registry.
create_chart_from_plotly_config_tool_def = BuiltinTool(
    name="create_chart_from_plotly_config",
    implementation=create_chart_from_plotly_config,
    description="Generates a static chart image (PNG, JPG, SVG, PDF) from a Plotly configuration provided directly as a JSON or YAML string in `config_content`. Specify the format of the string in `config_format` and the desired output filename and image format.",
    category="data_analysis",
    category_name=CATEGORY_NAME,
    category_description=CATEGORY_DESCRIPTION,
    required_scopes=["tool:data:chart"],
    parameters=adk_types.Schema(
        type=adk_types.Type.OBJECT,
        properties={
            "config_content": adk_types.Schema(
                type=adk_types.Type.STRING,
                description="The Plotly configuration (JSON or YAML) as a string.",
            ),
            "config_format": adk_types.Schema(
                type=adk_types.Type.STRING,
                description="The format of the config_content ('json' or 'yaml').",
                enum=["json", "yaml"],
            ),
            "output_filename": adk_types.Schema(
                type=adk_types.Type.STRING,
                description="The desired filename for the output image artifact.",
            ),
            "output_format": adk_types.Schema(
                type=adk_types.Type.STRING,
                description="The desired image format ('png', 'jpg', 'svg', 'pdf', etc.). Defaults to 'png'.",
                nullable=True,
            ),
        },
        required=["config_content", "config_format", "output_filename"],
    ),
    examples=[],
)

# Import-time side effect: makes the tool discoverable through the registry.
tool_registry.register(create_chart_from_plotly_config_tool_def)