"""
Contains the main embed resolution functions, including the chain executor.
"""

import logging
import asyncio
import json
import uuid
from typing import Any, Callable, Dict, Optional, Set, Tuple, List, Union

import yaml

from .constants import (
    EMBED_REGEX,
    EMBED_DELIMITER_OPEN,
    EMBED_DELIMITER_CLOSE,
    EARLY_EMBED_TYPES,
    LATE_EMBED_TYPES,
)
from .evaluators import EMBED_EVALUATORS, _evaluate_artifact_content_embed
from .modifiers import MODIFIER_DEFINITIONS, _parse_modifier_chain
from .converter import (
    convert_data,
    serialize_data,
    _parse_string_to_list_of_dicts,
)
from .types import DataFormat, ResolutionMode
from ..mime_helpers import is_text_based_mime_type

log = logging.getLogger(__name__)


def _log_data_state(
    log_id: str,
    step: str,
    data: Any,
    data_format: Optional[DataFormat],
    mime_type: Optional[str],
):
    """Logs the state of the data at a specific step."""
    data_type = type(data).__name__
    data_size = "N/A"
    data_preview = "N/A"

    if isinstance(data, (bytes, str, list)):
        data_size = str(len(data))
    elif isinstance(data, dict):
        data_size = f"{len(data)} keys"

    if isinstance(data, bytes):
        try:
            data_preview = data[:100].decode("utf-8", errors="replace") + (
                "..." if len(data) > 100 else ""
            )
        except Exception:
            data_preview = f"Bytes[{len(data)}]"
    elif isinstance(data, str):
        data_preview = data[:100] + ("..." if len(data) > 100 else "")
    elif isinstance(data, list):
        data_preview = f"List[{len(data)} items]"
        if data and isinstance(data[0], dict):
            data_preview += f" (First item keys: {list(data[0].keys())[:5]}{'...' if len(data[0].keys()) > 5 else ''})"
    elif isinstance(data, dict):
        data_preview = f"Dict[{len(data)} keys: {list(data.keys())[:5]}{'...' if len(data.keys()) > 5 else ''}]"
    else:
        data_preview = str(data)[:100] + ("..." if len(str(data)) > 100 else "")

    log.info(
        "%s [%s] Format: %s, MimeType: %s, Type: %s, Size: %s, Preview: '%s'",
        log_id,
        step,
        data_format.name if data_format else "None",
        mime_type,
        data_type,
        data_size,
        data_preview,
    )


async def _evaluate_artifact_content_embed_with_chain(
    artifact_spec_from_directive: str,
    modifiers_from_directive: List[Tuple[str, str]],
    output_format_from_directive: Optional[str],
    context: Any,
    log_identifier: str,
    resolution_mode: "ResolutionMode",
    config: Optional[Dict] = None,
    current_depth: int = 0,
    visited_artifacts: Optional[Set[Tuple[str, int]]] = None,
) -> Union[Tuple[str, Optional[str], int], Tuple[None, str, Any]]:
    """
    Loads artifact content, recursively resolves its internal embeds if text-based,
    applies a chain of modifiers, and serializes the final result.

    Returns either a 3-tuple (text, error_message, size_bytes) or a signal
    tuple (None, signal_type, signal_data) for binary inline content.
    """
    log.info(
        "%s [Depth:%d] Starting chain execution for artifact directive: %s",
        log_identifier,
        current_depth,
        artifact_spec_from_directive,
    )
    visited_artifacts = visited_artifacts or set()
    parsed_artifact_spec = artifact_spec_from_directive

    loaded_content_bytes, original_mime_type, load_error = (
        await _evaluate_artifact_content_embed(
            parsed_artifact_spec, context, log_identifier, config
        )
    )

    if load_error:
        log.warning(
            "%s [Depth:%d] Error loading initial artifact '%s': %s",
            log_identifier,
            current_depth,
            parsed_artifact_spec,
            load_error,
        )
        err_str = f"[Error loading artifact '{parsed_artifact_spec}': {load_error}]"
        return err_str, load_error, len(err_str.encode("utf-8"))

    if loaded_content_bytes is None:
        err_msg = f"Internal error - Artifact load for '{parsed_artifact_spec}' returned None content without error."
        log.error("%s %s", log_identifier, err_msg)
        return f"[Error: {err_msg}]", err_msg, 0

    current_data: Any = loaded_content_bytes
    current_format: DataFormat = DataFormat.BYTES
    _log_data_state(
        log_identifier,
        f"[Depth:{current_depth}] Initial Load",
        current_data,
        current_format,
        original_mime_type,
    )
    if is_text_based_mime_type(original_mime_type):
        try:
            decoded_content = loaded_content_bytes.decode("utf-8")
            log.debug(
                "%s [Depth:%d] Artifact '%s' is text-based (%s). Attempting recursive embed resolution.",
                log_identifier,
                current_depth,
                parsed_artifact_spec,
                original_mime_type,
            )
            # Build the loop-detection key (filename, version). An unparsable
            # or missing version maps to -1, which weakens loop detection but
            # never blocks resolution.
            spec_parts = parsed_artifact_spec.split(":", 1)
            filename_for_key = spec_parts[0]
            version_str_for_key = spec_parts[1] if len(spec_parts) > 1 else None
            try:
                version_for_key = (
                    int(version_str_for_key) if version_str_for_key else -1
                )
            except ValueError:
                log.warning(
                    "%s Could not parse version from '%s' for visited_artifacts key. Loop detection might be affected.",
                    log_identifier,
                    parsed_artifact_spec,
                )
                version_for_key = -1

            artifact_key = (filename_for_key, version_for_key)
            new_visited_artifacts = visited_artifacts.copy()
            new_visited_artifacts.add(artifact_key)

            # FIX: config may be None (signature default); guard the .get()
            # calls so a missing config falls back to the documented defaults
            # instead of raising AttributeError.
            resolved_string_content = await resolve_embeds_recursively_in_string(
                text=decoded_content,
                context=context,
                resolver_func=evaluate_embed,
                types_to_resolve=EARLY_EMBED_TYPES.union(LATE_EMBED_TYPES),
                resolution_mode=ResolutionMode.RECURSIVE_ARTIFACT_CONTENT,
                log_identifier=log_identifier,
                config=config,
                max_depth=(config or {}).get("gateway_recursive_embed_depth", 12),
                current_depth=current_depth + 1,
                visited_artifacts=new_visited_artifacts,
                accumulated_size=0,
                max_total_size=(config or {}).get(
                    "gateway_max_artifact_resolve_size_bytes", -1
                ),
            )
            current_data = resolved_string_content
            current_format = DataFormat.STRING
            _log_data_state(
                log_identifier,
                f"[Depth:{current_depth}] After Recursive Resolution (including templates)",
                current_data,
                current_format,
                original_mime_type,
            )

        except UnicodeDecodeError as ude:
            err_msg = f"Failed to decode text-based artifact '{parsed_artifact_spec}' for recursion: {ude}"
            log.warning("%s %s", log_identifier, err_msg)
            return f"[Error: {err_msg}]", err_msg, 0
        except Exception as recurse_err:
            err_msg = f"Error during recursive resolution of '{parsed_artifact_spec}': {recurse_err}"
            log.exception("%s %s", log_identifier, err_msg)
            return f"[Error: {err_msg}]", err_msg, 0
    else:
        log.debug(
            "%s [Depth:%d] Artifact '%s' is not text-based (%s). Passing raw bytes to modifier chain.",
            log_identifier,
            current_depth,
            parsed_artifact_spec,
            original_mime_type,
        )

    # Pre-parse string content into structured data based on the artifact's
    # declared MIME type, so modifiers receive the richest format available.
    if current_format == DataFormat.STRING and original_mime_type:
        normalized_mime_type = original_mime_type.lower()
        log.debug(
            "%s [Depth:%d] Pre-parsing string content with MIME type: %s",
            log_identifier,
            current_depth,
            normalized_mime_type,
        )
        if "json" in normalized_mime_type:
            try:
                current_data = json.loads(current_data)
                current_format = DataFormat.JSON_OBJECT
                log.info(
                    "%s [Depth:%d] Pre-parsed string as JSON_OBJECT.",
                    log_identifier,
                    current_depth,
                )
            except json.JSONDecodeError:
                log.warning(
                    "%s [Depth:%d] Failed to pre-parse as JSON despite MIME type '%s'. Content will be treated as STRING.",
                    log_identifier,
                    current_depth,
                    original_mime_type,
                )
        elif "yaml" in normalized_mime_type or "yml" in normalized_mime_type:
            try:
                current_data = yaml.safe_load(current_data)
                current_format = DataFormat.JSON_OBJECT
                log.info(
                    "%s [Depth:%d] Pre-parsed string as YAML (now JSON_OBJECT).",
                    log_identifier,
                    current_depth,
                )
            except yaml.YAMLError:
                log.warning(
                    "%s [Depth:%d] Failed to pre-parse as YAML despite MIME type '%s'. Content will be treated as STRING.",
                    log_identifier,
                    current_depth,
                    original_mime_type,
                )
        elif "csv" in normalized_mime_type:
            parsed_data, error_msg = _parse_string_to_list_of_dicts(
                current_data, original_mime_type, log_identifier
            )
            if error_msg is None and parsed_data is not None:
                current_data = parsed_data
                current_format = DataFormat.LIST_OF_DICTS
                log.info(
                    "%s [Depth:%d] Pre-parsed string as LIST_OF_DICTS from CSV.",
                    log_identifier,
                    current_depth,
                )
            else:
                log.warning(
                    "%s [Depth:%d] Failed to pre-parse as CSV despite MIME type '%s': %s. Content will be treated as STRING.",
                    log_identifier,
                    current_depth,
                    original_mime_type,
                    error_msg,
                )

    _log_data_state(
        log_identifier,
        f"[Depth:{current_depth}] After Pre-parsing",
        current_data,
        current_format,
        original_mime_type,
    )

    # Apply each modifier in order, converting the working data to an accepted
    # format first when necessary.
    modifier_index = 0
    for prefix, value in modifiers_from_directive:
        modifier_index += 1
        modifier_step_id = f"Modifier {modifier_index} ({prefix})"

        modifier_def = MODIFIER_DEFINITIONS.get(prefix)
        if not modifier_def:
            err_msg = f"Unknown modifier prefix: '{prefix}'"
            log.warning("%s %s", log_identifier, err_msg)
            return f"[Error: {err_msg}]", err_msg, 0

        modifier_func = modifier_def["function"]
        accepts_formats: List[DataFormat] = modifier_def["accepts"]
        produces_format: DataFormat = modifier_def["produces"]

        log.info(
            "%s [Depth:%d][%s] Applying modifier: %s:%s (Accepts: %s, Produces: %s)",
            log_identifier,
            current_depth,
            modifier_step_id,
            prefix,
            # FIX: only append an ellipsis when the value was actually truncated.
            value[:50] + ("..." if len(value) > 50 else ""),
            [f.name for f in accepts_formats],
            produces_format.name,
        )

        if current_format not in accepts_formats:
            target_format_for_modifier = accepts_formats[0]
            log.info(
                "%s [Depth:%d][%s] Current format %s not accepted by %s. Converting to %s...",
                log_identifier,
                current_depth,
                modifier_step_id,
                current_format.name,
                prefix,
                target_format_for_modifier.name,
            )
            _log_data_state(
                log_identifier,
                f"[Depth:{current_depth}] {modifier_step_id} - Before Conversion",
                current_data,
                current_format,
                original_mime_type,
            )
            converted_data, new_format, convert_error = convert_data(
                current_data,
                current_format,
                target_format_for_modifier,
                log_identifier,
                original_mime_type,
            )
            if convert_error:
                err_msg = (
                    f"Failed to convert data for modifier '{prefix}': {convert_error}"
                )
                log.warning("%s %s", log_identifier, err_msg)
                return f"[Error: {err_msg}]", err_msg, 0
            current_data = converted_data
            current_format = new_format
            log.info(
                "%s [Depth:%d][%s] Conversion successful. New format: %s",
                log_identifier,
                current_depth,
                modifier_step_id,
                current_format.name,
            )
            _log_data_state(
                log_identifier,
                f"[Depth:{current_depth}] {modifier_step_id} - After Conversion",
                current_data,
                current_format,
                original_mime_type,
            )

        try:
            _log_data_state(
                log_identifier,
                f"[Depth:{current_depth}] {modifier_step_id} - Before Execution",
                current_data,
                current_format,
                original_mime_type,
            )
            # 'apply_to_template' is the only modifier that needs the full
            # context object; all others get the standard 4-argument call.
            if prefix == "apply_to_template":
                result_data, _, exec_error = await modifier_func(
                    current_data, value, original_mime_type, log_identifier, context
                )
            else:
                if asyncio.iscoroutinefunction(modifier_func):
                    result_data, _, exec_error = await modifier_func(
                        current_data, value, original_mime_type, log_identifier
                    )
                else:
                    result_data, _, exec_error = modifier_func(
                        current_data, value, original_mime_type, log_identifier
                    )

            if exec_error:
                err_msg = f"Error applying modifier '{prefix}': {exec_error}"
                log.warning("%s %s", log_identifier, err_msg)
                return f"[Error: {err_msg}]", err_msg, 0

            current_data = result_data
            current_format = produces_format
            log.info(
                "%s [Depth:%d][%s] Modifier '%s' executed. Result format: %s",
                log_identifier,
                current_depth,
                modifier_step_id,
                prefix,
                current_format.name,
            )
            _log_data_state(
                log_identifier,
                f"[Depth:{current_depth}] {modifier_step_id} - After Execution",
                current_data,
                current_format,
                original_mime_type,
            )
            if current_data is None or (
                isinstance(current_data, (list, str, bytes)) and not current_data
            ):
                log.info(
                    "%s [Depth:%d][%s] Modifier '%s' resulted in empty data.",
                    log_identifier,
                    current_depth,
                    modifier_step_id,
                    prefix,
                )

        except Exception as mod_err:
            log.exception(
                "%s [Depth:%d][%s] Unexpected error executing modifier '%s': %s",
                log_identifier,
                current_depth,
                modifier_step_id,
                prefix,
                mod_err,
            )
            err_msg = f"Unexpected error in modifier '{prefix}': {mod_err}"
            return f"[Error: {err_msg}]", err_msg, 0

    # Binary results destined for the user are signaled instead of serialized,
    # so the gateway can attach them as inline binary content.
    if (
        current_format == DataFormat.BYTES
        and resolution_mode == ResolutionMode.A2A_MESSAGE_TO_USER
    ):
        log.info(
            "%s [Depth:%d] Result is binary data in A2A_MESSAGE_TO_USER mode. Signaling for inline binary content.",
            log_identifier,
            current_depth,
        )
        filename_for_signal = artifact_spec_from_directive.split(":", 1)[0]
        return (
            None,
            "SIGNAL_INLINE_BINARY_CONTENT",
            {
                "bytes": current_data,
                "mime_type": original_mime_type,
                "name": filename_for_signal,
            },
        )

    target_string_format = output_format_from_directive
    if target_string_format is None:
        log.warning(
            "%s [Depth:%d] Missing final 'format:' step in chain. Defaulting to 'text'.",
            log_identifier,
            current_depth,
        )
        target_string_format = "text"

    log.info(
        "%s [Depth:%d] [Final Serialization] Serializing final data (Format: %s) to target string format '%s'",
        log_identifier,
        current_depth,
        current_format.name,
        target_string_format,
    )
    _log_data_state(
        log_identifier,
        f"[Depth:{current_depth}] Before Serialization",
        current_data,
        current_format,
        original_mime_type,
    )

    final_serialized_string, serialize_error = serialize_data(
        data=current_data,
        data_format=current_format,
        target_string_format=target_string_format,
        original_mime_type=original_mime_type,
        log_id=log_identifier,
    )

    if serialize_error:
        log.warning("%s [Depth:%d] %s", log_identifier, current_depth, serialize_error)
        return (
            final_serialized_string,
            serialize_error,
            len(final_serialized_string.encode("utf-8")),
        )

    final_size = len(final_serialized_string.encode("utf-8"))
    log.info(
        "%s [Depth:%d] Chain execution completed successfully. Final size: %d bytes.",
        log_identifier,
        current_depth,
        final_size,
    )
    # FIX: the format string previously ended in '%s...' while the argument
    # also conditionally appended '...', doubling the ellipsis.
    log.info(
        "%s [Depth:%d] [Final Serialization] Result: '%s'",
        log_identifier,
        current_depth,
        final_serialized_string[:100]
        + ("..." if len(final_serialized_string) > 100 else ""),
    )
    return final_serialized_string, None, final_size


async def resolve_embeds_in_string(
    text: str,
    context: Any,
    resolver_func: Callable[
        ..., Union[Tuple[str, Optional[str], int], Tuple[None, str, Any]]
    ],
    types_to_resolve: Set[str],
    resolution_mode: "ResolutionMode",
    log_identifier: str = "[EmbedUtil]",
    config: Optional[Dict[str, Any]] = None,
) -> Tuple[str, int, List[Tuple[int, Any, str]]]:
    """
    Resolves specified embed types within a string using a provided resolver function.
    This is the TOP-LEVEL resolver called by gateways. It handles signals and buffering.
    It does NOT perform recursion itself but calls `evaluate_embed` which might trigger recursion.

    Processes the string iteratively, resolving one embed at a time.
    Includes buffering logic: stops processing if a partial embed delimiter is found
    at the end, returning the processed part and the index where processing stopped.
    Can now return special signals from the resolver function.

    Args:
        text: The input string potentially containing embeds.
        context: The context object passed to the resolver function (now a Dict).
        resolver_func: The function to call for evaluating each embed.
            Signature: (type, expression, format, context, log_id, config, ...) -> Any
            Can return a string for replacement, or a tuple like (None, "SIGNAL_TYPE", data)
            to indicate a signal instead of text replacement.
        types_to_resolve: A set of embed types (strings) to resolve in this pass.
        log_identifier: Identifier for logging.
        config: Optional configuration dictionary passed to the resolver.

    Returns:
        A tuple containing:
        - The string with specified embeds resolved (or removed if signaled).
        - The index in the *original* string representing the end of the
          successfully processed portion (useful for buffering). This will be
          len(text) if the whole string was processed.
        - A list of signals encountered during resolution, as 3-tuples
          (start_index, signal_tuple, placeholder). The start_index is the
          start of the embed directive in the original string; placeholder is
          the unique token inserted into the output where the embed was.
    """
    resolved_parts = []
    signals_found: List[Tuple[int, Any, str]] = []
    last_end = 0
    original_length = len(text)

    log.debug(
        # FIX: only append an ellipsis when the text was actually truncated.
        "%s Checking for embeds in text: '%s'",
        log_identifier,
        text[:200] + ("..." if len(text) > 200 else ""),
    )

    for match in EMBED_REGEX.finditer(text):
        start, end = match.span()
        embed_type = match.group(1)
        expression = match.group(2)
        format_spec = match.group(3)

        resolved_parts.append(text[last_end:start])

        if embed_type in types_to_resolve:
            log.info(
                "%s Found embed type '%s' to resolve: expr='%s', fmt='%s', types_to_resolve=%s",
                log_identifier,
                embed_type,
                expression,
                format_spec,
                types_to_resolve,
            )
            resolved_value = await resolver_func(
                embed_type,
                expression,
                format_spec,
                context,
                log_identifier,
                resolution_mode,
                config,
            )

            if (
                isinstance(resolved_value, tuple)
                and len(resolved_value) == 3
                and resolved_value[0] is None
                and isinstance(resolved_value[1], str)
            ):
                # Signal tuple: replace the embed with a unique placeholder
                # and record the signal for the caller to act on.
                signal_type = resolved_value[1]
                log.info(
                    "%s Received signal '%s' from resolver for embed at index %d.",
                    log_identifier,
                    signal_type,
                    start,
                )
                placeholder = f"__EMBED_SIGNAL_{uuid.uuid4().hex}__"
                resolved_parts.append(placeholder)
                signals_found.append(
                    (
                        start,
                        resolved_value,
                        placeholder,
                    )
                )
            elif (
                isinstance(resolved_value, tuple)
                and len(resolved_value) == 3
                and isinstance(resolved_value[0], str)
                and isinstance(resolved_value[2], int)
            ):
                text_content, error_message, _ = resolved_value
                if error_message:
                    log.warning(
                        "%s Embed resolution for '%s:%s' resulted in error: %s. Using error string as content.",
                        log_identifier,
                        embed_type,
                        expression,
                        error_message,
                    )
                resolved_parts.append(text_content)
            else:
                log.warning(
                    "%s Resolver for type '%s' returned unexpected structure %s. Treating as error string.",
                    log_identifier,
                    embed_type,
                    type(resolved_value),
                )
                # FIX: previously nothing was appended here, silently dropping
                # the embed despite the log message. Emit an error string so
                # the failure is visible in the output.
                resolved_parts.append(
                    f"[Error: Resolver for '{embed_type}' returned unexpected structure]"
                )

        else:
            log.debug(
                "%s Skipping embed type '%s' (not in types_to_resolve=%s)",
                log_identifier,
                embed_type,
                types_to_resolve,
            )
            resolved_parts.append(match.group(0))

        last_end = end

    remaining_text = text[last_end:]
    resolved_parts.append(remaining_text)

    # Buffering: if the tail contains an opening delimiter with no closing
    # delimiter after it, the embed may still be streaming in. Hold back the
    # partial embed and report where processing stopped.
    potential_partial_embed = False
    partial_embed_start_index = -1

    last_open_delimiter_index = remaining_text.rfind(EMBED_DELIMITER_OPEN)

    if last_open_delimiter_index != -1:
        closing_delimiter_index = remaining_text.find(
            EMBED_DELIMITER_CLOSE, last_open_delimiter_index
        )
        if closing_delimiter_index == -1:
            potential_partial_embed = True
            partial_embed_start_index = last_open_delimiter_index
            log.debug(
                "%s Potential unclosed embed detected starting at index %d within remaining text: '%s...'",
                log_identifier,
                partial_embed_start_index,
                remaining_text[
                    partial_embed_start_index : partial_embed_start_index + 10
                ],
            )

    if potential_partial_embed:
        processed_until_index = last_end + partial_embed_start_index
        final_text = (
            "".join(resolved_parts[:-1]) + remaining_text[:partial_embed_start_index]
        )
        log.debug(
            "%s Returning processed text up to index %d due to potential partial embed.",
            log_identifier,
            processed_until_index,
        )
    else:
        final_text = "".join(resolved_parts)
        processed_until_index = original_length
        log.debug(
            "%s Returning fully processed text (length %d).",
            log_identifier,
            len(final_text),
        )

    # If resolving late embeds, also resolve template blocks
    # Templates are considered late embeds since they need artifact service access
    if LATE_EMBED_TYPES.intersection(types_to_resolve):
        try:
            from ..templates import resolve_template_blocks_in_string

            artifact_service = context.get("artifact_service")
            session_context = context.get("session_context")

            if artifact_service and session_context:
                log.debug(
                    "%s Resolving template blocks after late embed resolution.",
                    log_identifier,
                )
                final_text = await resolve_template_blocks_in_string(
                    text=final_text,
                    artifact_service=artifact_service,
                    session_context=session_context,
                    log_identifier=f"{log_identifier}[TemplateResolve]",
                )
        except Exception as template_err:
            log.warning(
                "%s Failed to resolve template blocks: %s",
                log_identifier,
                template_err,
            )
            # Continue with final_text as-is

    return final_text, processed_until_index, signals_found


async def resolve_embeds_recursively_in_string(
    text: str,
    context: Any,
    resolver_func: Callable[..., Tuple[str, Optional[str], int]],
    types_to_resolve: Set[str],
    resolution_mode: "ResolutionMode",
    log_identifier: str,
    config: Optional[Dict],
    max_depth: int,
    current_depth: int = 0,
    visited_artifacts: Optional[Set[Tuple[str, int]]] = None,
    accumulated_size: int = 0,
    max_total_size: int = -1,
) -> str:
    """
    Recursively resolves specified embed types within a string, respecting depth,
    loop detection (via visited_artifacts passed down), and accumulated size limits.
    """
    if current_depth >= max_depth:
        log.warning(
            "%s Max embed recursion depth (%d) reached for current processing string.",
            log_identifier,
            max_depth,
        )
        return "[Error: Max embed depth exceeded]"

    visited_artifacts = visited_artifacts or set()
    resolved_parts = []
    last_end = 0

    for match in EMBED_REGEX.finditer(text):
        start, end = match.span()
        embed_type = match.group(1)
        expression = match.group(2)
        format_spec = match.group(3)

        resolved_parts.append(text[last_end:start])

        if embed_type not in types_to_resolve:
            resolved_parts.append(match.group(0))
            last_end = end
            continue

        log.debug(
            "%s [Depth:%d] Found embed '%s' to resolve: expr='%s', fmt='%s'",
            log_identifier,
            current_depth,
            embed_type,
            expression,
            format_spec,
        )

        resolved_value = await resolver_func(
            embed_type,
            expression,
            format_spec,
            context,
            log_identifier,
            resolution_mode,
            config,
            current_depth,
            visited_artifacts,
        )

        if (
            isinstance(resolved_value, tuple)
            and len(resolved_value) == 3
            and isinstance(resolved_value[0], str)
            and isinstance(resolved_value[2], int)
        ):
            resolved_string_for_embed, error_msg_from_chain, size_of_this_embed = (
                resolved_value
            )
        else:
            # Signals are not valid in recursive (artifact-content) context.
            log.error(
                "%s [Depth:%d] Recursive call to resolver for '%s:%s' returned unexpected signal or format. Treating as error.",
                log_identifier,
                current_depth,
                embed_type,
                expression,
            )
            error_msg_from_chain = "Recursive resolution returned unexpected signal."
            resolved_string_for_embed = f"[Error: {error_msg_from_chain}]"
            size_of_this_embed = len(resolved_string_for_embed.encode("utf-8"))

        if error_msg_from_chain:
            log.warning(
                "%s [Depth:%d] Embed '%s:%s' resulted in error from chain: %s",
                log_identifier,
                current_depth,
                embed_type,
                expression,
                error_msg_from_chain,
            )
            resolved_parts.append(resolved_string_for_embed)
        else:
            # Enforce the cumulative size budget before accepting the content.
            if (
                max_total_size >= 0
                and accumulated_size + size_of_this_embed > max_total_size
            ):
                error_str = f"[Error: Embedding '{expression}' exceeds total size limit for parent content ({accumulated_size + size_of_this_embed} > {max_total_size})]"
                log.warning("%s %s", log_identifier, error_str)
                resolved_parts.append(error_str)
            else:
                resolved_parts.append(resolved_string_for_embed)
                accumulated_size += size_of_this_embed
                log.debug(
                    "%s [Depth:%d] Appended resolved embed (size: %d). Current accumulated_size: %d",
                    log_identifier,
                    current_depth,
                    size_of_this_embed,
                    accumulated_size,
                )

        last_end = end

    resolved_parts.append(text[last_end:])
    result_text = "".join(resolved_parts)

    # If resolving late embeds, also resolve template blocks
    # Templates are considered late embeds since they need artifact service access
    if LATE_EMBED_TYPES.intersection(types_to_resolve):
        try:
            from ..templates import resolve_template_blocks_in_string

            artifact_service = context.get("artifact_service")
            session_context = context.get("session_context")

            if artifact_service and session_context:
                log.debug(
                    "%s [Depth:%d] Resolving template blocks after late embed resolution.",
                    log_identifier,
                    current_depth,
                )
                result_text = await resolve_template_blocks_in_string(
                    text=result_text,
                    artifact_service=artifact_service,
                    session_context=session_context,
                    log_identifier=f"{log_identifier}[TemplateResolve]",
                )
        except Exception as template_err:
            log.warning(
                "%s [Depth:%d] Failed to resolve template blocks: %s",
                log_identifier,
                current_depth,
                template_err,
            )
            # Continue with result_text as-is

    return result_text


async def evaluate_embed(
    embed_type: str,
    expression: str,
    format_spec: Optional[str],
    context: Dict[str, Any],
    log_identifier: str,
    resolution_mode: "ResolutionMode",
    config: Optional[Dict] = None,
    current_depth: int = 0,
    visited_artifacts: Optional[Set[Tuple[str, int]]] = None,
) -> Union[Tuple[str, Optional[str], int], Tuple[None, str, Any]]:
    """
    Evaluates a single embed directive.
    For 'artifact_content', it handles the modifier chain and potential internal recursion.
    For other types, it evaluates directly and returns a 3-tuple (text, error, size).
    For 'status_update', it returns a signal tuple (None, "SIGNAL_STATUS_UPDATE", data).

    Args:
        embed_type: The type of the embed.
        expression: The expression part of the embed.
        format_spec: The optional format specifier.
        context: The dictionary-based context (containing artifact_service, session_context, config).
        log_identifier: Identifier for logging.
        config: Optional configuration dictionary (now part of context or passed if needed by evaluators).
        current_depth: Current recursion depth (for artifact_content).
        visited_artifacts: Set of visited artifacts (for artifact_content).

    Returns:
        A 3-tuple (text_content, error_message, size) or a signal tuple (None, signal_type, data).
    """
    log.debug(
        "%s Evaluating embed: type='%s', expr='%s', fmt='%s'",
        log_identifier,
        embed_type,
        # FIX: only append an ellipsis when the expression was actually truncated.
        expression[:50] + ("..." if len(expression) > 50 else ""),
        format_spec,
    )

    if embed_type == "status_update":
        status_text = expression.strip()
        log.info("%s Detected 'status_update' embed. Signaling.", log_identifier)
        return (None, "SIGNAL_STATUS_UPDATE", status_text)

    elif embed_type == "artifact_return":
        if resolution_mode == ResolutionMode.A2A_MESSAGE_TO_USER:
            parts = expression.strip().split(":", 1)
            filename = parts[0]
            version = parts[1] if len(parts) > 1 else "latest"
            log.info("%s Detected 'artifact_return' embed. Signaling.", log_identifier)
            return (
                None,
                "SIGNAL_ARTIFACT_RETURN",
                {"filename": filename, "version": version},
            )
        else:
            log.warning(
                "%s Ignoring 'artifact_return' embed in unsupported context: %s",
                log_identifier,
                resolution_mode.name,
            )
            # NOTE(review): delimiters are hard-coded here rather than using
            # EMBED_DELIMITER_OPEN/CLOSE — presumably they match; verify.
            original_embed_text = f"«{embed_type}:{expression}»"
            return original_embed_text, None, len(original_embed_text.encode("utf-8"))

    elif embed_type == "artifact_content":
        artifact_spec, modifiers, output_format = _parse_modifier_chain(expression)
        if output_format is None and format_spec is not None:
            log.warning(
                "%s Using format specifier '| %s' for artifact_content as chain format step was missing.",
                log_identifier,
                format_spec,
            )
            output_format = format_spec

        # Check if this is a deep research report artifact
        # Deep research reports should be rendered by the frontend component, not resolved inline
        filename_part = artifact_spec.split(":")[0] if ":" in artifact_spec else artifact_spec
        filename_lower = filename_part.lower()
        is_deep_research_report = filename_lower.endswith("_deep_research_report.md")

        if is_deep_research_report and resolution_mode == ResolutionMode.A2A_MESSAGE_TO_USER:
            # Parse version from artifact_spec
            parts = artifact_spec.strip().split(":", 1)
            filename = parts[0]
            version = parts[1] if len(parts) > 1 else "latest"
            log.info(
                "%s Detected deep research report artifact '%s'. Signaling for frontend rendering instead of inline resolution.",
                log_identifier,
                filename,
            )
            return (
                None,
                "SIGNAL_DEEP_RESEARCH_REPORT",
                {"filename": filename, "version": version},
            )

        result = await _evaluate_artifact_content_embed_with_chain(
            artifact_spec_from_directive=artifact_spec,
            modifiers_from_directive=modifiers,
            output_format_from_directive=output_format,
            context=context,
            log_identifier=log_identifier,
            resolution_mode=resolution_mode,
            config=config,
            current_depth=current_depth,
            visited_artifacts=visited_artifacts or set(),
        )
        return result

    else:
        evaluator = EMBED_EVALUATORS.get(embed_type)
        if not evaluator:
            err_msg = f"Unknown embed type: '{embed_type}'"
            log.warning("%s %s", log_identifier, err_msg)
            err_str = f"[Error: {err_msg}]"
            return err_str, err_msg, len(err_str.encode("utf-8"))

        try:
            if asyncio.iscoroutinefunction(evaluator):
                str_value, eval_error, size = await evaluator(
                    expression, context, log_identifier, format_spec
                )
            else:
                str_value, eval_error, size = evaluator(
                    expression, context, log_identifier, format_spec
                )

            return str_value, eval_error, size

        except Exception as e:
            log.exception(
                "%s Unexpected error evaluating %s embed '%s': %s",
                log_identifier,
                embed_type,
                expression,
                e,
            )
            err_msg = f"Unexpected evaluation error: {e}"
            err_str = f"[Error: {err_msg}]"
            return err_str, err_msg, len(err_str.encode("utf-8"))