# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import json
from collections.abc import Sequence
from dataclasses import asdict, dataclass, field
from enum import Enum
from typing import Any

from haystack import logging
from haystack.dataclasses.file_content import FileContent
from haystack.dataclasses.image_content import ImageContent
from haystack.utils.dataclasses import _warn_on_inplace_mutation

logger = logging.getLogger(__name__)


class ChatRole(str, Enum):
    """
    Enumeration representing the roles within a chat.
    """

    #: The user role. A message from the user contains only text.
    USER = "user"

    #: The system role. A message from the system contains only text.
    SYSTEM = "system"

    #: The assistant role. A message from the assistant can contain text and Tool calls. It can also store metadata.
    ASSISTANT = "assistant"

    #: The tool role. A message from a tool contains the result of a Tool invocation.
    TOOL = "tool"

    @staticmethod
    def from_str(string: str) -> "ChatRole":
        """
        Convert a string to a ChatRole enum.

        :param string: The role value, e.g. `"user"`.
        :returns: The matching ChatRole member.
        :raises ValueError: If the string does not match the value of any ChatRole member.
        """
        enum_map = {e.value: e for e in ChatRole}
        role = enum_map.get(string)
        if role is None:
            msg = f"Unknown chat role '{string}'. Supported roles are: {list(enum_map.keys())}"
            raise ValueError(msg)
        return role


@_warn_on_inplace_mutation
@dataclass
class TextContent:
    """
    The textual content of a chat message.

    :param text: The text content of the message.
    """

    text: str

    def to_dict(self) -> dict[str, Any]:
        """
        Convert TextContent into a dictionary.
        """
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "TextContent":
        """
        Create a TextContent from a dictionary.
        """
        return TextContent(**data)


@_warn_on_inplace_mutation
@dataclass
class ToolCall:
    """
    Represents a Tool call prepared by the model, usually contained in an assistant message.

    :param id: The ID of the Tool call.
    :param tool_name: The name of the Tool to call.
    :param arguments: The arguments to call the Tool with.
    :param extra: Dictionary of extra information about the Tool call. Use to store provider-specific
        information. To avoid serialization issues, values should be JSON serializable.
    """

    tool_name: str
    arguments: dict[str, Any]
    id: str | None = None  # noqa: A003
    extra: dict[str, Any] | None = None

    def to_dict(self) -> dict[str, Any]:
        """
        Convert ToolCall into a dictionary.

        :returns: A dictionary with keys 'tool_name', 'arguments', 'id', and 'extra'.
        """
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ToolCall":
        """
        Creates a new ToolCall object from a dictionary.

        :param data:
            The dictionary to build the ToolCall object.
        :returns:
            The created object.
        """
        return ToolCall(**data)


ToolCallResultContentT = str | Sequence[TextContent | ImageContent]


@_warn_on_inplace_mutation
@dataclass
class ToolCallResult:
    """
    Represents the result of a Tool invocation.

    :param result: The result of the Tool invocation.
    :param origin: The Tool call that produced this result.
    :param error: Whether the Tool invocation resulted in an error.
    """

    result: ToolCallResultContentT
    origin: ToolCall
    error: bool

    def to_dict(self) -> dict[str, Any]:
        """
        Converts ToolCallResult into a dictionary.

        :returns: A dictionary with keys 'result', 'origin', and 'error'.
        :raises ValueError: If `result` is a list or tuple containing anything other than TextContent or
            ImageContent.
        """
        serialized = asdict(self)
        # `result` is typed as a Sequence, so tuples need the same structured serialization as lists. Relying
        # on `asdict` alone would drop the {"image": ...} wrapper that deserialization depends on.
        if isinstance(self.result, (list, tuple)):
            if not all(isinstance(part, (TextContent, ImageContent)) for part in self.result):
                raise ValueError("ToolCallResult result must be a string or a list of TextContent or ImageContent")
            serialized["result"] = [_serialize_content_part(part) for part in self.result]
        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ToolCallResult":
        """
        Creates a ToolCallResult from a dictionary.

        :param data:
            The dictionary to build the ToolCallResult object.
        :returns:
            The created object.
        :raises ValueError:
            If any of the `result`, `origin`, or `error` keys is missing, or if a content part of `result`
            cannot be deserialized.
        """
        if not all(x in data for x in ["result", "origin", "error"]):
            raise ValueError(
                "Fields `result`, `origin`, `error` are required for ToolCallResult deserialization. "
                f"Received dictionary with keys {list(data.keys())}"
            )

        result = data["result"]
        # A sequence of serialized content parts is rebuilt into content objects; a plain string is kept as is.
        if isinstance(result, (list, tuple)):
            result = [_deserialize_content_part(part) for part in result]

        return ToolCallResult(result=result, origin=ToolCall.from_dict(data["origin"]), error=data["error"])


@_warn_on_inplace_mutation
@dataclass
class ReasoningContent:
    """
    Represents the optional reasoning content prepared by the model, usually contained in an assistant message.

    :param reasoning_text: The reasoning text produced by the model.
    :param extra: Dictionary of extra information about the reasoning content. Use to store provider-specific
        information. To avoid serialization issues, values should be JSON serializable.
    """

    reasoning_text: str
    extra: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        """
        Convert ReasoningContent into a dictionary.

        :returns: A dictionary with keys 'reasoning_text', and 'extra'.
        """
        return asdict(self)

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ReasoningContent":
        """
        Creates a new ReasoningContent object from a dictionary.

        :param data:
            The dictionary to build the ReasoningContent object.
        :returns:
            The created object.
        """
        return ReasoningContent(**data)


ChatMessageContentT = TextContent | ToolCall | ToolCallResult | ImageContent | ReasoningContent | FileContent

_CONTENT_PART_CLASSES_TO_SERIALIZATION_KEYS: dict[type[ChatMessageContentT], str] = {
    TextContent: "text",
    ToolCall: "tool_call",
    ToolCallResult: "tool_call_result",
    ImageContent: "image",
    ReasoningContent: "reasoning",
    FileContent: "file",
}


def _deserialize_content_part(part: dict[str, Any]) -> ChatMessageContentT:
    """
    Deserialize a single content part of a serialized ChatMessage.

    :param part:
        A dictionary representing a single content part of a serialized ChatMessage.
    :returns:
        A ChatMessageContentT object.
    :raises ValueError:
        If the part is not a valid ChatMessageContentT object.
    """
    # Only dictionaries can be valid parts. Without this guard a string part would be substring-matched by the
    # `in` checks below and `None`/numbers would raise an unguided TypeError instead of the ValueError below.
    if isinstance(part, dict):
        # handle flat text format separately
        if "text" in part:
            return TextContent.from_dict(part)

        for cls, serialization_key in _CONTENT_PART_CLASSES_TO_SERIALIZATION_KEYS.items():
            if serialization_key in part:
                return cls.from_dict(part[serialization_key])

    # NOTE: this verbose error message provides guidance to LLMs when creating invalid messages during agent runs
    msg = (
        f"Unsupported content part in the serialized ChatMessage: {part}. "
        "The `content` field of the serialized ChatMessage must be a list of dictionaries, where each dictionary "
        "contains one of these keys: 'text', 'image', 'file', 'reasoning', 'tool_call', or 'tool_call_result'. "
        "Valid formats: [{'text': 'Hello'}, {'image': {'base64_image': '...', ...}}, "
        "{'file': {'base64_data': '...', ...}}, {'reasoning': {'reasoning_text': 'I think...', 'extra': {...}}}, "
        "{'tool_call': {'tool_name': 'search', 'arguments': {}, 'id': 'call_123'}}, "
        "{'tool_call_result': {'result': 'data', 'origin': {...}, 'error': false}}]"
    )
    raise ValueError(msg)


def _serialize_content_part(part: ChatMessageContentT) -> dict[str, Any]:
    """
    Serialize a single content part of a ChatMessage.

    :param part:
        A ChatMessageContentT object.
    :returns:
        A dictionary representing the content part.
    :raises TypeError:
        If the part is not a valid ChatMessageContentT object.
    """
    serialization_key = _CONTENT_PART_CLASSES_TO_SERIALIZATION_KEYS.get(type(part))
    if serialization_key is None:
        raise TypeError(f"Unsupported type in ChatMessage content: `{type(part).__name__}` for `{part}`.")

    # handle flat text format separately
    if isinstance(part, TextContent):
        return part.to_dict()

    return {serialization_key: part.to_dict()}


@_warn_on_inplace_mutation
@dataclass
class ChatMessage:
    """
    Represents a message in a LLM chat conversation.

    Use the `from_assistant`, `from_user`, `from_system`, and `from_tool` class methods to create a ChatMessage.
    """

    _role: ChatRole
    _content: Sequence[ChatMessageContentT]
    _name: str | None = None
    _meta: dict[str, Any] = field(default_factory=dict, hash=False)

    def __len__(self) -> int:
        """
        Returns the number of content parts contained in the message.
        """
        return len(self._content)

    @property
    def role(self) -> ChatRole:
        """
        Returns the role of the entity sending the message.
        """
        return self._role

    @property
    def meta(self) -> dict[str, Any]:
        """
        Returns the metadata associated with the message.
        """
        return self._meta

    @property
    def name(self) -> str | None:
        """
        Returns the name associated with the message.
        """
        return self._name

    @property
    def texts(self) -> list[str]:
        """
        Returns the list of all texts contained in the message.
        """
        return [content.text for content in self._content if isinstance(content, TextContent)]

    @property
    def text(self) -> str | None:
        """
        Returns the first text contained in the message.
        """
        if texts := self.texts:
            return texts[0]
        return None

    @property
    def tool_calls(self) -> list[ToolCall]:
        """
        Returns the list of all Tool calls contained in the message.
        """
        return [content for content in self._content if isinstance(content, ToolCall)]

    @property
    def tool_call(self) -> ToolCall | None:
        """
        Returns the first Tool call contained in the message.
        """
        if tool_calls := self.tool_calls:
            return tool_calls[0]
        return None

    @property
    def tool_call_results(self) -> list[ToolCallResult]:
        """
        Returns the list of all Tool call results contained in the message.
        """
        return [content for content in self._content if isinstance(content, ToolCallResult)]

    @property
    def tool_call_result(self) -> ToolCallResult | None:
        """
        Returns the first Tool call result contained in the message.
        """
        if tool_call_results := self.tool_call_results:
            return tool_call_results[0]
        return None

    @property
    def images(self) -> list[ImageContent]:
        """
        Returns the list of all images contained in the message.
        """
        return [content for content in self._content if isinstance(content, ImageContent)]

    @property
    def image(self) -> ImageContent | None:
        """
        Returns the first image contained in the message.
        """
        if images := self.images:
            return images[0]
        return None

    @property
    def files(self) -> list[FileContent]:
        """
        Returns the list of all files contained in the message.
        """
        return [content for content in self._content if isinstance(content, FileContent)]

    @property
    def file(self) -> FileContent | None:
        """
        Returns the first file contained in the message.
        """
        if files := self.files:
            return files[0]
        return None

    @property
    def reasonings(self) -> list[ReasoningContent]:
        """
        Returns the list of all reasoning contents contained in the message.
        """
        return [content for content in self._content if isinstance(content, ReasoningContent)]

    @property
    def reasoning(self) -> ReasoningContent | None:
        """
        Returns the first reasoning content contained in the message.
        """
        if reasonings := self.reasonings:
            return reasonings[0]
        return None

    def is_from(self, role: ChatRole | str) -> bool:
        """
        Check if the message is from a specific role.

        :param role: The role to check against.
        :returns: True if the message is from the specified role, False otherwise.
        :raises ValueError: If `role` is a string that does not match any ChatRole.
        """
        if isinstance(role, str):
            role = ChatRole.from_str(role)
        return self._role == role

    @classmethod
    def from_user(
        cls,
        text: str | None = None,
        meta: dict[str, Any] | None = None,
        name: str | None = None,
        *,
        content_parts: Sequence[TextContent | str | ImageContent | FileContent] | None = None,
    ) -> "ChatMessage":
        """
        Create a message from the user.

        :param text: The text content of the message. Specify this or content_parts.
        :param meta: Additional metadata associated with the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :param content_parts: A list of content parts to include in the message. Specify this or text.
        :returns: A new ChatMessage instance.
        :raises ValueError: If neither or both of text and content_parts are provided, or if content_parts is empty.
        :raises TypeError: If a content part is not a str, TextContent, ImageContent, or FileContent.
        """
        if text is None and content_parts is None:
            raise ValueError("Either text or content_parts must be provided.")
        if text is not None and content_parts is not None:
            raise ValueError("Only one of text or content_parts can be provided.")

        content: list[TextContent | ImageContent | FileContent] = []

        if text is not None:
            content = [TextContent(text=text)]
        elif content_parts is not None:
            for part in content_parts:
                if isinstance(part, str):
                    # bare strings are a convenience shorthand for TextContent
                    content.append(TextContent(text=part))
                elif isinstance(part, (TextContent, ImageContent, FileContent)):
                    content.append(part)
                else:
                    raise TypeError(
                        f"The user message must contain only text, image, or file parts. Unsupported part: {part}"
                    )
            if len(content) == 0:
                raise ValueError("The user message must contain at least one content part (text, image, file).")

        return cls(_role=ChatRole.USER, _content=content, _meta=meta or {}, _name=name)

    @classmethod
    def from_system(cls, text: str, meta: dict[str, Any] | None = None, name: str | None = None) -> "ChatMessage":
        """
        Create a message from the system.

        :param text: The text content of the message.
        :param meta: Additional metadata associated with the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :returns: A new ChatMessage instance.
        """
        return cls(_role=ChatRole.SYSTEM, _content=[TextContent(text=text)], _meta=meta or {}, _name=name)

    @classmethod
    def from_assistant(
        cls,
        text: str | None = None,
        meta: dict[str, Any] | None = None,
        name: str | None = None,
        tool_calls: list[ToolCall] | None = None,
        *,
        reasoning: str | ReasoningContent | None = None,
    ) -> "ChatMessage":
        """
        Create a message from the assistant.

        The content parts are stored in this order: reasoning, text, tool calls.

        :param text: The text content of the message.
        :param meta: Additional metadata associated with the message.
        :param name: An optional name for the participant. This field is only supported by OpenAI.
        :param tool_calls: The Tool calls to include in the message.
        :param reasoning: The reasoning content to include in the message.
        :returns: A new ChatMessage instance.
        :raises TypeError: If `reasoning` is not a string or ReasoningContent object.
        """
        content: list[ChatMessageContentT] = []
        if reasoning:
            if isinstance(reasoning, str):
                content.append(ReasoningContent(reasoning_text=reasoning))
            elif isinstance(reasoning, ReasoningContent):
                content.append(reasoning)
            else:
                raise TypeError(f"reasoning must be a string or a ReasoningContent object, got {type(reasoning)}")
        if text is not None:
            content.append(TextContent(text=text))
        if tool_calls:
            content.extend(tool_calls)

        return cls(_role=ChatRole.ASSISTANT, _content=content, _meta=meta or {}, _name=name)

    @classmethod
    def from_tool(
        cls,
        tool_result: ToolCallResultContentT,
        origin: ToolCall,
        error: bool = False,
        meta: dict[str, Any] | None = None,
    ) -> "ChatMessage":
        """
        Create a message from a Tool.

        :param tool_result: The result of the Tool invocation.
        :param origin: The Tool call that produced this result.
        :param error: Whether the Tool invocation resulted in an error.
        :param meta: Additional metadata associated with the message.
        :returns: A new ChatMessage instance.
        """
        return cls(
            _role=ChatRole.TOOL,
            _content=[ToolCallResult(result=tool_result, origin=origin, error=error)],
            _meta=meta or {},
        )

    def to_dict(self) -> dict[str, Any]:
        """
        Converts ChatMessage into a dictionary.

        :returns:
            Serialized version of the object.
        :raises TypeError:
            If the message contains a content part of an unsupported type.
        """

        serialized: dict[str, Any] = {}
        serialized["role"] = self._role.value
        serialized["meta"] = self._meta
        serialized["name"] = self._name

        serialized["content"] = [_serialize_content_part(part) for part in self._content]
        return serialized

    def _to_trace_dict(self) -> dict[str, Any]:
        """
        Convert the ChatMessage to a dictionary representation for tracing.

        For Image Content objects, the base64_image is replaced with a placeholder string to avoid sending large
        payloads to the tracing backend.

        :returns:
            Serialized version of the object only for tracing purposes.
        """

        serialized: dict[str, Any] = {}
        serialized["role"] = self._role.value
        serialized["meta"] = self._meta
        serialized["name"] = self._name

        serialized["content"] = []
        for part in self._content:
            serialized_part = _serialize_content_part(part)
            # image and file parts delegate to their own trace representation instead of the full serialization
            if isinstance(part, ImageContent):
                serialized_part["image"] = part._to_trace_dict()
            elif isinstance(part, FileContent):
                serialized_part["file"] = part._to_trace_dict()
            serialized["content"].append(serialized_part)

        return serialized

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ChatMessage":
        """
        Creates a new ChatMessage object from a dictionary.

        :param data:
            The dictionary to build the ChatMessage object.
        :returns:
            The created object.
        :raises ValueError: If the `role` field is missing from the dictionary, or if neither `content` nor
            `_content` is present.
        :raises TypeError: If the `content` field is not a list or string.
        """

        # NOTE: this verbose error message provides guidance to LLMs when creating invalid messages during agent runs
        if "role" not in data and "_role" not in data:
            raise ValueError(
                "The `role` field is required in the message dictionary. "
                f"Expected a dictionary with 'role' field containing one of: {[role.value for role in ChatRole]}. "
                f"Common roles are 'user' (for user messages) and 'assistant' (for AI responses). "
                f"Received dictionary with keys: {list(data.keys())}"
            )

        # The guard above accepts either key, so read the role from whichever one is present. Indexing a fixed
        # key per branch would raise a bare KeyError for mixed inputs such as `_role` together with `content`.
        role_value = data["role"] if "role" in data else data["_role"]

        if "content" in data:
            init_params: dict[str, Any] = {
                "_role": ChatRole(role_value),
                "_name": data.get("name"),
                "_meta": data.get("meta") or {},
            }

            if isinstance(data["content"], list):
                # current format - the serialized `content` field is a list of dictionaries
                init_params["_content"] = [_deserialize_content_part(part) for part in data["content"]]
            elif isinstance(data["content"], str):
                # pre 2.9.0 format - the `content` field is a string
                init_params["_content"] = [TextContent(text=data["content"])]
            else:
                raise TypeError(f"Unsupported content type in serialized ChatMessage: `{(data['content'])}`")
            return cls(**init_params)

        if "_content" in data:
            # format for versions >=2.9.0 and <2.12.0 - the serialized `_content` field is a list of dictionaries
            return cls(
                _role=ChatRole(role_value),
                _content=[_deserialize_content_part(part) for part in data["_content"]],
                _name=data.get("_name"),
                _meta=data.get("_meta") or {},
            )

        raise ValueError(f"Missing 'content' or '_content' in serialized ChatMessage: `{data}`")

    def to_openai_dict_format(self, require_tool_call_ids: bool = True) -> dict[str, Any]:
        """
        Convert a ChatMessage to the dictionary format expected by OpenAI's Chat Completions API.

        :param require_tool_call_ids:
            If True (default), enforces that each Tool Call includes a non-null `id` attribute.
            Set to False to allow Tool Calls without `id`, which may be suitable for shallow OpenAI-compatible APIs.
        :returns:
            The ChatMessage in the format expected by OpenAI's Chat Completions API.

        :raises ValueError:
            If the message format is invalid, or if `require_tool_call_ids` is True and any Tool Call is missing an
            `id` attribute.
        """
        if not self.texts and not self.tool_calls and not self.tool_call_results and not self.images and not self.files:
            raise ValueError(
                "A `ChatMessage` must contain at least one `TextContent`, `ToolCall`, "
                "`ToolCallResult`, `ImageContent`, or `FileContent`."
            )
        if len(self.tool_call_results) > 0 and len(self._content) > 1:
            raise ValueError(
                "For OpenAI compatibility, a `ChatMessage` with a `ToolCallResult` cannot contain any other content."
            )

        openai_msg: dict[str, Any] = {"role": self._role.value}

        if self._name is not None:
            openai_msg["name"] = self._name

        if openai_msg["role"] == "user":
            return self._user_message_to_openai(openai_msg)

        if self.tool_call_results:
            return self._tool_result_message_to_openai(openai_msg, require_tool_call_ids)

        return self._system_assistant_message_to_openai(openai_msg, require_tool_call_ids)

    def _user_message_to_openai(self, openai_msg: dict[str, Any]) -> dict[str, Any]:
        """
        Build OpenAI dict for a user message.

        :param openai_msg: The partially built OpenAI message; it is updated in place and returned.
        :returns: The OpenAI message with its `content` field set.
        """
        # a single text part is sent as a plain string rather than as a list of typed parts
        if len(self._content) == 1 and isinstance(self._content[0], TextContent):
            openai_msg["content"] = self.text
            return openai_msg

        content = []
        for part in self._content:
            if isinstance(part, TextContent):
                content.append({"type": "text", "text": part.text})
            elif isinstance(part, ImageContent):
                image_item: dict[str, Any] = {
                    "type": "image_url",
                    # If no MIME type is provided, default to JPEG.
                    # OpenAI API appears to tolerate MIME type mismatches.
                    "image_url": {"url": f"data:{part.mime_type or 'image/jpeg'};base64,{part.base64_image}"},
                }
                if part.detail:
                    image_item["image_url"]["detail"] = part.detail
                content.append(image_item)
            elif isinstance(part, FileContent):
                file_item: dict[str, Any] = {
                    "type": "file",
                    "file": {
                        "file_data": f"data:{part.mime_type or 'application/pdf'};base64,{part.base64_data}",
                        # Filename is optional but if not provided, OpenAI expects a file_id of a previous file upload.
                        # We use a dummy filename.
                        "filename": part.filename or "filename",
                    },
                }
                content.append(file_item)
        openai_msg["content"] = content
        return openai_msg

    def _tool_result_message_to_openai(self, openai_msg: dict[str, Any], require_tool_call_ids: bool) -> dict[str, Any]:
        """
        Build OpenAI dict for a tool result message.

        :param openai_msg: The partially built OpenAI message; it is updated in place and returned.
        :param require_tool_call_ids: If True, a missing `id` on the originating Tool call raises an error.
        :returns: The OpenAI message with `content` and, when available, `tool_call_id` set.
        :raises ValueError:
            If the tool result is neither a string nor a sequence of TextContent, or if the id is required but
            missing.
        """
        result = self.tool_call_results[0]
        if isinstance(result.result, str):
            openai_msg["content"] = result.result
        # OpenAI Chat Completions API does not support multimodal tool results
        # (tuples are accepted alongside lists because `result` is typed as a Sequence)
        elif isinstance(result.result, (list, tuple)) and all(
            isinstance(part, TextContent) for part in result.result
        ):
            openai_msg["content"] = [{"type": "text", "text": part.text} for part in result.result]
        else:
            raise ValueError(
                f"Unsupported tool result: {result}. If you need to pass images in tool results, "
                "use OpenAI Responses API instead."
            )

        if result.origin.id is not None:
            openai_msg["tool_call_id"] = result.origin.id
        elif require_tool_call_ids:
            raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
        # OpenAI does not provide a way to communicate errors in tool invocations, so we ignore the error field
        return openai_msg

    def _system_assistant_message_to_openai(
        self, openai_msg: dict[str, Any], require_tool_call_ids: bool
    ) -> dict[str, Any]:
        """
        Build OpenAI dict for system and assistant messages.

        :param openai_msg: The partially built OpenAI message; it is updated in place and returned.
        :param require_tool_call_ids: If True, a Tool call without an `id` raises an error.
        :returns: The OpenAI message with `content` and/or `tool_calls` set.
        :raises ValueError: If the id is required but a Tool call has none.
        """
        # OpenAI Chat Completions API does not support reasoning content, so we ignore it
        if self.texts:
            # only the first text part is sent
            openai_msg["content"] = self.texts[0]
        if self.tool_calls:
            openai_tool_calls = []
            for tc in self.tool_calls:
                openai_tool_call = {
                    "type": "function",
                    # We disable ensure_ascii so special chars like emojis are not converted
                    "function": {"name": tc.tool_name, "arguments": json.dumps(tc.arguments, ensure_ascii=False)},
                }
                if tc.id is not None:
                    openai_tool_call["id"] = tc.id
                elif require_tool_call_ids:
                    raise ValueError("`ToolCall` must have a non-null `id` attribute to be used with OpenAI.")
                openai_tool_calls.append(openai_tool_call)
            openai_msg["tool_calls"] = openai_tool_calls
        return openai_msg

    @staticmethod
    def _validate_openai_message(message: dict[str, Any]) -> None:
        """
        Validate that a message dictionary follows OpenAI's Chat API format.

        :param message: The message dictionary to validate
        :raises ValueError: If the message format is invalid
        """
        if "role" not in message:
            raise ValueError("The `role` field is required in the message dictionary.")

        role = message["role"]
        content = message.get("content")
        tool_calls = message.get("tool_calls")

        if role not in ["assistant", "user", "system", "developer", "tool"]:
            raise ValueError(f"Unsupported role: {role}")

        if role == "assistant":
            # assistant messages may carry tool calls instead of content
            if not content and not tool_calls:
                raise ValueError("For assistant messages, either `content` or `tool_calls` must be present.")
            if tool_calls:
                for tc in tool_calls:
                    if "function" not in tc:
                        raise ValueError("Tool calls must contain the `function` field")
        elif not content:
            raise ValueError(f"The `content` field is required for {role} messages.")

    @classmethod
    def from_openai_dict_format(cls, message: dict[str, Any]) -> "ChatMessage":
        """
        Create a ChatMessage from a dictionary in the format expected by OpenAI's Chat API.

        NOTE: While OpenAI's API requires `tool_call_id` in both tool calls and tool messages, this method
        accepts messages without it to support shallow OpenAI-compatible APIs.
        If you plan to use the resulting ChatMessage with OpenAI, you must include `tool_call_id` or you'll
        encounter validation errors.

        :param message:
            The OpenAI dictionary to build the ChatMessage object.
        :returns:
            The created ChatMessage object.

        :raises ValueError:
            If the message dictionary is missing required fields.
        """
        cls._validate_openai_message(message)

        role = message["role"]
        content = message.get("content")
        name = message.get("name")
        tool_calls = message.get("tool_calls")
        tool_call_id = message.get("tool_call_id")

        if role == "assistant":
            haystack_tool_calls = None
            if tool_calls:
                haystack_tool_calls = []
                for tc in tool_calls:
                    haystack_tc = ToolCall(
                        id=tc.get("id"),
                        tool_name=tc["function"]["name"],
                        # OpenAI transmits the arguments as a JSON-encoded string
                        arguments=json.loads(tc["function"]["arguments"]),
                    )
                    haystack_tool_calls.append(haystack_tc)
            return cls.from_assistant(text=content, name=name, tool_calls=haystack_tool_calls)

        assert content is not None  # ensured by _validate_openai_message, but we need to make mypy happy

        if role == "user":
            return cls.from_user(text=content, name=name)
        if role in ["system", "developer"]:
            # the "developer" role is mapped to a system message
            return cls.from_system(text=content, name=name)

        # remaining role is "tool": content is either a string or a list of text parts
        if isinstance(content, list):
            if not all("text" in el for el in content):
                raise ValueError("To be used with OpenAI, tool results must be a string or a list of TextContent")
            content = [TextContent(text=el["text"]) for el in content]
        return cls.from_tool(
            tool_result=content, origin=ToolCall(id=tool_call_id, tool_name="", arguments={}), error=False
        )