# image_content.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import base64
from dataclasses import asdict, dataclass, field
from io import BytesIO
from pathlib import Path
from typing import Any, Literal

import filetype

from haystack import logging
from haystack.lazy_imports import LazyImport
from haystack.utils import is_in_jupyter
from haystack.utils.dataclasses import _warn_on_inplace_mutation

with LazyImport("The 'show' method requires the 'PIL' library. Run 'pip install pillow'") as pillow_import:
    from PIL import Image

logger = logging.getLogger(__name__)

# NOTE: We have to rely on this since our util functions are using the bytestream object.
# We could change this to use the file path instead, where the file extension is used to determine the format.
# This is a mapping of image formats to their MIME types.
# from PIL import Image
# Image.init() # <- Must force all plugins to initialize to get this mapping
# print(Image.MIME)
FORMAT_TO_MIME = {
    "BMP": "image/bmp",
    "DIB": "image/bmp",
    "PCX": "image/x-pcx",
    "EPS": "application/postscript",
    "GIF": "image/gif",
    "PNG": "image/png",
    "JPEG2000": "image/jp2",
    "ICNS": "image/icns",
    "ICO": "image/x-icon",
    "JPEG": "image/jpeg",
    "MPEG": "video/mpeg",
    "TIFF": "image/tiff",
    "MPO": "image/mpo",
    "PALM": "image/palm",
    "PDF": "application/pdf",
    "PPM": "image/x-portable-anymap",
    "PSD": "image/vnd.adobe.photoshop",
    "SGI": "image/sgi",
    "TGA": "image/x-tga",
    "WEBP": "image/webp",
    "XBM": "image/xbm",
    "XPM": "image/xpm",
}
MIME_TO_FORMAT = {v: k for k, v in FORMAT_TO_MIME.items()}
# Both "BMP" and "DIB" map to "image/bmp", so the inversion above keeps only the later entry ("DIB").
# Pin the canonical "BMP" format explicitly.
# NOTE(review): presumably consumers pass this value to PIL as a save format, where "DIB" would write a bitmap
# without the BMP file header - confirm against callers.
MIME_TO_FORMAT["image/bmp"] = "BMP"
# Adding some common MIME types that are not in the PIL mapping
MIME_TO_FORMAT["image/jpg"] = "JPEG"

# Every MIME type accepted by ImageContent validation (the keys of MIME_TO_FORMAT).
IMAGE_MIME_TYPES = set(MIME_TO_FORMAT.keys())


@_warn_on_inplace_mutation
@dataclass
class ImageContent:
    """
    The image content of a chat message.

    :param base64_image: A base64 string representing the image.
    :param mime_type: The MIME type of the image (e.g. "image/png", "image/jpeg").
        Providing this value is recommended, as most LLM providers require it.
        If not provided, the MIME type is guessed from the base64 string, which can be slow and not always reliable.
    :param detail: Optional detail level of the image (only supported by OpenAI). One of "auto", "high", or "low".
    :param meta: Optional metadata for the image.
    :param validation: If True (default), a validation process is performed:
        - Check whether the base64 string is valid;
        - Guess the MIME type if not provided;
        - Check if the MIME type is a valid image MIME type.
        Set to False to skip validation and speed up initialization.
    """

    base64_image: str
    mime_type: str | None = None
    detail: Literal["auto", "high", "low"] | None = None
    meta: dict[str, Any] = field(default_factory=dict)
    validation: bool = True

    def __post_init__(self) -> None:
        """
        Validate the base64 payload and resolve the MIME type.

        Does nothing when `validation` is False.

        :raises ValueError:
            If the base64 string cannot be decoded, or if the provided or guessed MIME type is not in
            `IMAGE_MIME_TYPES`.
        """
        if not self.validation:
            return

        try:
            decoded_image = base64.b64decode(self.base64_image, validate=True)
        except Exception as e:
            raise ValueError("The base64 string is not valid") from e

        # mime_type is an important information, so we try to guess it if not provided
        if not self.mime_type:
            guess = filetype.guess(decoded_image)
            if guess:
                self.mime_type = guess.mime
            else:
                # A failed guess is not fatal: the object is still created, just without a MIME type.
                msg = (
                    "Failed to guess the MIME type of the image. Omitting the MIME type may result in "
                    "processing errors or incorrect handling of the image by LLM providers."
                )
                logger.warning(msg)

        if self.mime_type and self.mime_type not in IMAGE_MIME_TYPES:
            raise ValueError(f"{self.mime_type} is not a valid image MIME type.")

    def __repr__(self) -> str:
        """
        Return a string representation of the ImageContent, truncating the base64_image to 100 characters.

        The `validation` field is not included in the representation.
        """
        fields = []

        truncated_data = (self.base64_image[:100] + "...") if len(self.base64_image) > 100 else self.base64_image
        fields.append(f"base64_image={truncated_data!r}")
        fields.append(f"mime_type={self.mime_type!r}")
        fields.append(f"detail={self.detail!r}")
        fields.append(f"meta={self.meta!r}")
        fields_str = ", ".join(fields)
        return f"{self.__class__.__name__}({fields_str})"

    def show(self) -> None:
        """
        Shows the image.

        Inside Jupyter the image is rendered with `IPython.display.display`; otherwise PIL's `Image.show` is used.
        """
        pillow_import.check()
        image_bytes = BytesIO(base64.b64decode(self.base64_image))
        image = Image.open(image_bytes)

        if is_in_jupyter():
            # ipython is not a core dependency so we cannot import it at the module level
            from IPython.display import display

            display(image)
        else:
            image.show()

    def to_dict(self) -> dict[str, Any]:
        """
        Convert ImageContent into a dictionary.

        :returns:
            A dictionary with one entry per dataclass field.
        """
        return asdict(self)

    def _to_trace_dict(self) -> dict[str, Any]:
        """
        Convert the ImageContent to a dictionary representation for tracing.

        The base64_image is replaced with a placeholder string to avoid sending large payloads to the tracing backend.

        :returns:
            Serialized version of the object only for tracing purposes.
        """
        data = self.to_dict()
        data["base64_image"] = f"Base64 string ({len(self.base64_image)} characters)"
        return data

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "ImageContent":
        """
        Create an ImageContent from a dictionary.

        :param data:
            Dictionary whose keys are passed as keyword arguments to the constructor.

        :returns:
            An instance of the class this method is called on.
        """
        # Use `cls` rather than the hard-coded class name so that subclasses deserialize to their own type.
        return cls(**data)

    @classmethod
    def from_file_path(
        cls,
        file_path: str | Path,
        *,
        size: tuple[int, int] | None = None,
        detail: Literal["auto", "high", "low"] | None = None,
        meta: dict[str, Any] | None = None,
    ) -> "ImageContent":
        """
        Create an ImageContent object from a file path.

        It exposes similar functionality as the `ImageFileToImageContent` component. For PDF to ImageContent conversion,
        use the `PDFToImageContent` component.

        :param file_path:
            The path to the image file. PDF files are not supported. For PDF to ImageContent conversion, use the
            `PDFToImageContent` component.
        :param size:
            If provided, resizes the image to fit within the specified dimensions (width, height) while
            maintaining aspect ratio. This reduces file size, memory usage, and processing time, which is beneficial
            when working with models that have resolution constraints or when transmitting images to remote services.
        :param detail:
            Optional detail level of the image (only supported by OpenAI). One of "auto", "high", or "low".
        :param meta:
            Additional metadata for the image.

        :returns:
            An ImageContent object.
        """
        # to avoid a circular import
        from haystack.components.converters.image import ImageFileToImageContent

        converter = ImageFileToImageContent(size=size, detail=detail)
        result = converter.run(sources=[file_path], meta=[meta] if meta else None)
        return result["image_contents"][0]

    @classmethod
    def from_url(
        cls,
        url: str,
        *,
        retry_attempts: int = 2,
        timeout: int = 10,
        size: tuple[int, int] | None = None,
        detail: Literal["auto", "high", "low"] | None = None,
        meta: dict[str, Any] | None = None,
    ) -> "ImageContent":
        """
        Create an ImageContent object from a URL. The image is downloaded and converted to a base64 string.

        For PDF to ImageContent conversion, use the `PDFToImageContent` component.

        :param url:
            The URL of the image. PDF files are not supported. For PDF to ImageContent conversion, use the
            `PDFToImageContent` component.
        :param retry_attempts:
            The number of times to retry to fetch the URL's content.
        :param timeout:
            Timeout in seconds for the request.
        :param size:
            If provided, resizes the image to fit within the specified dimensions (width, height) while
            maintaining aspect ratio. This reduces file size, memory usage, and processing time, which is beneficial
            when working with models that have resolution constraints or when transmitting images to remote services.
        :param detail:
            Optional detail level of the image (only supported by OpenAI). One of "auto", "high", or "low".
        :param meta:
            Additional metadata for the image.

        :raises ValueError:
            If the URL does not point to an image or if it points to a PDF file.

        :returns:
            An ImageContent object.
        """
        # to avoid circular imports
        from haystack.components.converters.image import ImageFileToImageContent
        from haystack.components.fetchers.link_content import LinkContentFetcher

        fetcher = LinkContentFetcher(raise_on_failure=True, retry_attempts=retry_attempts, timeout=timeout)
        bytestream = fetcher.run(urls=[url])["streams"][0]

        # Reject PDFs first so the dedicated message does not depend on "application/pdf" being listed in
        # IMAGE_MIME_TYPES.
        if bytestream.mime_type == "application/pdf":
            raise ValueError(
                "PDF files are not supported. "
                "For PDF to ImageContent conversion, use the `PDFToImageContent` component."
            )

        if bytestream.mime_type not in IMAGE_MIME_TYPES:
            msg = f"The URL does not point to an image. The MIME type of the URL is {bytestream.mime_type}."
            raise ValueError(msg)

        converter = ImageFileToImageContent(size=size, detail=detail)
        result = converter.run(sources=[bytestream], meta=[meta] if meta else None)
        return result["image_contents"][0]