/ haystack / dataclasses / image_content.py
image_content.py
  1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
  2  #
  3  # SPDX-License-Identifier: Apache-2.0
  4  
  5  import base64
  6  from dataclasses import asdict, dataclass, field
  7  from io import BytesIO
  8  from pathlib import Path
  9  from typing import Any, Literal
 10  
 11  import filetype
 12  
 13  from haystack import logging
 14  from haystack.lazy_imports import LazyImport
 15  from haystack.utils import is_in_jupyter
 16  from haystack.utils.dataclasses import _warn_on_inplace_mutation
 17  
# Pillow is only needed by `ImageContent.show`, which calls `pillow_import.check()` before touching `Image`.
# NOTE(review): presumably LazyImport defers the ImportError until `check()` is called, so that this module
# stays importable without Pillow installed - confirm against `haystack.lazy_imports`.
with LazyImport("The 'show' method requires the 'PIL' library. Run 'pip install pillow'") as pillow_import:
    from PIL import Image

# Module-level logger; used below to warn when the MIME type of an image cannot be guessed.
logger = logging.getLogger(__name__)
 22  
 23  # NOTE: We have to rely on this since our util functions are using the bytestream object.
 24  #      We could change this to use the file path instead, where the file extension is used to determine the format.
 25  # This is a mapping of image formats to their MIME types.
 26  # from PIL import Image
 27  # Image.init()  # <- Must force all plugins to initialize to get this mapping
 28  # print(Image.MIME)
 29  FORMAT_TO_MIME = {
 30      "BMP": "image/bmp",
 31      "DIB": "image/bmp",
 32      "PCX": "image/x-pcx",
 33      "EPS": "application/postscript",
 34      "GIF": "image/gif",
 35      "PNG": "image/png",
 36      "JPEG2000": "image/jp2",
 37      "ICNS": "image/icns",
 38      "ICO": "image/x-icon",
 39      "JPEG": "image/jpeg",
 40      "MPEG": "video/mpeg",
 41      "TIFF": "image/tiff",
 42      "MPO": "image/mpo",
 43      "PALM": "image/palm",
 44      "PDF": "application/pdf",
 45      "PPM": "image/x-portable-anymap",
 46      "PSD": "image/vnd.adobe.photoshop",
 47      "SGI": "image/sgi",
 48      "TGA": "image/x-tga",
 49      "WEBP": "image/webp",
 50      "XBM": "image/xbm",
 51      "XPM": "image/xpm",
 52  }
 53  MIME_TO_FORMAT = {v: k for k, v in FORMAT_TO_MIME.items()}
 54  # Adding some common MIME types that are not in the PIL mapping
 55  MIME_TO_FORMAT["image/jpg"] = "JPEG"
 56  
 57  IMAGE_MIME_TYPES = set(MIME_TO_FORMAT.keys())
 58  
 59  
 60  @_warn_on_inplace_mutation
 61  @dataclass
 62  class ImageContent:
 63      """
 64      The image content of a chat message.
 65  
 66      :param base64_image: A base64 string representing the image.
 67      :param mime_type: The MIME type of the image (e.g. "image/png", "image/jpeg").
 68          Providing this value is recommended, as most LLM providers require it.
 69          If not provided, the MIME type is guessed from the base64 string, which can be slow and not always reliable.
 70      :param detail: Optional detail level of the image (only supported by OpenAI). One of "auto", "high", or "low".
 71      :param meta: Optional metadata for the image.
 72      :param validation: If True (default), a validation process is performed:
 73          - Check whether the base64 string is valid;
 74          - Guess the MIME type if not provided;
 75          - Check if the MIME type is a valid image MIME type.
 76          Set to False to skip validation and speed up initialization.
 77      """
 78  
 79      base64_image: str
 80      mime_type: str | None = None
 81      detail: Literal["auto", "high", "low"] | None = None
 82      meta: dict[str, Any] = field(default_factory=dict)
 83      validation: bool = True
 84  
 85      def __post_init__(self) -> None:
 86          if not self.validation:
 87              return
 88  
 89          try:
 90              decoded_image = base64.b64decode(self.base64_image, validate=True)
 91          except Exception as e:
 92              raise ValueError("The base64 string is not valid") from e
 93  
 94          # mime_type is an important information, so we try to guess it if not provided
 95          if not self.mime_type:
 96              guess = filetype.guess(decoded_image)
 97              if guess:
 98                  self.mime_type = guess.mime
 99              else:
100                  msg = (
101                      "Failed to guess the MIME type of the image. Omitting the MIME type may result in "
102                      "processing errors or incorrect handling of the image by LLM providers."
103                  )
104                  logger.warning(msg)
105  
106          if self.mime_type and self.mime_type not in IMAGE_MIME_TYPES:
107              raise ValueError(f"{self.mime_type} is not a valid image MIME type.")
108  
109      def __repr__(self) -> str:
110          """
111          Return a string representation of the ImageContent, truncating the base64_image to 100 bytes.
112          """
113          fields = []
114  
115          truncated_data = self.base64_image[:100] + "..." if len(self.base64_image) > 100 else self.base64_image
116          fields.append(f"base64_image={truncated_data!r}")
117          fields.append(f"mime_type={self.mime_type!r}")
118          fields.append(f"detail={self.detail!r}")
119          fields.append(f"meta={self.meta!r}")
120          fields_str = ", ".join(fields)
121          return f"{self.__class__.__name__}({fields_str})"
122  
123      def show(self) -> None:
124          """
125          Shows the image.
126          """
127          pillow_import.check()
128          image_bytes = BytesIO(base64.b64decode(self.base64_image))
129          image = Image.open(image_bytes)
130  
131          if is_in_jupyter():
132              # ipython is not a core dependency so we cannot import it at the module level
133              from IPython.display import display
134  
135              display(image)
136          else:
137              image.show()
138  
139      def to_dict(self) -> dict[str, Any]:
140          """
141          Convert ImageContent into a dictionary.
142          """
143          return asdict(self)
144  
145      def _to_trace_dict(self) -> dict[str, Any]:
146          """
147          Convert the ImageContent to a dictionary representation for tracing.
148  
149          The base64_image is replaced with a placeholder string to avoid sending large payloads to the tracing backend.
150  
151          :returns:
152              Serialized version of the object only for tracing purposes.
153          """
154          data = self.to_dict()
155          data["base64_image"] = f"Base64 string ({len(self.base64_image)} characters)"
156          return data
157  
158      @classmethod
159      def from_dict(cls, data: dict[str, Any]) -> "ImageContent":
160          """
161          Create an ImageContent from a dictionary.
162          """
163          return ImageContent(**data)
164  
165      @classmethod
166      def from_file_path(
167          cls,
168          file_path: str | Path,
169          *,
170          size: tuple[int, int] | None = None,
171          detail: Literal["auto", "high", "low"] | None = None,
172          meta: dict[str, Any] | None = None,
173      ) -> "ImageContent":
174          """
175          Create an ImageContent object from a file path.
176  
177          It exposes similar functionality as the `ImageFileToImageContent` component. For PDF to ImageContent conversion,
178          use the `PDFToImageContent` component.
179  
180          :param file_path:
181              The path to the image file. PDF files are not supported. For PDF to ImageContent conversion, use the
182              `PDFToImageContent` component.
183          :param size:
184              If provided, resizes the image to fit within the specified dimensions (width, height) while
185              maintaining aspect ratio. This reduces file size, memory usage, and processing time, which is beneficial
186              when working with models that have resolution constraints or when transmitting images to remote services.
187          :param detail:
188              Optional detail level of the image (only supported by OpenAI). One of "auto", "high", or "low".
189          :param meta:
190              Additional metadata for the image.
191  
192          :returns:
193              An ImageContent object.
194          """
195          # to avoid a circular import
196          from haystack.components.converters.image import ImageFileToImageContent
197  
198          converter = ImageFileToImageContent(size=size, detail=detail)
199          result = converter.run(sources=[file_path], meta=[meta] if meta else None)
200          return result["image_contents"][0]
201  
202      @classmethod
203      def from_url(
204          cls,
205          url: str,
206          *,
207          retry_attempts: int = 2,
208          timeout: int = 10,
209          size: tuple[int, int] | None = None,
210          detail: Literal["auto", "high", "low"] | None = None,
211          meta: dict[str, Any] | None = None,
212      ) -> "ImageContent":
213          """
214          Create an ImageContent object from a URL. The image is downloaded and converted to a base64 string.
215  
216          For PDF to ImageContent conversion, use the `PDFToImageContent` component.
217  
218          :param url:
219              The URL of the image. PDF files are not supported. For PDF to ImageContent conversion, use the
220              `PDFToImageContent` component.
221          :param retry_attempts:
222              The number of times to retry to fetch the URL's content.
223          :param timeout:
224              Timeout in seconds for the request.
225          :param size:
226              If provided, resizes the image to fit within the specified dimensions (width, height) while
227              maintaining aspect ratio. This reduces file size, memory usage, and processing time, which is beneficial
228              when working with models that have resolution constraints or when transmitting images to remote services.
229          :param detail:
230              Optional detail level of the image (only supported by OpenAI). One of "auto", "high", or "low".
231          :param meta:
232              Additional metadata for the image.
233  
234          :raises ValueError:
235              If the URL does not point to an image or if it points to a PDF file.
236  
237          :returns:
238              An ImageContent object.
239          """
240          # to avoid circular imports
241          from haystack.components.converters.image import ImageFileToImageContent
242          from haystack.components.fetchers.link_content import LinkContentFetcher
243  
244          fetcher = LinkContentFetcher(raise_on_failure=True, retry_attempts=retry_attempts, timeout=timeout)
245          bytestream = fetcher.run(urls=[url])["streams"][0]
246  
247          if bytestream.mime_type not in IMAGE_MIME_TYPES:
248              msg = f"The URL does not point to an image. The MIME type of the URL is {bytestream.mime_type}."
249              raise ValueError(msg)
250  
251          if bytestream.mime_type == "application/pdf":
252              raise ValueError(
253                  "PDF files are not supported. "
254                  "For PDF to ImageContent conversion, use the `PDFToImageContent` component."
255              )
256  
257          converter = ImageFileToImageContent(size=size, detail=detail)
258          result = converter.run(sources=[bytestream], meta=[meta] if meta else None)
259          return result["image_contents"][0]