# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import os
from collections.abc import Awaitable, Callable
from typing import Any, ClassVar

from openai.lib._pydantic import to_strict_json_schema
from pydantic import BaseModel

from haystack import component, default_from_dict, default_to_dict
from haystack.components.generators.chat import OpenAIResponsesChatGenerator
from haystack.dataclasses.streaming_chunk import StreamingCallbackT
from haystack.tools import ToolsType, deserialize_tools_or_toolset_inplace, serialize_tools_or_toolset
from haystack.utils import Secret, deserialize_callable, serialize_callable


@component
class AzureOpenAIResponsesChatGenerator(OpenAIResponsesChatGenerator):
    """
    Completes chats using OpenAI's Responses API on Azure.

    It works with the gpt-5 and o-series models and supports streaming responses
    from the OpenAI API. It uses the [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
    format for input and output.

    You can customize how the text is generated by passing parameters to the
    OpenAI API. Use the `**generation_kwargs` argument when you initialize
    the component or when you run it. Any parameter that works with
    `openai.Responses.create` will work here too.

    For details on OpenAI API parameters, see the
    [OpenAI documentation](https://platform.openai.com/docs/api-reference/responses).

    ### Usage example
    <!-- test-ignore -->
    ```python
    from haystack.components.generators.chat import AzureOpenAIResponsesChatGenerator
    from haystack.dataclasses import ChatMessage

    messages = [ChatMessage.from_user("What's Natural Language Processing?")]

    client = AzureOpenAIResponsesChatGenerator(
        azure_endpoint="https://example-resource.azure.openai.com/",
        generation_kwargs={"reasoning": {"effort": "low", "summary": "auto"}}
    )
    response = client.run(messages)
    print(response)
    ```
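
    To stream tokens as they arrive, pass a `streaming_callback`. The following is a
    minimal sketch, assuming `AZURE_OPENAI_API_KEY` is set in the environment;
    `print_chunk` is a hypothetical callback defined only for illustration.
    <!-- test-ignore -->
    ```python
    from haystack.components.generators.chat import AzureOpenAIResponsesChatGenerator
    from haystack.dataclasses import ChatMessage


    def print_chunk(chunk):
        # StreamingChunk.content holds the text delta for this chunk
        print(chunk.content, end="", flush=True)


    streaming_client = AzureOpenAIResponsesChatGenerator(
        azure_endpoint="https://example-resource.azure.openai.com/",
        streaming_callback=print_chunk,
    )
    streaming_client.run([ChatMessage.from_user("What's Natural Language Processing?")])
    ```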
    """

    SUPPORTED_MODELS: ClassVar[list[str]] = [
        "gpt-5.4-pro",
        "gpt-5.4",
        "gpt-5.3-chat",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-5.2",
        "gpt-5.2-chat",
        "gpt-5.1-codex-max",
        "gpt-5.1",
        "gpt-5.1-chat",
        "gpt-5.1-codex",
        "gpt-5.1-codex-mini",
        "gpt-5-pro",
        "gpt-5-codex",
        "gpt-5",
        "gpt-5-mini",
        "gpt-5-nano",
        "gpt-5-chat",
        "gpt-4o",
        "gpt-4o-mini",
        "computer-use-preview",
        "gpt-4.1",
        "gpt-4.1-nano",
        "gpt-4.1-mini",
        "gpt-image-1",
        "gpt-image-1-mini",
        "gpt-image-1.5",
        "o1",
        "o3-mini",
        "o3",
        "o4-mini",
    ]
    """A non-exhaustive list of chat models supported by this component.
    See https://learn.microsoft.com/en-us/azure/foundry/openai/how-to/responses#model-support for the full list."""

    # ruff: noqa: PLR0913
    def __init__(
        self,
        *,
        api_key: Secret | Callable[[], str] | Callable[[], Awaitable[str]] = Secret.from_env_var(
            "AZURE_OPENAI_API_KEY", strict=False
        ),
        azure_endpoint: str | None = None,
        azure_deployment: str = "gpt-5-mini",
        streaming_callback: StreamingCallbackT | None = None,
        organization: str | None = None,
        generation_kwargs: dict[str, Any] | None = None,
        timeout: float | None = None,
        max_retries: int | None = None,
        tools: ToolsType | None = None,
        tools_strict: bool = False,
        http_client_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize the AzureOpenAIResponsesChatGenerator component.

        :param api_key: The API key to use for authentication. Can be:
            - A `Secret` object containing the API key.
            - A `Secret` object containing the [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id).
            - A function that returns an Azure Active Directory token.
        :param azure_endpoint: The endpoint of the deployed model, for example `"https://example-resource.azure.openai.com/"`.
        :param azure_deployment: The deployment of the model, usually the model name.
        :param organization: Your organization ID, defaults to `None`. For help, see
            [Setting up your organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
        :param streaming_callback: A callback function called when a new token is received from the stream.
            It accepts [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
            as an argument.
        :param timeout: Timeout for OpenAI client calls. If not set, it defaults to the
            `OPENAI_TIMEOUT` environment variable or 30 seconds.
        :param max_retries: Maximum number of retries to contact OpenAI after an internal error.
            If not set, it defaults to the `OPENAI_MAX_RETRIES` environment variable or 5.
        :param generation_kwargs: Other parameters to use for the model. These parameters are sent
            directly to the OpenAI endpoint.
            See the OpenAI [documentation](https://platform.openai.com/docs/api-reference/responses) for
            more details.
            Some of the supported parameters:
            - `temperature`: What sampling temperature to use. Higher values like 0.8 will make the output more random,
                while lower values like 0.2 will make it more focused and deterministic.
            - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
                considers the results of the tokens with top_p probability mass. For example, 0.1 means only the tokens
                comprising the top 10% probability mass are considered.
            - `previous_response_id`: The ID of the previous response.
                Use this to create multi-turn conversations.
            - `text_format`: A Pydantic model that enforces the structure of the model's response
                (see the sketch at the end of this docstring).
                If provided, the output will always be validated against this
                format (unless the model returns a tool call).
                For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
            - `text`: A JSON schema that enforces the structure of the model's response.
                If provided, the output will always be validated against this
                format (unless the model returns a tool call).
                Notes:
                - Both JSON Schema and Pydantic models are supported for the latest models, starting from GPT-4o.
                - If both are provided, `text_format` takes precedence and the JSON schema passed to `text` is ignored.
                - Currently, this component doesn't support streaming for structured outputs.
                - Older models only support a basic version of structured outputs through `{"type": "json_object"}`.
                    For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
            - `reasoning`: A dictionary of parameters for reasoning. For example:
                - `summary`: The summary of the reasoning.
                - `effort`: The level of effort to put into the reasoning. Can be `low`, `medium` or `high`.
                - `generate_summary`: Whether to generate a summary of the reasoning.
                Note: OpenAI does not return the reasoning tokens, but the summary can be viewed if it is enabled.
                For details, see the [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning).
        :param tools:
            A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
        :param tools_strict:
            Whether to enable strict schema adherence for tool calls. If set to `True`, the model will follow exactly
            the schema provided in the `parameters` field of the tool definition, but this may increase latency.
        :param http_client_kwargs:
            A dictionary of keyword arguments to configure a custom `httpx.Client` or `httpx.AsyncClient`.
            For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
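
        The following is a minimal sketch of structured output requested through
        `generation_kwargs`; `CityInfo` is a hypothetical Pydantic model defined
        only for illustration, and `AZURE_OPENAI_API_KEY` is assumed to be set
        in the environment.
        <!-- test-ignore -->
        ```python
        from pydantic import BaseModel

        from haystack.components.generators.chat import AzureOpenAIResponsesChatGenerator
        from haystack.dataclasses import ChatMessage


        class CityInfo(BaseModel):
            city: str
            country: str


        client = AzureOpenAIResponsesChatGenerator(
            azure_endpoint="https://example-resource.azure.openai.com/",
            generation_kwargs={"text_format": CityInfo},
        )
        result = client.run([ChatMessage.from_user("Which city is the capital of France?")])
        ```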
        """
        azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        if azure_endpoint is None:
            raise ValueError(
                "You must provide `azure_endpoint` or set the `AZURE_OPENAI_ENDPOINT` environment variable."
            )
        self._azure_endpoint = azure_endpoint
        self._azure_deployment = azure_deployment
        super(AzureOpenAIResponsesChatGenerator, self).__init__(  # noqa: UP008
            api_key=api_key,  # type: ignore[arg-type]
            model=self._azure_deployment,
            streaming_callback=streaming_callback,
            api_base_url=f"{self._azure_endpoint.rstrip('/')}/openai/v1",
            organization=organization,
            generation_kwargs=generation_kwargs,
            timeout=timeout,
            max_retries=max_retries,
            tools=tools,
            tools_strict=tools_strict,
            http_client_kwargs=http_client_kwargs,
        )

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            The serialized component as a dictionary.
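
        A minimal round-trip sketch, assuming `client` was created as in the
        class-level usage example:
        <!-- test-ignore -->
        ```python
        data = client.to_dict()
        restored = AzureOpenAIResponsesChatGenerator.from_dict(data)
        ```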
        """
        callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None

        # API key can be a secret or a callable
        serialized_api_key = (
            serialize_callable(self.api_key)
            if callable(self.api_key)
            else self.api_key.to_dict()
            if isinstance(self.api_key, Secret)
            else None
        )

        # If the text format is a Pydantic model, it's converted to OpenAI's JSON schema format.
        # If it's already a JSON schema, it's left as is.
        generation_kwargs = self.generation_kwargs.copy()
        text_format = generation_kwargs.pop("text_format", None)
        if text_format and isinstance(text_format, type) and issubclass(text_format, BaseModel):
            json_schema = {
                "format": {
                    "type": "json_schema",
                    "name": text_format.__name__,
                    "strict": True,
                    "schema": to_strict_json_schema(text_format),
                }
            }
            # The JSON schema needs to be passed to the `text` parameter instead of `text_format`
            generation_kwargs["text"] = json_schema

        # OpenAI/MCP tools are passed as a list of dictionaries
        serialized_tools: dict[str, Any] | list[dict[str, Any]] | None
        if self.tools and isinstance(self.tools, list) and isinstance(self.tools[0], dict):
            # mypy can't infer that self.tools is list[dict] here
            serialized_tools = self.tools  # type: ignore[assignment]
        else:
            serialized_tools = serialize_tools_or_toolset(self.tools)  # type: ignore[arg-type]

        return default_to_dict(
            self,
            azure_endpoint=self._azure_endpoint,
            api_key=serialized_api_key,
            azure_deployment=self._azure_deployment,
            streaming_callback=callback_name,
            organization=self.organization,
            generation_kwargs=generation_kwargs,
            timeout=self.timeout,
            max_retries=self.max_retries,
            tools=serialized_tools,
            tools_strict=self.tools_strict,
            http_client_kwargs=self.http_client_kwargs,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AzureOpenAIResponsesChatGenerator":
        """
        Deserialize this component from a dictionary.

        :param data: The dictionary representation of this component.
        :returns:
            The deserialized component instance.
        """
        # If api_key is a str, it's a serialized callable (Secrets are handled automatically by default_from_dict)
        serialized_api_key = data["init_parameters"].get("api_key")
        if isinstance(serialized_api_key, str):
            data["init_parameters"]["api_key"] = deserialize_callable(serialized_api_key)

        # We only deserialize the tools if they are Haystack tools,
        # because OpenAI tools are not serialized in the same way
        tools = data["init_parameters"].get("tools")
        if tools and (
            isinstance(tools, dict)
            and tools.get("type") == "haystack.tools.toolset.Toolset"
            or isinstance(tools, list)
            and tools[0].get("type") == "haystack.tools.tool.Tool"
        ):
            deserialize_tools_or_toolset_inplace(data["init_parameters"], key="tools")

        init_params = data.get("init_parameters", {})
        serialized_callback_handler = init_params.get("streaming_callback")
        if serialized_callback_handler:
            data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)
        return default_from_dict(cls, data)