# azure_responses.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

import os
from collections.abc import Awaitable, Callable
from typing import Any, ClassVar

from openai.lib._pydantic import to_strict_json_schema
from pydantic import BaseModel

from haystack import component, default_from_dict, default_to_dict
from haystack.components.generators.chat import OpenAIResponsesChatGenerator
from haystack.dataclasses.streaming_chunk import StreamingCallbackT
from haystack.tools import ToolsType, deserialize_tools_or_toolset_inplace, serialize_tools_or_toolset
from haystack.utils import Secret, deserialize_callable, serialize_callable


@component
class AzureOpenAIResponsesChatGenerator(OpenAIResponsesChatGenerator):
    """
    Completes chats using OpenAI's Responses API on Azure.

    It works with the gpt-5 and o-series models and supports streaming responses
    from OpenAI API. It uses [ChatMessage](https://docs.haystack.deepset.ai/docs/chatmessage)
    format in input and output.

    You can customize how the text is generated by passing parameters to the
    OpenAI API. Use the `**generation_kwargs` argument when you initialize
    the component or when you run it. Any parameter that works with
    `openai.Responses.create` will work here too.

    For details on OpenAI API parameters, see
    [OpenAI documentation](https://platform.openai.com/docs/api-reference/responses).

    ### Usage example
    <!-- test-ignore -->
    ```python
    from haystack.components.generators.chat import AzureOpenAIResponsesChatGenerator
    from haystack.dataclasses import ChatMessage

    messages = [ChatMessage.from_user("What's Natural Language Processing?")]

    client = AzureOpenAIResponsesChatGenerator(
        azure_endpoint="https://example-resource.azure.openai.com/",
        generation_kwargs={"reasoning": {"effort": "low", "summary": "auto"}}
    )
    response = client.run(messages)
    print(response)
    ```
    """

    SUPPORTED_MODELS: ClassVar[list[str]] = [
        "gpt-5.4-pro",
        "gpt-5.4",
        "gpt-5.3-chat",
        "gpt-5.3-codex",
        "gpt-5.2-codex",
        "gpt-5.2",
        "gpt-5.2-chat",
        "gpt-5.1-codex-max",
        "gpt-5.1",
        "gpt-5.1-chat",
        "gpt-5.1-codex",
        "gpt-5.1-codex-mini",
        "gpt-5-pro",
        "gpt-5-codex",
        "gpt-5",
        "gpt-5-mini",
        "gpt-5-nano",
        "gpt-5-chat",
        "gpt-4o",
        "gpt-4o-mini",
        "computer-use-preview",
        "gpt-4.1",
        "gpt-4.1-nano",
        "gpt-4.1-mini",
        "gpt-image-1",
        "gpt-image-1-mini",
        "gpt-image-1.5",
        "o1",
        "o3-mini",
        "o3",
        "o4-mini",
    ]
    """A non-exhaustive list of chat models supported by this component.
    See https://learn.microsoft.com/en-us/azure/foundry/openai/how-to/responses#model-support for the full list."""

    # ruff: noqa: PLR0913
    def __init__(
        self,
        *,
        api_key: Secret | Callable[[], str] | Callable[[], Awaitable[str]] = Secret.from_env_var(
            "AZURE_OPENAI_API_KEY", strict=False
        ),
        azure_endpoint: str | None = None,
        azure_deployment: str = "gpt-5-mini",
        streaming_callback: StreamingCallbackT | None = None,
        organization: str | None = None,
        generation_kwargs: dict[str, Any] | None = None,
        timeout: float | None = None,
        max_retries: int | None = None,
        tools: ToolsType | None = None,
        tools_strict: bool = False,
        http_client_kwargs: dict[str, Any] | None = None,
    ) -> None:
        """
        Initialize the AzureOpenAIResponsesChatGenerator component.

        :param api_key: The API key to use for authentication. Can be:
            - A `Secret` object containing the API key.
            - A `Secret` object containing the [Azure Active Directory token](https://www.microsoft.com/en-us/security/business/identity-access/microsoft-entra-id).
            - A function that returns an Azure Active Directory token.
        :param azure_endpoint: The endpoint of the deployed model, for example `"https://example-resource.azure.openai.com/"`.
        :param azure_deployment: The deployment of the model, usually the model name.
        :param organization: Your organization ID, defaults to `None`. For help, see
            [Setting up your organization](https://platform.openai.com/docs/guides/production-best-practices/setting-up-your-organization).
        :param streaming_callback: A callback function called when a new token is received from the stream.
            It accepts [StreamingChunk](https://docs.haystack.deepset.ai/docs/data-classes#streamingchunk)
            as an argument.
        :param timeout: Timeout for OpenAI client calls. If not set, it defaults to either the
            `OPENAI_TIMEOUT` environment variable, or 30 seconds.
        :param max_retries: Maximum number of retries to contact OpenAI after an internal error.
            If not set, it defaults to either the `OPENAI_MAX_RETRIES` environment variable, or set to 5.
        :param generation_kwargs: Other parameters to use for the model. These parameters are sent
            directly to the OpenAI endpoint.
            See OpenAI [documentation](https://platform.openai.com/docs/api-reference/responses) for
            more details.
            Some of the supported parameters:
            - `temperature`: What sampling temperature to use. Higher values like 0.8 will make the output more random,
              while lower values like 0.2 will make it more focused and deterministic.
            - `top_p`: An alternative to sampling with temperature, called nucleus sampling, where the model
              considers the results of the tokens with top_p probability mass. For example, 0.1 means only the tokens
              comprising the top 10% probability mass are considered.
            - `previous_response_id`: The ID of the previous response.
              Use this to create multi-turn conversations.
            - `text_format`: A Pydantic model that enforces the structure of the model's response.
              If provided, the output will always be validated against this
              format (unless the model returns a tool call).
              For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
            - `text`: A JSON schema that enforces the structure of the model's response.
              If provided, the output will always be validated against this
              format (unless the model returns a tool call).
              Notes:
              - Both JSON Schema and Pydantic models are supported for latest models starting from GPT-4o.
              - If both are provided, `text_format` takes precedence and json schema passed to `text` is ignored.
              - Currently, this component doesn't support streaming for structured outputs.
              - Older models only support basic version of structured outputs through `{"type": "json_object"}`.
                For detailed information on JSON mode, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs#json-mode).
            - `reasoning`: A dictionary of parameters for reasoning. For example:
              - `summary`: The summary of the reasoning.
              - `effort`: The level of effort to put into the reasoning. Can be `low`, `medium` or `high`.
              - `generate_summary`: Whether to generate a summary of the reasoning.
              Note: OpenAI does not return the reasoning tokens, but we can view summary if its enabled.
              For details, see the [OpenAI Reasoning documentation](https://platform.openai.com/docs/guides/reasoning).
        :param tools:
            A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
        :param tools_strict:
            Whether to enable strict schema adherence for tool calls. If set to `True`, the model will follow exactly
            the schema provided in the `parameters` field of the tool definition, but this may increase latency.
        :param http_client_kwargs:
            A dictionary of keyword arguments to configure a custom `httpx.Client`or `httpx.AsyncClient`.
            For more information, see the [HTTPX documentation](https://www.python-httpx.org/api/#client).
        """
        # Fall back to the environment variable so the endpoint can be configured without code changes.
        azure_endpoint = azure_endpoint or os.getenv("AZURE_OPENAI_ENDPOINT")
        if azure_endpoint is None:
            raise ValueError(
                "You must provide `azure_endpoint` or set the `AZURE_OPENAI_ENDPOINT` environment variable."
            )
        self._azure_endpoint = azure_endpoint
        self._azure_deployment = azure_deployment
        # Delegate to the OpenAI Responses generator, pointing it at the Azure v1-compatible base URL.
        # The deployment name plays the role of the model name on Azure.
        super(AzureOpenAIResponsesChatGenerator, self).__init__(  # noqa: UP008
            api_key=api_key,  # type: ignore[arg-type]
            model=self._azure_deployment,
            streaming_callback=streaming_callback,
            api_base_url=f"{self._azure_endpoint.rstrip('/')}/openai/v1",
            organization=organization,
            generation_kwargs=generation_kwargs,
            timeout=timeout,
            max_retries=max_retries,
            tools=tools,
            tools_strict=tools_strict,
            http_client_kwargs=http_client_kwargs,
        )

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize this component to a dictionary.

        :returns:
            The serialized component as a dictionary.
        """
        callback_name = serialize_callable(self.streaming_callback) if self.streaming_callback else None

        # API key can be a secret or a callable
        serialized_api_key = (
            serialize_callable(self.api_key)
            if callable(self.api_key)
            else self.api_key.to_dict()
            if isinstance(self.api_key, Secret)
            else None
        )

        # If the text format is a Pydantic model, it's converted to openai's json schema format
        # If it's already a json schema, it's left as is
        generation_kwargs = self.generation_kwargs.copy()
        text_format = generation_kwargs.pop("text_format", None)
        if text_format and isinstance(text_format, type) and issubclass(text_format, BaseModel):
            json_schema = {
                "format": {
                    "type": "json_schema",
                    "name": text_format.__name__,
                    "strict": True,
                    "schema": to_strict_json_schema(text_format),
                }
            }
            # json schema needs to be passed to text parameter instead of text_format
            generation_kwargs["text"] = json_schema

        # OpenAI/MCP tools are passed as list of dictionaries
        serialized_tools: dict[str, Any] | list[dict[str, Any]] | None
        if self.tools and isinstance(self.tools, list) and isinstance(self.tools[0], dict):
            # mypy can't infer that self.tools is list[dict] here
            serialized_tools = self.tools  # type: ignore[assignment]
        else:
            serialized_tools = serialize_tools_or_toolset(self.tools)  # type: ignore[arg-type]

        return default_to_dict(
            self,
            azure_endpoint=self._azure_endpoint,
            api_key=serialized_api_key,
            azure_deployment=self._azure_deployment,
            streaming_callback=callback_name,
            organization=self.organization,
            generation_kwargs=generation_kwargs,
            timeout=self.timeout,
            max_retries=self.max_retries,
            tools=serialized_tools,
            tools_strict=self.tools_strict,
            http_client_kwargs=self.http_client_kwargs,
        )

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "AzureOpenAIResponsesChatGenerator":
        """
        Deserialize this component from a dictionary.

        :param data: The dictionary representation of this component.
        :returns:
            The deserialized component instance.
        """
        # If api_key is a str, it's a callable (Secrets are handled automatically by default_from_dict)
        serialized_api_key = data["init_parameters"].get("api_key")
        if isinstance(serialized_api_key, str):
            data["init_parameters"]["api_key"] = deserialize_callable(serialized_api_key)

        # we only deserialize the tools if they are haystack tools
        # because openai tools are not serialized in the same way
        tools = data["init_parameters"].get("tools")
        if tools and (
            isinstance(tools, dict)
            and tools.get("type") == "haystack.tools.toolset.Toolset"
            or isinstance(tools, list)
            and tools[0].get("type") == "haystack.tools.tool.Tool"
        ):
            deserialize_tools_or_toolset_inplace(data["init_parameters"], key="tools")

        init_params = data.get("init_parameters", {})
        serialized_callback_handler = init_params.get("streaming_callback")
        if serialized_callback_handler:
            data["init_parameters"]["streaming_callback"] = deserialize_callable(serialized_callback_handler)
        return default_from_dict(cls, data)