/ haystack / tools / from_function.py
from_function.py
  1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
  2  #
  3  # SPDX-License-Identifier: Apache-2.0
  4  
  5  import inspect
  6  from collections.abc import Callable
  7  from typing import Any
  8  
  9  from pydantic import create_model
 10  
 11  from haystack.components.agents.state.state import State
 12  
 13  from .errors import SchemaGenerationError
 14  from .parameters_schema_utils import _contains_callable_type, _unwrap_optional
 15  from .tool import Tool
 16  
 17  
 18  def create_tool_from_function(
 19      function: Callable,
 20      name: str | None = None,
 21      description: str | None = None,
 22      inputs_from_state: dict[str, str] | None = None,
 23      outputs_to_state: dict[str, dict[str, Any]] | None = None,
 24      outputs_to_string: dict[str, Any] | None = None,
 25  ) -> "Tool":
 26      """
 27      Create a Tool instance from a function.
 28  
 29      Allows customizing the Tool name and description.
 30      For simpler use cases, consider using the `@tool` decorator.
 31  
 32      ### Usage example
 33  
 34      ```python
 35      from typing import Annotated, Literal
 36      from haystack.tools import create_tool_from_function
 37  
 38      def get_weather(
 39          city: Annotated[str, "the city for which to get the weather"] = "Munich",
 40          unit: Annotated[Literal["Celsius", "Fahrenheit"], "the unit for the temperature"] = "Celsius"):
 41          '''A simple function to get the current weather for a location.'''
 42          return f"Weather report for {city}: 20 {unit}, sunny"
 43  
 44      tool = create_tool_from_function(get_weather)
 45  
 46      print(tool)
 47      # >> Tool(name='get_weather', description='A simple function to get the current weather for a location.',
 48      # >> parameters={
 49      # >> 'type': 'object',
 50      # >> 'properties': {
 51      # >>     'city': {'type': 'string', 'description': 'the city for which to get the weather', 'default': 'Munich'},
 52      # >>     'unit': {
 53      # >>         'type': 'string',
 54      # >>         'enum': ['Celsius', 'Fahrenheit'],
 55      # >>         'description': 'the unit for the temperature',
 56      # >>         'default': 'Celsius',
 57      # >>     },
 58      # >>     }
 59      # >> },
 60      # >> function=<function get_weather at 0x7f7b3a8a9b80>)
 61      ```
 62  
 63      :param function:
 64          The function to be converted into a Tool.
 65          The function must include type hints for all parameters.
 66          The function is expected to have basic python input types (str, int, float, bool, list, dict, tuple).
 67          Other input types may work but are not guaranteed.
 68          If a parameter is annotated using `typing.Annotated`, its metadata will be used as parameter description.
 69      :param name:
 70          The name of the Tool. If not provided, the name of the function will be used.
 71      :param description:
 72          The description of the Tool. If not provided, the docstring of the function will be used.
 73          To intentionally leave the description empty, pass an empty string.
 74      :param inputs_from_state:
 75          Optional dictionary mapping state keys to tool parameter names.
 76          Example: `{"repository": "repo"}` maps state's "repository" to tool's "repo" parameter.
 77      :param outputs_to_state:
 78          Optional dictionary defining how tool outputs map to keys within state as well as optional handlers.
 79          If the source is provided only the specified output key is sent to the handler.
 80          Example:
 81          ```python
 82          {
 83              "documents": {"source": "docs", "handler": custom_handler}
 84          }
 85          ```
 86          If the source is omitted the whole tool result is sent to the handler.
 87          Example:
 88          ```python
 89          {
 90              "documents": {"handler": custom_handler}
 91          }
 92          ```
 93      :param outputs_to_string:
 94          Optional dictionary defining how tool outputs should be converted into string(s) or results.
 95          If not provided, the tool result is converted to a string using a default handler.
 96  
 97          `outputs_to_string` supports two formats:
 98  
 99          1. Single output format - use "source", "handler", and/or "raw_result" at the root level:
100             ```python
101             {
102                 "source": "docs", "handler": format_documents, "raw_result": False
103             }
104             ```
105             - `source`: If provided, only the specified output key is sent to the handler. If not provided, the whole
106                tool result is sent to the handler.
107             - `handler`: A function that takes the tool output (or the extracted source value) and returns the
108               final result.
109             - `raw_result`: If `True`, the result is returned raw without string conversion, but applying the `handler`
110               if provided. This is intended for tools that return images. In this mode, the Tool function or the
111               `handler` must return a list of `TextContent`/`ImageContent` objects to ensure compatibility with Chat
112               Generators.
113  
114          2. Multiple output format - map keys to individual configurations:
115             ```python
116             {
117                 "formatted_docs": {"source": "docs", "handler": format_documents},
118                 "summary": {"source": "summary_text", "handler": str.upper}
119             }
120             ```
121             Each key maps to a dictionary that can contain "source" and/or "handler".
122             Note that `raw_result` is not supported in the multiple output format.
123      :returns:
124          The Tool created from the function.
125  
126      :raises ValueError:
127          If any parameter of the function lacks a type hint.
128      :raises SchemaGenerationError:
129          If there is an error generating the JSON schema for the Tool.
130      """
131      tool_description = description if description is not None else (function.__doc__ or "")
132  
133      signature = inspect.signature(function)
134  
135      # collect fields (types and defaults) and descriptions from function parameters
136      fields: dict[str, Any] = {}
137      descriptions = {}
138  
139      for param_name, param in signature.parameters.items():
140          # Skip adding parameter names that will be passed to the tool from State
141          if inputs_from_state and param_name in inputs_from_state.values():
142              continue
143  
144          # Skip State-typed parameters (including Optional[State]) - ToolInvoker injects them at runtime
145          if _unwrap_optional(param.annotation) is State:
146              continue
147  
148          if param.annotation is param.empty:
149              raise ValueError(f"Function '{function.__name__}': parameter '{param_name}' does not have a type hint.")
150  
151          # Skip Callable types since Pydantic cannot generate JSON schemas for them
152          if _contains_callable_type(param.annotation):
153              continue
154  
155          # if the parameter has not a default value, Pydantic requires an Ellipsis (...)
156          # to explicitly indicate that the parameter is required
157          default = param.default if param.default is not param.empty else ...
158          fields[param_name] = (param.annotation, default)
159  
160          if hasattr(param.annotation, "__metadata__"):
161              descriptions[param_name] = param.annotation.__metadata__[0]
162  
163      # create Pydantic model and generate JSON schema
164      try:
165          model = create_model(function.__name__, **fields)
166          schema = model.model_json_schema()
167      except Exception as e:
168          raise SchemaGenerationError(f"Failed to create JSON schema for function '{function.__name__}'") from e
169  
170      # we don't want to include title keywords in the schema, as they contain redundant information
171      # there is no programmatic way to prevent Pydantic from adding them, so we remove them later
172      # see https://github.com/pydantic/pydantic/discussions/8504
173      _remove_title_from_schema(schema)
174  
175      # add parameters descriptions to the schema
176      for param_name, param_description in descriptions.items():
177          if param_name in schema["properties"]:
178              schema["properties"][param_name]["description"] = param_description
179  
180      return Tool(
181          name=name or function.__name__,
182          description=tool_description,
183          parameters=schema,
184          function=function,
185          inputs_from_state=inputs_from_state,
186          outputs_to_state=outputs_to_state,
187          outputs_to_string=outputs_to_string,
188      )
189  
190  
def tool(
    function: Callable | None = None,
    *,
    name: str | None = None,
    description: str | None = None,
    inputs_from_state: dict[str, str] | None = None,
    outputs_to_state: dict[str, dict[str, Any]] | None = None,
    outputs_to_string: dict[str, Any] | None = None,
) -> Tool | Callable[[Callable], Tool]:
    """
    Decorator that turns a function into a Tool.

    It works both bare and with keyword arguments:

    ```python
    @tool  # without parameters
    def my_function(): ...

    @tool(name="custom_name")  # with parameters
    def my_function(): ...
    ```

    ### Usage example
    ```python
    from typing import Annotated, Literal
    from haystack.tools import tool

    @tool
    def get_weather(
        city: Annotated[str, "the city for which to get the weather"] = "Munich",
        unit: Annotated[Literal["Celsius", "Fahrenheit"], "the unit for the temperature"] = "Celsius"):
        '''A simple function to get the current weather for a location.'''
        return f"Weather report for {city}: 20 {unit}, sunny"

    print(get_weather)
    # >> Tool(name='get_weather', description='A simple function to get the current weather for a location.',
    # >> parameters={
    # >> 'type': 'object',
    # >> 'properties': {
    # >>     'city': {'type': 'string', 'description': 'the city for which to get the weather', 'default': 'Munich'},
    # >>     'unit': {
    # >>         'type': 'string',
    # >>         'enum': ['Celsius', 'Fahrenheit'],
    # >>         'description': 'the unit for the temperature',
    # >>         'default': 'Celsius',
    # >>     },
    # >> }
    # >> },
    # >> function=<function get_weather at 0x7f7b3a8a9b80>)
    ```

    :param function: The function being decorated (set automatically when the decorator is used bare).
    :param name: Optional custom name for the tool.
    :param description: Optional custom description.
    :param inputs_from_state:
        Optional dictionary mapping state keys to tool parameter names.
        Example: `{"repository": "repo"}` maps state's "repository" to tool's "repo" parameter.
    :param outputs_to_state:
        Optional dictionary describing which state keys receive tool outputs, with optional handlers.
        With a source, only that output key is passed to the handler.
        Example:
        ```python
        {
            "documents": {"source": "docs", "handler": custom_handler}
        }
        ```
        Without a source, the whole tool result is passed to the handler.
        Example:
        ```python
        {
            "documents": {"handler": custom_handler}
        }
        ```
    :param outputs_to_string:
        Optional dictionary describing how tool outputs are turned into string(s) or results.
        When omitted, a default handler converts the tool result to a string.

        Two formats are supported:

        1. Single output format - "source", "handler", and/or "raw_result" at the root level:
           ```python
           {
               "source": "docs", "handler": format_documents, "raw_result": False
           }
           ```
           - `source`: when given, only that output key is passed to the handler; otherwise the whole
             tool result is passed.
           - `handler`: a function receiving the tool output (or the extracted source value) and returning
             the final result.
           - `raw_result`: when `True`, the result is returned raw, without string conversion, after applying
             the `handler` if one is given. Intended for tools that return images. In this mode the Tool
             function or the `handler` must return a list of `TextContent`/`ImageContent` objects to stay
             compatible with Chat Generators.

        2. Multiple output format - keys mapped to individual configurations:
           ```python
           {
               "formatted_docs": {"source": "docs", "handler": format_documents},
               "summary": {"source": "summary_text", "handler": str.upper}
           }
           ```
           Each key maps to a dictionary that may contain "source" and/or "handler".
           `raw_result` is not supported in this format.

    :returns: A Tool instance when used bare, otherwise a decorator function that creates one.
    """
    # Everything except the function itself is forwarded unchanged to create_tool_from_function.
    tool_options: dict[str, Any] = {
        "name": name,
        "description": description,
        "inputs_from_state": inputs_from_state,
        "outputs_to_state": outputs_to_state,
        "outputs_to_string": outputs_to_string,
    }

    def _to_tool(func: Callable) -> Tool:
        return create_tool_from_function(function=func, **tool_options)

    # Bare `@tool`: the decorated function arrives directly, so convert it right away.
    if function is not None:
        return _to_tool(function)

    # `@tool(...)`: return the converter; Python applies it to the decorated function.
    return _to_tool
308  
309  
310  def _remove_title_from_schema(schema: dict[str, Any]) -> None:
311      """
312      Remove the 'title' keyword from JSON schema and contained property schemas.
313  
314      :param schema:
315          The JSON schema to remove the 'title' keyword from.
316      """
317      for key, value in list(schema.items()):
318          # Make sure not to remove parameters named title
319          if key == "properties" and isinstance(value, dict) and "title" in value:
320              for sub_val in value.values():
321                  _remove_title_from_schema(sub_val)
322          elif key == "title":
323              del schema[key]
324          elif isinstance(value, dict):
325              _remove_title_from_schema(value)
326          elif isinstance(value, list):
327              for item in value:
328                  if isinstance(item, dict):
329                      _remove_title_from_schema(item)