/ haystack / tools / toolset.py
toolset.py
  1  # SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
  2  #
  3  # SPDX-License-Identifier: Apache-2.0
  4  
  5  from collections.abc import Iterator
  6  from dataclasses import dataclass, field
  7  from typing import Any, Union
  8  
  9  from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
 10  from haystack.tools.tool import Tool, _check_duplicate_tool_names
 11  
 12  
 13  @dataclass
 14  class Toolset:
 15      """
 16      A collection of related Tools that can be used and managed as a cohesive unit.
 17  
 18      Toolset serves two main purposes:
 19  
 20      1. Group related tools together:
 21         Toolset allows you to organize related tools into a single collection, making it easier
 22         to manage and use them as a unit in Haystack pipelines.
 23  
 24         Example:
 25      ```python
 26      from haystack.tools import Tool, Toolset
 27      from haystack.components.tools import ToolInvoker
 28  
 29      # Define math functions
 30      def add_numbers(a: int, b: int) -> int:
 31          return a + b
 32  
 33      def subtract_numbers(a: int, b: int) -> int:
 34          return a - b
 35  
 36      # Create tools with proper schemas
 37      add_tool = Tool(
 38          name="add",
 39          description="Add two numbers",
 40          parameters={
 41              "type": "object",
 42              "properties": {
 43                  "a": {"type": "integer"},
 44                  "b": {"type": "integer"}
 45              },
 46              "required": ["a", "b"]
 47          },
 48          function=add_numbers
 49      )
 50  
 51      subtract_tool = Tool(
 52          name="subtract",
 53          description="Subtract b from a",
 54          parameters={
 55              "type": "object",
 56              "properties": {
 57                  "a": {"type": "integer"},
 58                  "b": {"type": "integer"}
 59              },
 60              "required": ["a", "b"]
 61          },
 62          function=subtract_numbers
 63      )
 64  
 65      # Create a toolset with the math tools
 66      math_toolset = Toolset([add_tool, subtract_tool])
 67  
 68      # Use the toolset with a ToolInvoker or ChatGenerator component
 69      invoker = ToolInvoker(tools=math_toolset)
 70      ```
 71  
 72      2. Base class for dynamic tool loading:
 73         By subclassing Toolset, you can create implementations that dynamically load tools
 74         from external sources like OpenAPI URLs, MCP servers, or other resources.
 75  
 76         Example:
 77      ```python
 78      from haystack.core.serialization import generate_qualified_class_name
 79      from haystack.tools import Tool, Toolset
 80      from haystack.components.tools import ToolInvoker
 81  
 82      class CalculatorToolset(Toolset):
 83          '''A toolset for calculator operations.'''
 84  
 85          def __init__(self) -> None:
 86              tools = self._create_tools()
 87              super().__init__(tools)
 88  
 89          def _create_tools(self):
 90              # These Tool instances are obviously defined statically and for illustration purposes only.
 91              # In a real-world scenario, you would dynamically load tools from an external source here.
 92              tools = []
 93              add_tool = Tool(
 94                  name="add",
 95                  description="Add two numbers",
 96                  parameters={
 97                      "type": "object",
 98                      "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}},
 99                      "required": ["a", "b"],
100                  },
101                  function=lambda a, b: a + b,
102              )
103  
104              multiply_tool = Tool(
105                  name="multiply",
106                  description="Multiply two numbers",
107                  parameters={
108                      "type": "object",
109                      "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}},
110                      "required": ["a", "b"],
111                  },
112                  function=lambda a, b: a * b,
113              )
114  
115              tools.append(add_tool)
116              tools.append(multiply_tool)
117  
118              return tools
119  
120          def to_dict(self):
121              return {
122                  "type": generate_qualified_class_name(type(self)),
123                  "data": {},  # no data to serialize as we define the tools dynamically
124              }
125  
126          @classmethod
127          def from_dict(cls, data):
128              return cls()  # Recreate the tools dynamically during deserialization
129  
130      # Create the dynamic toolset and use it with ToolInvoker
131      calculator_toolset = CalculatorToolset()
132      invoker = ToolInvoker(tools=calculator_toolset)
133      ```
134  
135      Toolset implements the collection interface (__iter__, __contains__, __len__, __getitem__),
136      making it behave like a list of Tools. This makes it compatible with components that expect
137      iterable tools, such as ToolInvoker or Haystack chat generators.
138  
139      When implementing a custom Toolset subclass for dynamic tool loading:
140      - Perform the dynamic loading in the __init__ method
141      - Override to_dict() and from_dict() methods if your tools are defined dynamically
142      - Serialize endpoint descriptors rather than tool instances if your tools
143        are loaded from external sources
144      """
145  
146      # Use field() with default_factory to initialize the list
147      tools: list[Tool] = field(default_factory=list)
148  
149      def __post_init__(self) -> None:
150          """
151          Validate and set up the toolset after initialization.
152  
153          This handles the case when tools are provided during initialization.
154          """
155          # If initialization was done a single Tool, raise an error
156          if isinstance(self.tools, Tool):
157              raise TypeError("A single Tool cannot be directly passed to Toolset. Please use a list: Toolset([tool])")
158  
159          # Check for duplicate tool names in the initial set
160          _check_duplicate_tool_names(self.tools)
161  
162      def __iter__(self) -> Iterator[Tool]:
163          """
164          Return an iterator over the Tools in this Toolset.
165  
166          This allows the Toolset to be used wherever a list of Tools is expected.
167  
168          :returns: An iterator yielding Tool instances
169          """
170          return iter(self.tools)
171  
172      def __contains__(self, item: str | Tool) -> bool:
173          """
174          Check if a tool is in this Toolset.
175  
176          Supports checking by:
177          - Tool instance: tool in toolset
178          - Tool name: "tool_name" in toolset
179  
180          :param item: Tool instance or tool name string
181          :returns: True if contained, False otherwise
182          """
183          if isinstance(item, str):
184              return any(tool.name == item for tool in self.tools)
185          if isinstance(item, Tool):
186              return item in self.tools
187          return False
188  
189      def warm_up(self) -> None:
190          """
191          Prepare the Toolset for use.
192  
193          By default, this method iterates through and warms up all tools in the Toolset.
194          Subclasses can override this method to customize initialization behavior, such as:
195  
196          - Setting up shared resources (database connections, HTTP sessions) instead of
197            warming individual tools
198          - Implementing custom initialization logic for dynamically loaded tools
199          - Controlling when and how tools are initialized
200  
201          For example, a Toolset that manages tools from an external service (like MCPToolset)
202          might override this to initialize a shared connection rather than warming up
203          individual tools:
204  
205          ```python
206          class MCPToolset(Toolset):
207              def warm_up(self) -> None:
208                  # Only warm up the shared MCP connection, not individual tools
209                  self.mcp_connection = establish_connection(self.server_url)
210          ```
211  
212          This method should be idempotent, as it may be called multiple times.
213          """
214          for tool in self.tools:
215              if hasattr(tool, "warm_up"):
216                  tool.warm_up()
217  
218      def add(self, tool: Union[Tool, "Toolset"]) -> None:
219          """
220          Add a new Tool or merge another Toolset.
221  
222          :param tool: A Tool instance or another Toolset to add
223          :raises ValueError: If adding the tool would result in duplicate tool names
224          :raises TypeError: If the provided object is not a Tool or Toolset
225          """
226          new_tools = []
227  
228          if isinstance(tool, Tool):
229              new_tools = [tool]
230          elif isinstance(tool, Toolset):
231              new_tools = list(tool)
232          else:
233              raise TypeError(f"Expected Tool or Toolset, got {type(tool).__name__}")
234  
235          # Check for duplicates before adding
236          combined_tools = self.tools + new_tools
237          _check_duplicate_tool_names(combined_tools)
238  
239          self.tools.extend(new_tools)
240  
241      def to_dict(self) -> dict[str, Any]:
242          """
243          Serialize the Toolset to a dictionary.
244  
245          :returns: A dictionary representation of the Toolset
246  
247          Note for subclass implementers:
248          The default implementation is ideal for scenarios where Tool resolution is static. However, if your subclass
249          of Toolset dynamically resolves Tool instances from external sources—such as an MCP server, OpenAPI URL, or
250          a local OpenAPI specification—you should consider serializing the endpoint descriptor instead of the Tool
251          instances themselves. This strategy preserves the dynamic nature of your Toolset and minimizes the overhead
252          associated with serializing potentially large collections of Tool objects. Moreover, by serializing the
253          descriptor, you ensure that the deserialization process can accurately reconstruct the Tool instances, even
254          if they have been modified or removed since the last serialization. Failing to serialize the descriptor may
255          lead to issues where outdated or incorrect Tool configurations are loaded, potentially causing errors or
256          unexpected behavior.
257          """
258          return {
259              "type": generate_qualified_class_name(type(self)),
260              "data": {"tools": [tool.to_dict() for tool in self.tools]},
261          }
262  
263      @classmethod
264      def from_dict(cls, data: dict[str, Any]) -> "Toolset":
265          """
266          Deserialize a Toolset from a dictionary.
267  
268          :param data: Dictionary representation of the Toolset
269          :returns: A new Toolset instance
270          """
271          inner_data = data["data"]
272          tools_data = inner_data.get("tools", [])
273  
274          tools = []
275          for tool_data in tools_data:
276              tool_class = import_class_by_name(tool_data["type"])
277              if not issubclass(tool_class, Tool):
278                  raise TypeError(f"Class '{tool_class}' is not a subclass of Tool")
279              tools.append(tool_class.from_dict(tool_data))
280  
281          return cls(tools=tools)
282  
283      def __add__(self, other: Union[Tool, "Toolset", list[Tool]]) -> "Toolset":
284          """
285          Concatenate this Toolset with another Tool, Toolset, or list of Tools.
286  
287          :param other: Another Tool, Toolset, or list of Tools to concatenate
288          :returns: A new Toolset containing all tools
289          :raises TypeError: If the other parameter is not a Tool, Toolset, or list of Tools
290          :raises ValueError: If the combination would result in duplicate tool names
291          """
292          if isinstance(other, Tool):
293              return Toolset(tools=self.tools + [other])
294          if isinstance(other, Toolset):
295              return _ToolsetWrapper([self, other])
296          if isinstance(other, list) and all(isinstance(item, Tool) for item in other):
297              return Toolset(tools=self.tools + other)
298          raise TypeError(f"Cannot add {type(other).__name__} to Toolset")
299  
300      def __len__(self) -> int:
301          """
302          Return the number of Tools in this Toolset.
303  
304          :returns: Number of Tools
305          """
306          return len(self.tools)
307  
308      def __getitem__(self, index: int) -> Tool:
309          """
310          Get a Tool by index.
311  
312          :param index: Index of the Tool to get
313          :returns: The Tool at the specified index
314          """
315          return self.tools[index]
316  
317  
class _ToolsetWrapper(Toolset):
    """
    A wrapper that holds multiple toolsets and provides a unified interface.

    This is used internally when combining different types of toolsets to preserve
    their individual configurations while still being usable with ToolInvoker.

    Iteration, membership, length, indexing and warm-up are delegated to the wrapped
    toolsets in the order they were given. The inherited `tools` field holds a flattened
    list of the tools, built when the wrapper is created.
    """

    def __init__(self, toolsets: list[Toolset]) -> None:
        """
        Create a wrapper around the given toolsets.

        :param toolsets: The toolsets to combine; their order defines the iteration order
        """
        # The base initializer receives the flattened tools and runs its validation on them
        super().__init__([tool for toolset in toolsets for tool in toolset])
        self.toolsets = toolsets

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "_ToolsetWrapper":
        """
        Deserialize a wrapper from the dictionary produced by the inherited `to_dict`.

        The inherited `from_dict` calls `cls(tools=...)`, which this class's `__init__`
        does not accept. The inherited serialization format is a flat list of tools, so
        the result wraps one plain Toolset holding all of them.

        :param data: Dictionary representation of the wrapper
        :returns: A new wrapper containing a single Toolset with the deserialized tools
        """
        return cls([Toolset.from_dict(data)])

    def __iter__(self) -> Iterator[Tool]:
        """Iterate over all tools from all toolsets."""
        for toolset in self.toolsets:
            yield from toolset

    def __contains__(self, item: Any) -> bool:
        """Check if a tool is in any of the toolsets."""
        return any(item in toolset for toolset in self.toolsets)

    def warm_up(self) -> None:
        """Warm up all toolsets."""
        for toolset in self.toolsets:
            toolset.warm_up()

    def add(self, tool: Tool | Toolset) -> None:
        """
        Add a new Tool or another Toolset to this wrapper.

        The inherited `add` validates the input and extends the flattened `tools` list.
        The new entry is also appended to `toolsets`, because iteration, length and
        membership read from there.

        :param tool: A Tool instance or another Toolset to add
        :raises ValueError: If adding the tool would result in duplicate tool names
        :raises TypeError: If the provided object is not a Tool or Toolset
        """
        super().add(tool)
        self.toolsets.append(tool if isinstance(tool, Toolset) else Toolset([tool]))

    def __len__(self) -> int:
        """Return total number of tools across all toolsets."""
        return sum(len(toolset) for toolset in self.toolsets)

    def __getitem__(self, index: int) -> Tool:
        """
        Get a tool by index across all toolsets.

        Negative indices count from the end, as they do for a plain Toolset.

        :param index: Index of the Tool to get
        :returns: The Tool at the specified index
        :raises IndexError: If the index is out of range
        """
        position = index
        if isinstance(index, int) and index < 0:
            # Translate to the equivalent non-negative position; if it is still negative
            # the loop below never matches and IndexError is raised
            position = index + len(self)

        # Leverage iteration instead of manual index tracking
        for i, tool in enumerate(self):
            if i == position:
                return tool
        raise IndexError("ToolsetWrapper index out of range")

    def __add__(self, other: Toolset | Tool | list[Tool]) -> "_ToolsetWrapper":
        """
        Add another toolset or tool to this wrapper.

        :param other: A Toolset, a Tool, or a list of Tools; tools are wrapped in a new Toolset
        :returns: A new wrapper holding the existing toolsets followed by the new one
        :raises TypeError: If other is not a Toolset, Tool, or list of Tools
        """
        if isinstance(other, Toolset):
            return _ToolsetWrapper(self.toolsets + [other])
        if isinstance(other, Tool):
            return _ToolsetWrapper(self.toolsets + [Toolset([other])])
        if isinstance(other, list) and all(isinstance(item, Tool) for item in other):
            return _ToolsetWrapper(self.toolsets + [Toolset(other)])
        raise TypeError(f"Cannot add {type(other).__name__} to ToolsetWrapper")
363              return _ToolsetWrapper(self.toolsets + [Toolset(other)])
364          raise TypeError(f"Cannot add {type(other).__name__} to ToolsetWrapper")