toolset.py
# SPDX-FileCopyrightText: 2022-present deepset GmbH <info@deepset.ai>
#
# SPDX-License-Identifier: Apache-2.0

from collections.abc import Iterator
from dataclasses import dataclass, field
from typing import Any, Union

from haystack.core.serialization import generate_qualified_class_name, import_class_by_name
from haystack.tools.tool import Tool, _check_duplicate_tool_names


@dataclass
class Toolset:
    """
    A collection of related Tools that can be used and managed as a cohesive unit.

    Toolset serves two main purposes:

    1. Group related tools together:
       Toolset allows you to organize related tools into a single collection, making it easier
       to manage and use them as a unit in Haystack pipelines.

       Example:
       ```python
       from haystack.tools import Tool, Toolset
       from haystack.components.tools import ToolInvoker

       # Define math functions
       def add_numbers(a: int, b: int) -> int:
           return a + b

       def subtract_numbers(a: int, b: int) -> int:
           return a - b

       # Create tools with proper schemas
       add_tool = Tool(
           name="add",
           description="Add two numbers",
           parameters={
               "type": "object",
               "properties": {
                   "a": {"type": "integer"},
                   "b": {"type": "integer"}
               },
               "required": ["a", "b"]
           },
           function=add_numbers
       )

       subtract_tool = Tool(
           name="subtract",
           description="Subtract b from a",
           parameters={
               "type": "object",
               "properties": {
                   "a": {"type": "integer"},
                   "b": {"type": "integer"}
               },
               "required": ["a", "b"]
           },
           function=subtract_numbers
       )

       # Create a toolset with the math tools
       math_toolset = Toolset([add_tool, subtract_tool])

       # Use the toolset with a ToolInvoker or ChatGenerator component
       invoker = ToolInvoker(tools=math_toolset)
       ```

    2. Base class for dynamic tool loading:
       By subclassing Toolset, you can create implementations that dynamically load tools
       from external sources like OpenAPI URLs, MCP servers, or other resources.

       Example:
       ```python
       from haystack.core.serialization import generate_qualified_class_name
       from haystack.tools import Tool, Toolset
       from haystack.components.tools import ToolInvoker

       class CalculatorToolset(Toolset):
           '''A toolset for calculator operations.'''

           def __init__(self) -> None:
               tools = self._create_tools()
               super().__init__(tools)

           def _create_tools(self):
               # These Tool instances are obviously defined statically and for illustration purposes only.
               # In a real-world scenario, you would dynamically load tools from an external source here.
               tools = []
               add_tool = Tool(
                   name="add",
                   description="Add two numbers",
                   parameters={
                       "type": "object",
                       "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}},
                       "required": ["a", "b"],
                   },
                   function=lambda a, b: a + b,
               )

               multiply_tool = Tool(
                   name="multiply",
                   description="Multiply two numbers",
                   parameters={
                       "type": "object",
                       "properties": {"a": {"type": "integer"}, "b": {"type": "integer"}},
                       "required": ["a", "b"],
                   },
                   function=lambda a, b: a * b,
               )

               tools.append(add_tool)
               tools.append(multiply_tool)

               return tools

           def to_dict(self):
               return {
                   "type": generate_qualified_class_name(type(self)),
                   "data": {},  # no data to serialize as we define the tools dynamically
               }

           @classmethod
           def from_dict(cls, data):
               return cls()  # Recreate the tools dynamically during deserialization

       # Create the dynamic toolset and use it with ToolInvoker
       calculator_toolset = CalculatorToolset()
       invoker = ToolInvoker(tools=calculator_toolset)
       ```

    Toolset implements the collection interface (__iter__, __contains__, __len__, __getitem__),
    making it behave like a list of Tools. This makes it compatible with components that expect
    iterable tools, such as ToolInvoker or Haystack chat generators.

    When implementing a custom Toolset subclass for dynamic tool loading:
    - Perform the dynamic loading in the __init__ method
    - Override to_dict() and from_dict() methods if your tools are defined dynamically
    - Serialize endpoint descriptors rather than tool instances if your tools
      are loaded from external sources
    """

    # Use field() with default_factory so every instance gets its own list
    tools: list[Tool] = field(default_factory=list)

    def __post_init__(self) -> None:
        """
        Validate and set up the toolset after initialization.

        This handles the case when tools are provided during initialization.

        :raises TypeError: If a single Tool was passed instead of a list of Tools
        """
        # If initialization was done with a single Tool, raise an error
        if isinstance(self.tools, Tool):
            raise TypeError("A single Tool cannot be directly passed to Toolset. Please use a list: Toolset([tool])")

        # Check for duplicate tool names in the initial set
        _check_duplicate_tool_names(self.tools)

    def __iter__(self) -> Iterator[Tool]:
        """
        Return an iterator over the Tools in this Toolset.

        This allows the Toolset to be used wherever a list of Tools is expected.

        :returns: An iterator yielding Tool instances
        """
        return iter(self.tools)

    def __contains__(self, item: str | Tool) -> bool:
        """
        Check if a tool is in this Toolset.

        Supports checking by:
        - Tool instance: tool in toolset
        - Tool name: "tool_name" in toolset

        :param item: Tool instance or tool name string
        :returns: True if contained, False otherwise
        """
        if isinstance(item, str):
            return any(tool.name == item for tool in self.tools)
        if isinstance(item, Tool):
            return item in self.tools
        return False

    def warm_up(self) -> None:
        """
        Prepare the Toolset for use.

        By default, this method iterates through and warms up all tools in the Toolset.
        Subclasses can override this method to customize initialization behavior, such as:

        - Setting up shared resources (database connections, HTTP sessions) instead of
          warming individual tools
        - Implementing custom initialization logic for dynamically loaded tools
        - Controlling when and how tools are initialized

        For example, a Toolset that manages tools from an external service (like MCPToolset)
        might override this to initialize a shared connection rather than warming up
        individual tools:

        ```python
        class MCPToolset(Toolset):
            def warm_up(self) -> None:
                # Only warm up the shared MCP connection, not individual tools
                self.mcp_connection = establish_connection(self.server_url)
        ```

        This method should be idempotent, as it may be called multiple times.
        """
        for tool in self.tools:
            if hasattr(tool, "warm_up"):
                tool.warm_up()

    def add(self, tool: Union[Tool, "Toolset"]) -> None:
        """
        Add a new Tool or merge another Toolset.

        :param tool: A Tool instance or another Toolset to add
        :raises ValueError: If adding the tool would result in duplicate tool names
        :raises TypeError: If the provided object is not a Tool or Toolset
        """
        if isinstance(tool, Tool):
            new_tools = [tool]
        elif isinstance(tool, Toolset):
            new_tools = list(tool)
        else:
            raise TypeError(f"Expected Tool or Toolset, got {type(tool).__name__}")

        # Validate the combined collection before mutating, so a rejected add leaves the toolset unchanged
        _check_duplicate_tool_names(self.tools + new_tools)

        self.tools.extend(new_tools)

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize the Toolset to a dictionary.

        :returns: A dictionary representation of the Toolset

        Note for subclass implementers:
        The default implementation is ideal for scenarios where Tool resolution is static. However, if your subclass
        of Toolset dynamically resolves Tool instances from external sources—such as an MCP server, OpenAPI URL, or
        a local OpenAPI specification—you should consider serializing the endpoint descriptor instead of the Tool
        instances themselves. This strategy preserves the dynamic nature of your Toolset and minimizes the overhead
        associated with serializing potentially large collections of Tool objects. Moreover, by serializing the
        descriptor, you ensure that the deserialization process can accurately reconstruct the Tool instances, even
        if they have been modified or removed since the last serialization. Failing to serialize the descriptor may
        lead to issues where outdated or incorrect Tool configurations are loaded, potentially causing errors or
        unexpected behavior.
        """
        return {
            "type": generate_qualified_class_name(type(self)),
            "data": {"tools": [tool.to_dict() for tool in self.tools]},
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "Toolset":
        """
        Deserialize a Toolset from a dictionary.

        :param data: Dictionary representation of the Toolset
        :returns: A new Toolset instance
        :raises TypeError: If a serialized entry names a class that is not a subclass of Tool
        """
        inner_data = data["data"]
        tools_data = inner_data.get("tools", [])

        tools = []
        for tool_data in tools_data:
            # Each entry names its own class, so Tool subclasses are restored through their own from_dict
            tool_class = import_class_by_name(tool_data["type"])
            if not issubclass(tool_class, Tool):
                raise TypeError(f"Class '{tool_class}' is not a subclass of Tool")
            tools.append(tool_class.from_dict(tool_data))

        return cls(tools=tools)

    def __add__(self, other: Union[Tool, "Toolset", list[Tool]]) -> "Toolset":
        """
        Concatenate this Toolset with another Tool, Toolset, or list of Tools.

        :param other: Another Tool, Toolset, or list of Tools to concatenate
        :returns: A new Toolset containing all tools
        :raises TypeError: If the other parameter is not a Tool, Toolset, or list of Tools
        :raises ValueError: If the combination would result in duplicate tool names
        """
        if isinstance(other, Tool):
            return Toolset(tools=self.tools + [other])
        if isinstance(other, Toolset):
            # Keep both toolsets intact instead of flattening them into plain tools
            return _ToolsetWrapper([self, other])
        if isinstance(other, list) and all(isinstance(item, Tool) for item in other):
            return Toolset(tools=self.tools + other)
        raise TypeError(f"Cannot add {type(other).__name__} to Toolset")

    def __len__(self) -> int:
        """
        Return the number of Tools in this Toolset.

        :returns: Number of Tools
        """
        return len(self.tools)

    def __getitem__(self, index: int) -> Tool:
        """
        Get a Tool by index.

        :param index: Index of the Tool to get
        :returns: The Tool at the specified index
        """
        return self.tools[index]


class _ToolsetWrapper(Toolset):
    """
    A wrapper that holds multiple toolsets and provides a unified interface.

    This is used internally when combining different types of toolsets to preserve
    their individual configurations while still being usable with ToolInvoker.

    Iteration, membership, length, indexing and warm-up are delegated to the wrapped
    toolsets. The inherited `tools` attribute holds a flattened list of their tools.
    """

    def __init__(self, toolsets: list[Toolset]) -> None:
        """
        Create a wrapper around the given toolsets.

        :param toolsets: The toolsets to combine
        :raises ValueError: If the combined tools contain duplicate tool names
        """
        # The base initializer runs __post_init__, which rejects duplicate names across the toolsets
        super().__init__([tool for toolset in toolsets for tool in toolset])
        self.toolsets = toolsets

    def __iter__(self) -> Iterator[Tool]:
        """Iterate over all tools from all toolsets."""
        for toolset in self.toolsets:
            yield from toolset

    def __contains__(self, item: Any) -> bool:
        """Check if a tool is in any of the toolsets."""
        return any(item in toolset for toolset in self.toolsets)

    def warm_up(self) -> None:
        """Warm up all toolsets."""
        for toolset in self.toolsets:
            toolset.warm_up()

    def add(self, tool: Union[Tool, Toolset]) -> None:
        """
        Add a new Tool or another Toolset to this wrapper.

        The addition is registered as one more wrapped toolset, so that iteration, length
        and membership checks (which read the wrapped toolsets) reflect it.

        :param tool: A Tool instance or another Toolset to add
        :raises ValueError: If adding the tool would result in duplicate tool names
        :raises TypeError: If the provided object is not a Tool or Toolset
        """
        if isinstance(tool, Tool):
            addition = Toolset([tool])
        elif isinstance(tool, Toolset):
            addition = tool
        else:
            raise TypeError(f"Expected Tool or Toolset, got {type(tool).__name__}")

        # Validate before mutating, so a rejected add leaves the wrapper unchanged
        _check_duplicate_tool_names([*self, *addition])

        # Rebind rather than append in place: the list may be the one the caller passed to __init__
        self.toolsets = [*self.toolsets, addition]
        self.tools.extend(addition)

    def to_dict(self) -> dict[str, Any]:
        """
        Serialize the wrapper to a dictionary.

        Each wrapped toolset is serialized through its own `to_dict`, so toolsets with custom
        serialization keep their own representation.

        :returns: A dictionary representation of the wrapper
        """
        return {
            "type": generate_qualified_class_name(type(self)),
            "data": {"toolsets": [toolset.to_dict() for toolset in self.toolsets]},
        }

    @classmethod
    def from_dict(cls, data: dict[str, Any]) -> "_ToolsetWrapper":
        """
        Deserialize a wrapper from a dictionary.

        Accepts the `"toolsets"` layout produced by `to_dict`. A payload that only carries a flat
        `"tools"` list is also accepted and loaded as a single plain Toolset.

        :param data: Dictionary representation of the wrapper
        :returns: A new wrapper instance
        :raises TypeError: If a serialized entry names a class that is not a subclass of Toolset
        """
        inner_data = data["data"]

        if "toolsets" not in inner_data:
            # Flat layout: rebuild the tools as one plain Toolset
            return cls([Toolset.from_dict({"data": {"tools": inner_data.get("tools", [])}})])

        toolsets = []
        for toolset_data in inner_data["toolsets"]:
            toolset_class = import_class_by_name(toolset_data["type"])
            if not issubclass(toolset_class, Toolset):
                raise TypeError(f"Class '{toolset_class}' is not a subclass of Toolset")
            toolsets.append(toolset_class.from_dict(toolset_data))

        return cls(toolsets)

    def __len__(self) -> int:
        """Return total number of tools across all toolsets."""
        return sum(len(toolset) for toolset in self.toolsets)

    def __getitem__(self, index: int) -> Tool:
        """
        Get a tool by index across all toolsets.

        Negative indices count from the end, as they do for a plain Toolset.

        :param index: Index of the Tool to get
        :returns: The Tool at the specified index
        :raises IndexError: If the index is out of range
        """
        # Index over the live iteration so the result always agrees with __iter__
        try:
            return list(self)[index]
        except IndexError:
            raise IndexError("ToolsetWrapper index out of range") from None

    def __add__(self, other: Toolset | Tool | list[Tool]) -> "_ToolsetWrapper":
        """
        Add another toolset or tool to this wrapper.

        :param other: A Toolset, a Tool, or a list of Tools to combine with this wrapper
        :returns: A new wrapper; this wrapper is left unchanged
        :raises TypeError: If other is not a Toolset, a Tool, or a list of Tools
        :raises ValueError: If the combination would result in duplicate tool names
        """
        if isinstance(other, Toolset):
            return _ToolsetWrapper(self.toolsets + [other])
        if isinstance(other, Tool):
            return _ToolsetWrapper(self.toolsets + [Toolset([other])])
        if isinstance(other, list) and all(isinstance(item, Tool) for item in other):
            return _ToolsetWrapper(self.toolsets + [Toolset(other)])
        raise TypeError(f"Cannot add {type(other).__name__} to ToolsetWrapper")