# example_structured_tool.py
"""
Example of a well-structured MCP tool with best practices.

This module demonstrates how to create a comprehensive MCP tool
that implements all the best practices for LLM usability:
- Tool annotations for better decision-making
- Standardized error handling
- Input validation
- Detailed documentation with examples
- Structured outputs with consistent formats
"""

import time
import uuid
from typing import Any, Dict, Optional

# ---------------------------------
from error_handling import non_empty_string, validate_inputs, with_error_handling
from tool_annotations import ToolAnnotations

# --- Import RAG tools/services ---
# Direct function imports keep the example simple. A larger application might
# use dependency injection or a service locator instead.
from ultimate_mcp_server.tools.rag import (
    add_documents,
    create_knowledge_base,
    delete_knowledge_base,
    retrieve_context,
)

# --- Define KB Name for Demo ---
# A random suffix keeps concurrent demo runs from sharing a knowledge base.
DEMO_KB_NAME = f"example_tool_kb_{uuid.uuid4().hex[:8]}"
# ------------------------------

# --- Search limits ---
# Single source of truth for the bounds advertised in the tool's input schema
# and enforced inside the tool implementation.
MIN_SEARCH_LIMIT = 1
MAX_SEARCH_LIMIT = 20
DEFAULT_SEARCH_LIMIT = 5
# ---------------------

# --- Sample Data ---
# These documents are added to the demo knowledge base by setup_demo_kb().
# "text" becomes the document body; every other key is stored as metadata.
SAMPLE_DOCUMENTS = [
    {
        "id": "kb-001",
        "title": "Introduction to Climate Change",
        "text": "An overview of climate change causes and effects.",
        "type": "article",
        "level": "beginner",
        "date": "2023-01-15",
        "score_for_ranking": 0.95,  # Kept for a potential sorting demonstration
    },
    {
        "id": "kb-002",
        "title": "Machine Learning Fundamentals",
        "text": "Learn the basics of machine learning algorithms.",
        "type": "tutorial",
        "level": "beginner",
        "date": "2023-02-20",
        "score_for_ranking": 0.92,
    },
    {
        "id": "kb-003",
        "title": "Advanced Neural Networks",
        "text": "Deep dive into neural network architectures.",
        "type": "tutorial",
        "level": "advanced",
        "date": "2023-03-10",
        "score_for_ranking": 0.88,
    },
    {
        "id": "kb-004",
        "title": "Climate Policy FAQ",
        "text": "Frequently asked questions about climate policies.",
        "type": "faq",
        "level": "intermediate",
        "date": "2023-04-05",
        "score_for_ranking": 0.82,
    },
    {
        "id": "kb-005",
        "title": "Python Reference for Data Science",
        "text": "Reference guide for Python in data science applications.",
        "type": "reference",
        "level": "intermediate",
        "date": "2023-05-12",
        "score_for_ranking": 0.78,
    },
]
# -------------------------------------


def _build_search_response(results, retrieval_time, elapsed_seconds, error=None):
    """
    Assemble the search tool's response dictionary.

    Every code path of the search tool goes through this helper so that success
    and failure payloads always carry the same keys.

    Args:
        results: List of retrieved items (may be empty).
        retrieval_time: Retrieval duration in seconds to report to the caller.
        elapsed_seconds: Wall time measured by the tool itself, in seconds;
            reported as whole milliseconds in ``search_time_ms``.
        error: Optional error message. When given, it is stored under ``error``.

    Returns:
        Dictionary with ``results``, ``count``, ``total_matches``,
        ``retrieval_time`` and ``search_time_ms`` (plus ``error`` on failure).
        ``count``/``retrieval_time`` are the keys this tool has always returned;
        ``total_matches``/``search_time_ms`` are the keys declared in the tool's
        output schema. ``total_matches`` equals the number of returned results
        because no separate total is available to this tool.
    """
    response: Dict[str, Any] = {}
    if error is not None:
        response["error"] = error
    response["results"] = results
    response["count"] = len(results)
    response["total_matches"] = len(results)
    response["retrieval_time"] = retrieval_time
    response["search_time_ms"] = int(elapsed_seconds * 1000)
    return response


class ExampleTool:
    """
    Example implementation of a well-structured MCP tool with best practices.

    The ExampleTool class serves as a reference implementation that demonstrates how to
    design and implement tools for the Model Context Protocol (MCP) ecosystem. It showcases
    a RAG (Retrieval-Augmented Generation) tool that queries a knowledge base.

    Key design features:
    - Tool registration with the MCP server
    - Schema definitions for inputs and outputs
    - Clear tool descriptions with usage guidance for LLMs
    - Tool annotations that provide semantic hints about tool behavior
    - Consistent error handling and input validation

    The class implements a search_knowledge_base tool that queries the demo knowledge
    base (DEMO_KB_NAME) through retrieve_context. The implementation demonstrates how to:
    - Process input parameters and apply validation
    - Interact with an external service (the knowledge base)
    - Format response data in a consistent structure
    - Report errors with meaningful messages

    Usage:
        ```python
        # Initialize the MCP server
        server = MCPServer()

        # Create an instance (automatically registers all tools)
        tool = ExampleTool(server)

        # The knowledge base must be populated before the tool is called
        await setup_demo_kb()
        ```
    """

    def __init__(self, mcp_server):
        """
        Initialize an ExampleTool instance and register its tools with the MCP server.

        The constructor stores a reference to the server and immediately calls
        _register_tools(). It does not create or connect to the knowledge base;
        that is done separately by setup_demo_kb().

        Args:
            mcp_server: The MCP server to register tools with. It must expose a
                ``tool`` decorator factory (used as ``@mcp_server.tool(...)``).

        Notes:
            - Tool registration happens immediately during initialization
            - Tool usage requires the demo knowledge base to be set up separately
        """
        self.mcp = mcp_server
        self._register_tools()

    def _register_tools(self):
        """
        Register all tools provided by this class with the MCP server.

        Called once from __init__. It builds the tool annotations, then defines the
        tool function and registers it through the server's ``tool`` decorator.

        The tool defined here is:
        - search_knowledge_base: Searches the demo knowledge base for documents
          matching a query and optional metadata filters

        The tool function is decorated with (outermost first):
        - @self.mcp.tool: Registers the function as an MCP tool
        - @with_error_handling: Presumably provides standardized exception handling
          (implemented in error_handling; not visible here)
        - @validate_inputs: Checks ``query`` with non_empty_string before execution

        Returns:
            None - Tools are registered as a side effect
        """

        # Behavioral hints and worked examples that help an LLM decide when and
        # how to call the tool.
        search_annotations = ToolAnnotations(
            read_only_hint=True,  # This tool doesn't modify anything
            destructive_hint=False,  # No destructive operations
            idempotent_hint=True,  # Can be called repeatedly with same results
            open_world_hint=True,  # Interacts with external data sources
            audience=["assistant"],  # Intended for the LLM to use
            priority=0.8,  # High priority tool
            title="Search Knowledge Base",  # Human-readable title
            examples=[
                {
                    "name": "Basic search",
                    "description": "Search for information about a topic",
                    "input": {"query": "climate change", "filters": {"type": "article"}},
                    "output": {
                        "results": [
                            {"title": "Climate Change Basics", "score": 0.92},
                            {"title": "Effects of Global Warming", "score": 0.87},
                        ],
                        "total_matches": 2,
                        "search_time_ms": 105,
                    },
                },
                {
                    "name": "Advanced search",
                    "description": "Search with multiple filters and limits",
                    "input": {
                        "query": "machine learning",
                        "filters": {"type": "tutorial", "level": "beginner"},
                        "limit": 1,
                    },
                    "output": {
                        "results": [{"title": "Introduction to Machine Learning", "score": 0.95}],
                        "total_matches": 1,
                        "search_time_ms": 87,
                    },
                },
            ],
        )

        @self.mcp.tool(
            name="search_knowledge_base",
            description=(
                "Search for information in the knowledge base using keywords and filters.\n\n"
                "This tool is ideal for finding relevant information on specific topics. "
                "It supports filtering by content type, date ranges, and other metadata. "
                "The tool returns a list of matching results sorted by relevance score.\n\n"
                "WHEN TO USE:\n"
                "- When you need to find specific information on a topic\n"
                "- When you want to discover relevant articles or documentation\n"
                "- Before generating content to ensure accuracy\n\n"
                "WHEN NOT TO USE:\n"
                "- When you need to modify or create content (use content_* tools instead)\n"
                "- When you need very recent information that might not be in the knowledge base\n"
                "- When you need exact answers to questions (use qa_* tools instead)"
            ),
            annotations=search_annotations.to_dict(),
            input_schema={
                "type": "object",
                "properties": {
                    "query": {"type": "string", "description": "Search query (required)"},
                    "filters": {
                        "type": "object",
                        "description": "Optional filters to narrow results",
                        "properties": {
                            "type": {
                                "type": "string",
                                "enum": ["article", "tutorial", "reference", "faq"],
                                "description": "Content type filter",
                            },
                            "level": {
                                "type": "string",
                                "enum": ["beginner", "intermediate", "advanced"],
                                "description": "Difficulty level filter",
                            },
                            "date_after": {
                                "type": "string",
                                "format": "date",
                                "description": "Only include content after this date (YYYY-MM-DD)",
                            },
                        },
                    },
                    "limit": {
                        "type": "integer",
                        "minimum": MIN_SEARCH_LIMIT,
                        "maximum": MAX_SEARCH_LIMIT,
                        "default": DEFAULT_SEARCH_LIMIT,
                        "description": "Maximum number of results to return (1-20, default 5)",
                    },
                },
                "required": ["query"],
            },
            output_schema={
                "type": "object",
                "properties": {
                    "results": {
                        "type": "array",
                        "items": {
                            "type": "object",
                            "properties": {
                                "id": {"type": "string"},
                                "title": {"type": "string"},
                                "summary": {"type": "string"},
                                "type": {"type": "string"},
                                "date": {"type": "string", "format": "date"},
                                "score": {"type": "number"},
                            },
                        },
                    },
                    "total_matches": {"type": "integer"},
                    "search_time_ms": {"type": "integer"},
                },
            },
        )
        @with_error_handling
        @validate_inputs(query=non_empty_string)
        async def search_knowledge_base(
            query: str,
            filters: Optional[Dict[str, Any]] = None,
            limit: int = DEFAULT_SEARCH_LIMIT,
            ctx=None,
        ) -> Dict[str, Any]:
            """
            Search for information in the knowledge base using keywords and filters.

            The query and filters are forwarded to retrieve_context against the demo
            knowledge base (DEMO_KB_NAME).

            Args:
                query: Search query string (required)
                filters: Optional filters to narrow results
                    - type: Content type filter (article, tutorial, reference, faq)
                    - level: Difficulty level filter (beginner, intermediate, advanced)
                    - date_after: Only include content after this date (YYYY-MM-DD)
                limit: Maximum number of results to return; values outside 1-20 are
                    clamped into that range (default 5)
                ctx: Context object passed by the MCP server (unused here)

            Returns:
                Dictionary containing:
                - results: List of retrieved items as returned by retrieve_context.
                - total_matches: Number of results returned (same value as count).
                - search_time_ms: Wall time spent in this tool, in whole milliseconds.
                - count: Number of results returned (kept for existing callers).
                - retrieval_time: Retrieval time reported by retrieve_context, or the
                  locally measured time in seconds if it reports none.
                - error: Present only on failure; a human-readable message.

            Examples:
                Basic search:
                search_knowledge_base(query="climate change")

                Filtered search:
                search_knowledge_base(
                    query="machine learning",
                    filters={"type": "tutorial", "level": "beginner"},
                    limit=3
                )
            """
            # perf_counter is monotonic, so the measured duration cannot go
            # negative if the system clock is adjusted mid-call.
            started = time.perf_counter()

            # Filters are passed through unchanged.
            # NOTE(review): `date_after` is advertised in the input schema but is
            # forwarded verbatim as a metadata key; confirm that retrieve_context
            # translates it into a range comparison on the stored `date` field.
            metadata_filter = filters

            # Enforce the bounds promised by the input schema.
            limit = max(MIN_SEARCH_LIMIT, min(MAX_SEARCH_LIMIT, limit))

            try:
                retrieval_result = await retrieve_context(
                    knowledge_base_name=DEMO_KB_NAME,
                    query=query,
                    top_k=limit,
                    metadata_filter=metadata_filter,
                    # Add other relevant params like min_score if needed
                )
                elapsed = time.perf_counter() - started

                # retrieve_context is expected to return a dict with a truthy
                # 'success' flag on success and a 'message' on failure.
                if retrieval_result.get("success"):
                    # Treat a missing or None result list as "no matches".
                    found = retrieval_result.get("results") or []
                    return _build_search_response(
                        found,
                        retrieval_result.get("retrieval_time", elapsed),
                        elapsed,
                    )

                # Propagate the error reported by retrieve_context
                return _build_search_response(
                    [],
                    elapsed,
                    elapsed,
                    error=retrieval_result.get("message", "Retrieval failed"),
                )

            except Exception as e:
                # Log the error (a real implementation would use a logger)
                print(f"Search error: {str(e)}")

                # Same payload shape as the other failure path so callers can
                # always read 'results' and the timing fields.
                elapsed = time.perf_counter() - started
                return _build_search_response(
                    [], elapsed, elapsed, error=f"Search failed: {str(e)}"
                )


# --- Setup/Teardown for Demo KB ---
async def setup_demo_kb():
    """
    Create and populate the demo knowledge base with the sample documents.

    Steps, in order:
    1. Create a knowledge base named DEMO_KB_NAME, passing overwrite=True
    2. Split each entry of SAMPLE_DOCUMENTS into text, metadata, and id
    3. Add the documents to the knowledge base with add_documents

    For every sample document the "text" value is sent as the document body and
    all remaining keys (id, title, type, level, date, score_for_ranking) are sent
    as its metadata.

    Progress is printed to stdout. On failure the error is printed and the
    original exception is re-raised unchanged.

    Returns:
        None

    Raises:
        Exception: Whatever create_knowledge_base or add_documents raised.

    Usage:
        await setup_demo_kb()  # Must be called in an async context
    """
    print(f"Setting up demo knowledge base: {DEMO_KB_NAME}...")
    try:
        await create_knowledge_base(name=DEMO_KB_NAME, overwrite=True)
        texts_to_add = [doc["text"] for doc in SAMPLE_DOCUMENTS]
        metadatas_to_add = [
            {k: v for k, v in doc.items() if k != "text"} for doc in SAMPLE_DOCUMENTS
        ]
        ids_to_add = [doc["id"] for doc in SAMPLE_DOCUMENTS]
        await add_documents(
            knowledge_base_name=DEMO_KB_NAME,
            documents=texts_to_add,
            metadatas=metadatas_to_add,
            ids=ids_to_add,
        )
        print("Demo knowledge base setup complete.")
    except Exception as e:
        print(f"Error setting up demo KB: {e}")
        raise


async def teardown_demo_kb():
    """
    Delete the demo knowledge base.

    Calls delete_knowledge_base for DEMO_KB_NAME and prints the outcome to stdout.
    Unlike setup_demo_kb(), failures are printed and then suppressed: cleanup is
    best-effort and must not prevent the application from continuing or shutting
    down.

    Typical call sites are server shutdown hooks or the end of a demo run.

    Returns:
        None

    Usage:
        await teardown_demo_kb()  # Must be called in an async context

    Note:
        In production systems, more robust cleanup might involve tracking created
        resources and ensuring proper disposal even after unexpected termination.
    """
    print(f"Cleaning up demo knowledge base: {DEMO_KB_NAME}...")
    try:
        await delete_knowledge_base(name=DEMO_KB_NAME)
        print("Demo knowledge base cleaned up.")
    except Exception as e:
        # Deliberate best-effort: report and continue.
        print(f"Error cleaning up demo KB: {e}")


# -----------------------------------------


def register_example_tools(mcp_server):
    """
    Register all example tools with the MCP server.

    Instantiates ExampleTool, whose constructor registers the tools with the given
    server. This function does NOT set up or tear down the demo knowledge base:
    it is synchronous, and the setup/teardown functions are coroutines.

    Integration notes:
    - The demo knowledge base (DEMO_KB_NAME) must be set up before tools are used
    - In a production environment, the async setup should be handled as part of the
      server lifecycle (e.g., using lifespan or startup events)

    Args:
        mcp_server: The MCP server to register tools with. It must expose a
            ``tool`` decorator factory.

    Returns:
        None

    Usage:
        ```python
        # During server initialization:
        server = MCPServer()
        register_example_tools(server)

        # Set up the knowledge base separately (it is async):
        await setup_demo_kb()  # Before using the tools

        # And clean up when done:
        await teardown_demo_kb()  # After tools are no longer needed
        ```

    Known limitations:
    - Knowledge base setup must be handled separately as an async operation
    - Resource cleanup must also be manually triggered as an async operation
    """
    # Async setup is intentionally not run here: calling asyncio.run() from a
    # synchronous registration function would block, and would fail outright if
    # an event loop is already running.
    # TODO: Run setup/teardown during the server lifespan instead.
    ExampleTool(mcp_server)