Cradicle Explorer

/ example_structured_tool.py
example_structured_tool.py
  1  """
  2  Example of a well-structured MCP tool with best practices.
  3  
  4  This module demonstrates how to create a comprehensive MCP tool
  5  that implements all the best practices for LLM usability:
  6  - Tool annotations for better decision-making
  7  - Standardized error handling
  8  - Input validation
  9  - Detailed documentation with examples
 10  - Structured outputs with consistent formats
 11  """
 12  
 13  import time
 14  import uuid
 15  from typing import Any, Dict, Optional
 16  
 17  # ---------------------------------
 18  from error_handling import non_empty_string, validate_inputs, with_error_handling
 19  from tool_annotations import ToolAnnotations
 20  
 21  # --- Import RAG tools/services ---
 22  # Assuming direct function import for simplicity in example
 23  # In a real structured app, might use dependency injection or service locators
 24  from ultimate_mcp_server.tools.rag import (
 25      add_documents,
 26      create_knowledge_base,
 27      delete_knowledge_base,
 28      retrieve_context,
 29  )
 30  
 31  # --- Define KB Name for Demo ---
 32  DEMO_KB_NAME = f"example_tool_kb_{uuid.uuid4().hex[:8]}"
 33  # ------------------------------
 34  
 35  # --- Sample Data (moved to top) ---
 36  # This data will now be *added* to the KB during setup
 37  SAMPLE_DOCUMENTS = [
 38      {
 39          "id": "kb-001",
 40          "title": "Introduction to Climate Change",
 41          "text": "An overview of climate change causes and effects.",
 42          "type": "article",
 43          "level": "beginner",
 44          "date": "2023-01-15",
 45          "score_for_ranking": 0.95,  # Keep score for potential sorting demonstration?
 46      },
 47      {
 48          "id": "kb-002",
 49          "title": "Machine Learning Fundamentals",
 50          "text": "Learn the basics of machine learning algorithms.",
 51          "type": "tutorial",
 52          "level": "beginner",
 53          "date": "2023-02-20",
 54          "score_for_ranking": 0.92,
 55      },
 56      {
 57          "id": "kb-003",
 58          "title": "Advanced Neural Networks",
 59          "text": "Deep dive into neural network architectures.",
 60          "type": "tutorial",
 61          "level": "advanced",
 62          "date": "2023-03-10",
 63          "score_for_ranking": 0.88,
 64      },
 65      {
 66          "id": "kb-004",
 67          "title": "Climate Policy FAQ",
 68          "text": "Frequently asked questions about climate policies.",
 69          "type": "faq",
 70          "level": "intermediate",
 71          "date": "2023-04-05",
 72          "score_for_ranking": 0.82,
 73      },
 74      {
 75          "id": "kb-005",
 76          "title": "Python Reference for Data Science",
 77          "text": "Reference guide for Python in data science applications.",
 78          "type": "reference",
 79          "level": "intermediate",
 80          "date": "2023-05-12",
 81          "score_for_ranking": 0.78,
 82      },
 83  ]
 84  # -------------------------------------
 85  
 86  
 87  class ExampleTool:
 88      """
 89      Example implementation of a well-structured MCP tool with best practices.
 90  
 91      The ExampleTool class serves as a reference implementation that demonstrates how to properly
 92      design and implement tools for the Model Control Protocol (MCP) ecosystem. It showcases
 93      a real-world RAG (Retrieval-Augmented Generation) tool that interacts with a knowledge base.
 94  
 95      Key design features:
 96      - Proper tool registration with the MCP server
 97      - Comprehensive schema definitions for inputs and outputs
 98      - Clear tool descriptions with usage guidance for LLMs
 99      - Tool annotations that provide semantic hints about tool behavior
100      - Consistent error handling and input validation
101      - Well-structured implementation with clean separation of concerns
102  
103      The class implements a search_knowledge_base tool that allows querying a vector store
104      containing sample documents. The implementation demonstrates how to:
105      - Process input parameters and apply validation
106      - Interact with external services (the knowledge base)
107      - Format response data in a consistent structure
108      - Handle errors gracefully with meaningful error messages
109      - Add appropriate metadata to help LLMs use the tool effectively
110  
111      This implementation is intended as an educational reference for developers creating
112      their own MCP tools, showing patterns and practices that lead to tools that are
113      easily discoverable, usable, and maintainable.
114  
115      Usage:
116          ```python
117          # Initialize the MCP server
118          server = MCPServer()
119  
120          # Create an instance (automatically registers all tools)
121          tool = ExampleTool(server)
122  
123          # The tool is now available for use through the server
124          # After ensuring the knowledge base is set up
125          await setup_demo_kb()
126          ```
127      """
128  
129      def __init__(self, mcp_server):
130          """
131          Initialize an ExampleTool instance and register its tools with the MCP server.
132  
133          This constructor creates a new instance of the ExampleTool class and automatically
134          registers all tools implemented by this class with the provided MCP server. It
135          serves as the entry point for integrating the example tools into an MCP server.
136  
137          The initialization process:
138          1. Stores a reference to the provided MCP server instance
139          2. Calls the _register_tools method to define and register all tools
140          3. Establishes all necessary connections to the underlying knowledge base
141  
142          After initialization, the tools become available for use through the MCP server's
143          tool invocation interface. No further setup is required for the tools themselves,
144          although the underlying knowledge base (see setup_demo_kb) must be initialized
145          before the tools can be used effectively.
146  
147          Args:
148              mcp_server: An instance of the MCP server to register tools with. This must be
149                         a fully initialized server object with a functional tool registration
150                         system available through its 'tool' decorator.
151  
152          Returns:
153              None
154  
155          Notes:
156              - Tool registration happens immediately during initialization
157              - Tool usage requires the demo knowledge base to be set up separately
158              - The server instance is stored but not modified beyond tool registration
159          """
160          self.mcp = mcp_server
161          self._register_tools()
162  
163      def _register_tools(self):
164          """
165          Register all tools provided by this class with the MCP server.
166  
167          This private method is called during initialization and handles the registration
168          of all tools implemented by the ExampleTool class. It defines and registers
169          individual tools with appropriate metadata, schemas, and implementations.
170  
171          For each tool, the method:
172          1. Creates tool annotations with appropriate behavioral hints
173          2. Defines the tool's description, input schema, and output schema
174          3. Implements the tool function with error handling and input validation
175          4. Registers the complete tool with the MCP server
176  
177          The primary tool defined here is:
178          - search_knowledge_base: Searches the demo knowledge base for relevant documents
179            based on user queries and optional filters
180  
181          Each tool is decorated with:
182          - @self.mcp.tool: Registers the function as an MCP tool
183          - @with_error_handling: Provides standardized exception handling
184          - @validate_inputs: Validates required parameters before execution
185  
186          The detailed tool definitions include human-readable descriptions, parameter
187          schemas with comprehensive type information, and examples demonstrating proper
188          tool usage for LLMs.
189  
190          Returns:
191              None - Tools are registered as a side effect
192          """
193  
194          # Create tool annotations with appropriate hints
195          search_annotations = ToolAnnotations(
196              read_only_hint=True,  # This tool doesn't modify anything
197              destructive_hint=False,  # No destructive operations
198              idempotent_hint=True,  # Can be called repeatedly with same results
199              open_world_hint=True,  # Interacts with external data sources
200              audience=["assistant"],  # Intended for the LLM to use
201              priority=0.8,  # High priority tool
202              title="Search Knowledge Base",  # Human-readable title
203              examples=[
204                  {
205                      "name": "Basic search",
206                      "description": "Search for information about a topic",
207                      "input": {"query": "climate change", "filters": {"type": "article"}},
208                      "output": {
209                          "results": [
210                              {"title": "Climate Change Basics", "score": 0.92},
211                              {"title": "Effects of Global Warming", "score": 0.87},
212                          ],
213                          "total_matches": 2,
214                          "search_time_ms": 105,
215                      },
216                  },
217                  {
218                      "name": "Advanced search",
219                      "description": "Search with multiple filters and limits",
220                      "input": {
221                          "query": "machine learning",
222                          "filters": {"type": "tutorial", "level": "beginner"},
223                          "limit": 1,
224                      },
225                      "output": {
226                          "results": [{"title": "Introduction to Machine Learning", "score": 0.95}],
227                          "total_matches": 1,
228                          "search_time_ms": 87,
229                      },
230                  },
231              ],
232          )
233  
234          @self.mcp.tool(
235              name="search_knowledge_base",
236              description=(
237                  "Search for information in the knowledge base using keywords and filters.\n\n"
238                  "This tool is ideal for finding relevant information on specific topics. "
239                  "It supports filtering by content type, date ranges, and other metadata. "
240                  "The tool returns a list of matching results sorted by relevance score.\n\n"
241                  "WHEN TO USE:\n"
242                  "- When you need to find specific information on a topic\n"
243                  "- When you want to discover relevant articles or documentation\n"
244                  "- Before generating content to ensure accuracy\n\n"
245                  "WHEN NOT TO USE:\n"
246                  "- When you need to modify or create content (use content_* tools instead)\n"
247                  "- When you need very recent information that might not be in the knowledge base\n"
248                  "- When you need exact answers to questions (use qa_* tools instead)"
249              ),
250              annotations=search_annotations.to_dict(),
251              input_schema={
252                  "type": "object",
253                  "properties": {
254                      "query": {"type": "string", "description": "Search query (required)"},
255                      "filters": {
256                          "type": "object",
257                          "description": "Optional filters to narrow results",
258                          "properties": {
259                              "type": {
260                                  "type": "string",
261                                  "enum": ["article", "tutorial", "reference", "faq"],
262                                  "description": "Content type filter",
263                              },
264                              "level": {
265                                  "type": "string",
266                                  "enum": ["beginner", "intermediate", "advanced"],
267                                  "description": "Difficulty level filter",
268                              },
269                              "date_after": {
270                                  "type": "string",
271                                  "format": "date",
272                                  "description": "Only include content after this date (YYYY-MM-DD)",
273                              },
274                          },
275                      },
276                      "limit": {
277                          "type": "integer",
278                          "minimum": 1,
279                          "maximum": 20,
280                          "default": 5,
281                          "description": "Maximum number of results to return (1-20, default 5)",
282                      },
283                  },
284                  "required": ["query"],
285              },
286              output_schema={
287                  "type": "object",
288                  "properties": {
289                      "results": {
290                          "type": "array",
291                          "items": {
292                              "type": "object",
293                              "properties": {
294                                  "id": {"type": "string"},
295                                  "title": {"type": "string"},
296                                  "summary": {"type": "string"},
297                                  "type": {"type": "string"},
298                                  "date": {"type": "string", "format": "date"},
299                                  "score": {"type": "number"},
300                              },
301                          },
302                      },
303                      "total_matches": {"type": "integer"},
304                      "search_time_ms": {"type": "integer"},
305                  },
306              },
307          )
308          @with_error_handling
309          @validate_inputs(query=non_empty_string)
310          async def search_knowledge_base(
311              query: str, filters: Optional[Dict[str, Any]] = None, limit: int = 5, ctx=None
312          ) -> Dict[str, Any]:
313              """
314              Search for information in the knowledge base using keywords and filters.
315  
316              This tool is ideal for finding relevant information on specific topics.
317              It supports filtering by content type, date ranges, and other metadata.
318  
319              Args:
320                  query: Search query string (required)
321                  filters: Optional filters to narrow results
322                      - type: Content type filter (article, tutorial, reference, faq)
323                      - level: Difficulty level filter (beginner, intermediate, advanced)
324                      - date_after: Only include content after this date (YYYY-MM-DD)
325                  limit: Maximum number of results to return (1-20, default 5)
326                  ctx: Context object passed by the MCP server
327  
328              Returns:
329                  Dictionary containing:
330                  - results: List of retrieved document chunks with metadata and scores.
331                  - count: Number of results returned (respecting limit).
332                  - retrieval_time: Time taken for retrieval in seconds.
333  
334              Examples:
335                  Basic search:
336                    search_knowledge_base(query="climate change")
337  
338                  Filtered search:
339                    search_knowledge_base(
340                      query="machine learning",
341                      filters={"type": "tutorial", "level": "beginner"},
342                      limit=3
343                    )
344              """
345              # Start timing
346              start_time = time.time()
347  
348              # Convert simple filters to ChromaDB compatible format if needed
349              # The retrieve_context tool might already handle this, depending on its implementation.
350              # For simplicity, we pass the filters dict directly.
351              metadata_filter = filters  # Pass filters directly
352  
353              # Ensure limit is positive
354              limit = max(1, limit)
355  
356              try:
357                  # Call the actual retrieve_context tool
358                  # Ensure DEMO_KB_NAME is defined appropriately
359                  retrieval_result = await retrieve_context(
360                      knowledge_base_name=DEMO_KB_NAME,
361                      query=query,
362                      top_k=limit,
363                      metadata_filter=metadata_filter,
364                      # Add other relevant params like min_score if needed
365                  )
366  
367                  # Return formatted results
368                  # The retrieve_context tool already returns a dict with 'success', 'results', etc.
369                  # We can return it directly or reformat if needed.
370                  if retrieval_result.get("success"):
371                      return {
372                          "results": retrieval_result.get("results", []),
373                          "count": len(retrieval_result.get("results", [])),
374                          "retrieval_time": retrieval_result.get(
375                              "retrieval_time", time.time() - start_time
376                          ),
377                      }
378                  else:
379                      # Propagate the error from retrieve_context
380                      return {
381                          "error": retrieval_result.get("message", "Retrieval failed"),
382                          "results": [],
383                          "count": 0,
384                          "retrieval_time": time.time() - start_time,
385                      }
386  
387              except Exception as e:
388                  # Log the error (in a real implementation)
389                  print(f"Search error: {str(e)}")
390  
391                  # Return error response
392                  return {"error": f"Search failed: {str(e)}"}
393  
394  
395  # --- Added Setup/Teardown for Demo KB ---
async def setup_demo_kb():
    """
    Create the demo knowledge base and load SAMPLE_DOCUMENTS into it.

    Steps, in order:
    1. Create a knowledge base named DEMO_KB_NAME, passing overwrite=True
    2. Split each sample document into its text, its metadata (every field
       except "text") and its id
    3. Add all documents to the knowledge base in a single add_documents call

    Progress and failures are printed to stdout.

    Returns:
        None

    Raises:
        Exception: Any error raised during setup is printed and then re-raised
                 unchanged so the caller can handle it.

    Usage:
        await setup_demo_kb()  # Must be called in an async context
    """
    print(f"Setting up demo knowledge base: {DEMO_KB_NAME}...")
    try:
        await create_knowledge_base(name=DEMO_KB_NAME, overwrite=True)

        # Build the three parallel lists expected by add_documents in one pass.
        document_texts = []
        document_metadatas = []
        document_ids = []
        for document in SAMPLE_DOCUMENTS:
            document_texts.append(document["text"])
            document_metadatas.append(
                {field: value for field, value in document.items() if field != "text"}
            )
            document_ids.append(document["id"])

        await add_documents(
            knowledge_base_name=DEMO_KB_NAME,
            documents=document_texts,
            metadatas=document_metadatas,
            ids=document_ids,
        )
        print("Demo knowledge base setup complete.")
    except Exception as setup_error:
        print(f"Error setting up demo KB: {setup_error}")
        raise
450  
451  
async def teardown_demo_kb():
    """
    Delete the demo knowledge base on a best-effort basis.

    The function asks delete_knowledge_base to remove the knowledge base named
    DEMO_KB_NAME and prints the outcome to stdout. Unlike setup_demo_kb(), it
    never raises: a failure during cleanup is printed and then suppressed so
    that it cannot interrupt application shutdown.

    Typical call sites:
    - Server shutdown hooks/lifecycle events
    - After an example demonstration is complete
    - During application cleanup phases

    Returns:
        None

    Usage:
        await teardown_demo_kb()  # Must be called in an async context

    Note:
        Whether deleting a missing knowledge base is an error depends on
        delete_knowledge_base; either way, no exception escapes this function.
    """
    print(f"Cleaning up demo knowledge base: {DEMO_KB_NAME}...")
    try:
        await delete_knowledge_base(name=DEMO_KB_NAME)
        print("Demo knowledge base cleaned up.")
    except Exception as cleanup_error:
        # Deliberate best-effort: report the problem and carry on.
        print(f"Error cleaning up demo KB: {cleanup_error}")
493  
494  
495  # -----------------------------------------
496  
497  
def register_example_tools(mcp_server):
    """
    Register all example tools with the MCP server.

    This is the entry point for adding the example tools to a server. It
    instantiates ExampleTool, whose constructor registers the tools with the
    provided server. It does NOT create or delete the demo knowledge base:
    this function is synchronous, while setup and teardown are coroutines.

    Integration notes:
    - The demo knowledge base (DEMO_KB_NAME) must be set up before the tools are used
    - In a production environment, run the async setup/teardown as part of the
      server lifecycle (e.g., lifespan or startup/shutdown events)

    Args:
        mcp_server: An instance of the MCP server to register tools with. This should be
                   a fully initialized server object with a working tool registration system.

    Returns:
        None

    Usage:
        ```python
        # During server initialization:
        server = MCPServer()
        register_example_tools(server)

        # Set up the knowledge base separately (it is an async operation):
        await setup_demo_kb()  # Before using the tools

        # And clean up when done:
        await teardown_demo_kb()  # After tools are no longer needed
        ```

    Known limitations:
    - Knowledge base setup is not triggered here and must be awaited by the caller
    - Resource cleanup must also be triggered by the caller as an async operation
    """
    # Knowledge base setup is intentionally not run here: calling asyncio.run()
    # from this synchronous function would block the caller.
    # TODO: Run setup_demo_kb()/teardown_demo_kb() from the server lifespan instead.
    # Until then the KB must be set up before the tool is first called.

    # Constructing the instance is what registers the tools; the object itself
    # is not kept.
    ExampleTool(mcp_server)