"""
Model preferences for MCP servers.

This module implements the ModelPreferences capability from the MCP protocol,
allowing servers to express preferences for model selection during sampling.
"""

from typing import List, Optional


def _clamp01(value: float) -> float:
    """Clamp *value* into the normalized priority range [0.0, 1.0]."""
    return max(0.0, min(1.0, value))


class ModelHint:
    """
    Hint for model selection.

    Model hints allow the server to suggest specific models or model families
    that would be appropriate for a given task.
    """

    def __init__(self, name: str):
        """
        Initialize a model hint.

        Args:
            name: A hint for a model name (e.g., 'claude-3-5-sonnet', 'sonnet',
                'claude'). This should be treated as a substring matching.
        """
        self.name = name

    def __repr__(self) -> str:
        return f"{type(self).__name__}(name={self.name!r})"

    def to_dict(self) -> dict:
        """Convert model hint to dictionary."""
        return {"name": self.name}


class ModelPreferences:
    """
    Preferences for model selection to guide LLM client decisions.

    The ModelPreferences class provides a standardized way for servers to
    express prioritization along three key dimensions (intelligence, speed,
    cost) that can help clients make more informed decisions when selecting
    LLM models for specific tasks.

    These preferences serve as advisory hints that help optimize the
    tradeoffs between:
    - Intelligence/capability: Higher quality, more capable models
      (but often slower/costlier)
    - Speed: Faster response time and lower latency
      (but potentially less capable)
    - Cost: Lower token or API costs
      (but potentially less capable or slower)

    The class also supports model-specific hints that can recommend
    particular models or model families that are well-suited for specific
    tasks (e.g., suggesting Claude models for creative writing or GPT-4V
    for image analysis).

    All preferences are expressed with normalized values between 0.0
    (lowest priority) and 1.0 (highest priority) to allow for consistent
    interpretation across different implementations.

    Note: These preferences are always advisory. Clients may use them as
    guidance but are not obligated to follow them, particularly if there
    are overriding user preferences or system constraints.

    Usage example:
    ```python
    # For a coding task requiring high intelligence but where cost is a
    # major concern
    preferences = ModelPreferences(
        intelligence_priority=0.8,  # High priority on capability
        speed_priority=0.4,         # Moderate priority on speed
        cost_priority=0.7,          # High priority on cost
        hints=[ModelHint("gpt-4-turbo")]  # Specific model recommendation
    )
    ```
    """

    def __init__(
        self,
        intelligence_priority: float = 0.5,
        speed_priority: float = 0.5,
        cost_priority: float = 0.5,
        hints: Optional[List[ModelHint]] = None,
    ):
        """
        Initialize model preferences.

        Args:
            intelligence_priority: How much to prioritize intelligence/capabilities
                (0.0-1.0). Higher values favor more capable, sophisticated models
                that may produce higher quality outputs, handle complex tasks, or
                follow instructions better. Default: 0.5 (balanced)
            speed_priority: How much to prioritize sampling speed/latency (0.0-1.0).
                Higher values favor faster models with lower latency, which is
                important for real-time applications, interactive experiences, or
                time-sensitive tasks. Default: 0.5 (balanced)
            cost_priority: How much to prioritize cost efficiency (0.0-1.0).
                Higher values favor more economical models with lower token or API
                costs, which is important for budget-constrained applications or
                high-volume usage. Default: 0.5 (balanced)
            hints: Optional model hints in preference order. These can suggest
                specific models or model families that would be appropriate for
                the task. The list should be ordered by preference (most
                preferred first).
        """
        # Out-of-range inputs are silently clamped rather than raising, since
        # these values are advisory and a best-effort interpretation is fine.
        self.intelligence_priority = _clamp01(intelligence_priority)
        self.speed_priority = _clamp01(speed_priority)
        self.cost_priority = _clamp01(cost_priority)
        self.hints = hints or []

    def __repr__(self) -> str:
        return (
            f"{type(self).__name__}("
            f"intelligence_priority={self.intelligence_priority!r}, "
            f"speed_priority={self.speed_priority!r}, "
            f"cost_priority={self.cost_priority!r}, "
            f"hints={self.hints!r})"
        )

    def to_dict(self) -> dict:
        """Convert model preferences to dictionary.

        Keys use the camelCase names expected by the MCP wire format.
        """
        return {
            "intelligencePriority": self.intelligence_priority,
            "speedPriority": self.speed_priority,
            "costPriority": self.cost_priority,
            "hints": [hint.to_dict() for hint in self.hints],
        }


# Pre-defined preference templates for common use cases

# Default balanced preference profile - no strong bias in any direction
# Use when there's no clear priority between intelligence, speed, and cost
# Good for general-purpose applications where trade-offs are acceptable
BALANCED_PREFERENCES = ModelPreferences(
    intelligence_priority=0.5, speed_priority=0.5, cost_priority=0.5
)

# Prioritizes high-quality, sophisticated model responses
# Use for complex reasoning, creative tasks, or critical applications
# where accuracy and capability matter more than speed or cost
INTELLIGENCE_FOCUSED = ModelPreferences(
    intelligence_priority=0.9,
    speed_priority=0.3,
    cost_priority=0.3,
    hints=[ModelHint("claude-3-5-opus")],
)

# Prioritizes response speed and low latency
# Use for real-time applications, interactive experiences,
# chatbots, or any use case where user wait time is critical
SPEED_FOCUSED = ModelPreferences(
    intelligence_priority=0.3,
    speed_priority=0.9,
    cost_priority=0.5,
    hints=[ModelHint("claude-3-haiku"), ModelHint("gemini-flash")],
)

# Prioritizes cost efficiency and token economy
# Use for high-volume applications, background processing,
# or when operating under strict budget constraints
COST_FOCUSED = ModelPreferences(
    intelligence_priority=0.3,
    speed_priority=0.5,
    cost_priority=0.9,
    hints=[ModelHint("mistral"), ModelHint("gemini-flash")],
)