# model_preferences.py
  1  """
  2  Model preferences for MCP servers.
  3  
  4  This module implements the ModelPreferences capability from the MCP protocol,
  5  allowing servers to express preferences for model selection during sampling.
  6  """
  7  
  8  from typing import List, Optional
  9  
 10  
 11  class ModelHint:
 12      """
 13      Hint for model selection.
 14  
 15      Model hints allow the server to suggest specific models or model families
 16      that would be appropriate for a given task.
 17      """
 18  
 19      def __init__(self, name: str):
 20          """
 21          Initialize a model hint.
 22  
 23          Args:
 24              name: A hint for a model name (e.g., 'claude-3-5-sonnet', 'sonnet', 'claude').
 25                   This should be treated as a substring matching.
 26          """
 27          self.name = name
 28  
 29      def to_dict(self) -> dict:
 30          """Convert model hint to dictionary."""
 31          return {"name": self.name}
 32  
 33  
 34  class ModelPreferences:
 35      """
 36      Preferences for model selection to guide LLM client decisions.
 37  
 38      The ModelPreferences class provides a standardized way for servers to express
 39      prioritization along three key dimensions (intelligence, speed, cost) that can
 40      help clients make more informed decisions when selecting LLM models for specific tasks.
 41  
 42      These preferences serve as advisory hints that help optimize the tradeoffs between:
 43      - Intelligence/capability: Higher quality, more capable models (but often slower/costlier)
 44      - Speed: Faster response time and lower latency (but potentially less capable)
 45      - Cost: Lower token or API costs (but potentially less capable or slower)
 46  
 47      The class also supports model-specific hints that can recommend particular models
 48      or model families that are well-suited for specific tasks (e.g., suggesting Claude
 49      models for creative writing or GPT-4V for image analysis).
 50  
 51      All preferences are expressed with normalized values between 0.0 (lowest priority)
 52      and 1.0 (highest priority) to allow for consistent interpretation across different
 53      implementations.
 54  
 55      Note: These preferences are always advisory. Clients may use them as guidance but
 56      are not obligated to follow them, particularly if there are overriding user preferences
 57      or system constraints.
 58  
 59      Usage example:
 60          ```python
 61          # For a coding task requiring high intelligence but where cost is a major concern
 62          preferences = ModelPreferences(
 63              intelligence_priority=0.8,  # High priority on capability
 64              speed_priority=0.4,         # Moderate priority on speed
 65              cost_priority=0.7,          # High priority on cost
 66              hints=[ModelHint("gpt-4-turbo")]  # Specific model recommendation
 67          )
 68          ```
 69      """
 70  
 71      def __init__(
 72          self,
 73          intelligence_priority: float = 0.5,
 74          speed_priority: float = 0.5,
 75          cost_priority: float = 0.5,
 76          hints: Optional[List[ModelHint]] = None,
 77      ):
 78          """
 79          Initialize model preferences.
 80  
 81          Args:
 82              intelligence_priority: How much to prioritize intelligence/capabilities (0.0-1.0).
 83                  Higher values favor more capable, sophisticated models that may produce
 84                  higher quality outputs, handle complex tasks, or follow instructions better.
 85                  Default: 0.5 (balanced)
 86              speed_priority: How much to prioritize sampling speed/latency (0.0-1.0).
 87                  Higher values favor faster models with lower latency, which is important
 88                  for real-time applications, interactive experiences, or time-sensitive tasks.
 89                  Default: 0.5 (balanced)
 90              cost_priority: How much to prioritize cost efficiency (0.0-1.0).
 91                  Higher values favor more economical models with lower token or API costs,
 92                  which is important for budget-constrained applications or high-volume usage.
 93                  Default: 0.5 (balanced)
 94              hints: Optional model hints in preference order. These can suggest specific
 95                  models or model families that would be appropriate for the task.
 96                  The list should be ordered by preference (most preferred first).
 97          """
 98          # Clamp values between 0 and 1
 99          self.intelligence_priority = max(0.0, min(1.0, intelligence_priority))
100          self.speed_priority = max(0.0, min(1.0, speed_priority))
101          self.cost_priority = max(0.0, min(1.0, cost_priority))
102          self.hints = hints or []
103  
104      def to_dict(self) -> dict:
105          """Convert model preferences to dictionary."""
106          return {
107              "intelligencePriority": self.intelligence_priority,
108              "speedPriority": self.speed_priority,
109              "costPriority": self.cost_priority,
110              "hints": [hint.to_dict() for hint in self.hints],
111          }
112  
113  
# Pre-defined preference templates for common use cases

# Neutral profile: every axis weighted equally at 0.5.
# Suitable for general-purpose work where no single trade-off dominates.
BALANCED_PREFERENCES = ModelPreferences(
    intelligence_priority=0.5,
    speed_priority=0.5,
    cost_priority=0.5,
)

# Capability-first profile: favors the most sophisticated models.
# Suited to complex reasoning, creative work, or critical tasks where
# output quality outweighs latency and price.
INTELLIGENCE_FOCUSED = ModelPreferences(
    hints=[ModelHint("claude-3-5-opus")],
    intelligence_priority=0.9,
    speed_priority=0.3,
    cost_priority=0.3,
)

# Latency-first profile: favors fast, responsive models.
# Suited to real-time or interactive experiences (e.g. chatbots) where
# user wait time matters most.
SPEED_FOCUSED = ModelPreferences(
    hints=[ModelHint("claude-3-haiku"), ModelHint("gemini-flash")],
    intelligence_priority=0.3,
    speed_priority=0.9,
    cost_priority=0.5,
)

# Economy-first profile: favors the cheapest adequate models.
# Suited to high-volume or background workloads and strict budgets.
COST_FOCUSED = ModelPreferences(
    hints=[ModelHint("mistral"), ModelHint("gemini-flash")],
    intelligence_priority=0.3,
    speed_priority=0.5,
    cost_priority=0.9,
)