model_router.py
1 """ 2 Edge Model Router - Biomimetic Neural Routing 3 4 Routes queries to appropriate models using patterns from the nervous system. 5 6 ## Organic Parallel: Neural Processing Hierarchy 7 8 The nervous system routes signals based on complexity and urgency: 9 10 | Neural Level | Function | Silicon Equivalent | 11 |---------------------|-----------------------------|-----------------------| 12 | Spinal cord | Reflexes, no conscious thought | TINY (<1B) - instant, local | 13 | Brainstem | Vital functions, automatic | SMALL (1-7B) - routine | 14 | Limbic system | Emotion, context, memory | MEDIUM (7-30B) - contextual | 15 | Prefrontal cortex | Novel reasoning, planning | LARGE (Cloud) - complex | 16 17 ## Key Biological Principles Applied 18 19 1. **Most processing is unconscious** (95% never reaches cortex) 20 → Default to local models, escalate only when necessary 21 22 2. **Escalation is metabolically expensive** (cortex uses 10x more glucose) 23 → Track cost, prefer efficient routing 24 25 3. **Habituation** (repeated stimuli route lower) 26 → Cache successful routings, demote familiar patterns 27 28 4. **Startle response** (urgent signals bypass cortex) 29 → Emergency queries get immediate local response 30 31 5. **Refractory period** (neurons need recovery after firing) 32 → After complex queries, prefer simpler routing briefly 33 34 6. **Priming** (recent activity lowers thresholds) 35 → Recent topics route to lower tiers 36 37 7. **Arousal state** (alert vs drowsy affects routing) 38 → Energy state (battery) affects tier selection 39 40 The router IS a Markov blanket - it separates query space (external) 41 from model space (internal), deciding what crosses. 42 """ 43 44 from dataclasses import dataclass, field 45 from datetime import datetime, timedelta 46 from enum import Enum 47 from typing import Optional, List, Dict, Any, Callable, Tuple 48 from collections import defaultdict 49 import json 50 import re 51 import hashlib 52 import urllib.request 53 54 55 class ModelTier(Enum): 56 """ 57 Model tiers mapped to neural processing levels. 58 59 SPINAL → TINY: Reflexive, no "thinking" needed 60 BRAINSTEM → SMALL: Automatic, routine processing 61 LIMBIC → MEDIUM: Contextual, memory-linked 62 CORTEX → LARGE: Novel reasoning, planning 63 """ 64 TINY = "tiny" # Spinal - reflexes 65 SMALL = "small" # Brainstem - automatic 66 MEDIUM = "medium" # Limbic - contextual 67 LARGE = "large" # Cortex - reasoning 68 69 70 @dataclass 71 class ModelConfig: 72 """Configuration for a model endpoint.""" 73 tier: ModelTier 74 name: str 75 endpoint: str 76 max_tokens: int = 4096 77 capabilities: List[str] = field(default_factory=list) 78 cost_per_token: float = 0.0 79 80 # Biological properties 81 latency_ms: int = 100 # Response time 82 energy_cost: float = 0.1 # Relative energy (0-1) 83 recovery_time_ms: int = 0 # Refractory period 84 85 86 @dataclass 87 class RoutingDecision: 88 """Result of routing with biological metadata.""" 89 tier: ModelTier 90 reason: str 91 confidence: float 92 estimated_tokens: int 93 capabilities_needed: List[str] 94 95 # Biological factors that influenced decision 96 factors: Dict[str, float] = field(default_factory=dict) 97 98 # Neural pathway taken 99 pathway: str = "default" # reflex, automatic, contextual, reasoning 100 101 102 @dataclass 103 class QueryContext: 104 """Context for routing decisions.""" 105 query: str 106 conversation_history: List[Dict] = field(default_factory=list) 107 phoenix_state: Optional[Dict] = None 108 file_context: List[str] = field(default_factory=list) 109 user_preference: Optional[ModelTier] = None 110 111 # Biological state 112 urgency: float = 0.5 # 0=relaxed, 1=urgent (affects startle) 113 energy_state: float = 1.0 # 0=depleted, 1=full (battery/arousal) 114 recent_topics: List[str] = field(default_factory=list) # For priming 115 116 117 @dataclass 118 class RoutingMemory: 119 """ 120 Memory of past routings for habituation. 121 122 Organic parallel: Synaptic strengthening through repeated use. 123 Queries that consistently succeed at a tier get "hardwired" there. 124 """ 125 query_hash: str 126 successful_tier: ModelTier 127 success_count: int = 1 128 last_used: datetime = field(default_factory=datetime.now) 129 avg_latency_ms: float = 0 130 131 132 class BiomimeticRouter: 133 """ 134 Routes queries using nervous system principles. 135 136 The router maintains state analogous to neural state: 137 - Arousal level (energy/alertness) 138 - Refractory state (recovery after heavy processing) 139 - Priming (recent topics lower thresholds) 140 - Habituation memory (learned routings) 141 """ 142 143 def __init__(self): 144 self.models: Dict[ModelTier, ModelConfig] = {} 145 146 # Neural state 147 self._arousal: float = 1.0 # Energy/alertness level 148 self._last_complex_query: Optional[datetime] = None 149 self._refractory_duration = timedelta(seconds=30) 150 151 # Habituation memory (learned routings) 152 self._routing_memory: Dict[str, RoutingMemory] = {} 153 self._memory_max_age = timedelta(hours=24) 154 155 # Priming state (recent topics) 156 self._recent_topics: List[Tuple[str, datetime]] = [] 157 self._priming_window = timedelta(minutes=10) 158 159 # Statistics 160 self.stats = { 161 'queries': 0, 162 'by_tier': {tier.value: 0 for tier in ModelTier}, 163 'by_pathway': defaultdict(int), 164 'escalations': 0, 165 'habituated': 0, 166 'primed': 0, 167 'reflexive': 0, 168 } 169 170 # Pattern recognition (like neural feature detectors) 171 self._init_pattern_detectors() 172 173 def _init_pattern_detectors(self): 174 """ 175 Initialize pattern detectors. 176 177 Organic parallel: Feature detectors in visual cortex. 178 Each detector responds to specific patterns. 179 """ 180 # Reflexive patterns (spinal - immediate response) 181 self.REFLEX_PATTERNS = [ 182 r'^(yes|no|ok|sure|thanks|thank you|got it)[\.\!]?$', 183 r'^(hi|hello|hey)[\.\!]?$', 184 r'^\d+$', # Just a number 185 r'^[A-Za-z]$', # Single letter 186 ] 187 188 # Automatic patterns (brainstem - routine) 189 self.AUTOMATIC_PATTERNS = [ 190 (r'^what (is|are) ', 'definition'), 191 (r'^list ', 'enumeration'), 192 (r'^format ', 'formatting'), 193 (r'^convert ', 'conversion'), 194 (r'^translate ', 'translation'), 195 ] 196 197 # Contextual patterns (limbic - needs memory/emotion) 198 self.CONTEXTUAL_PATTERNS = [ 199 (r'\b(remember|earlier|before|previously|we discussed)\b', 'memory'), 200 (r'\b(feel|emotion|mood|tone)\b', 'affect'), 201 (r'\b(context|situation|scenario)\b', 'situational'), 202 ] 203 204 # Reasoning patterns (cortex - novel thought) 205 self.REASONING_PATTERNS = [ 206 (r'\b(why|because|therefore|hence|thus|reason)\b', 'causal'), 207 (r'\b(implement|build|create|design|architect)\b', 'generative'), 208 (r'\b(debug|fix|solve|troubleshoot)\b', 'diagnostic'), 209 (r'\b(compare|contrast|analyze|evaluate|trade-?off)\b', 'analytical'), 210 (r'\b(plan|strategy|approach|how should)\b', 'planning'), 211 (r'```', 'code'), # Code blocks 212 ] 213 214 # Urgency/startle patterns (bypass normal routing) 215 self.STARTLE_PATTERNS = [ 216 r'\b(urgent|emergency|asap|immediately|critical|broken)\b', 217 r'\!\!\!', 218 r'^HELP', 219 ] 220 221 def register_model(self, config: ModelConfig): 222 """Register a model for a tier.""" 223 self.models[config.tier] = config 224 225 def configure_defaults(self): 226 """Configure default model endpoints.""" 227 self.register_model(ModelConfig( 228 tier=ModelTier.TINY, 229 name="qwen2.5:0.5b", 230 endpoint="ollama:qwen2.5:0.5b", 231 max_tokens=2048, 232 capabilities=["classification", "extraction", "reflex"], 233 latency_ms=50, 234 energy_cost=0.05, 235 )) 236 237 self.register_model(ModelConfig( 238 tier=ModelTier.SMALL, 239 name="qwen2.5:3b", 240 endpoint="ollama:qwen2.5:3b", 241 max_tokens=4096, 242 capabilities=["summarization", "qa", "formatting", "automatic"], 243 latency_ms=200, 244 energy_cost=0.15, 245 )) 246 247 self.register_model(ModelConfig( 248 tier=ModelTier.MEDIUM, 249 name="qwen2.5:7b", 250 endpoint="ollama:qwen2.5:7b", 251 max_tokens=8192, 252 capabilities=["code_generation", "reasoning", "contextual"], 253 latency_ms=500, 254 energy_cost=0.4, 255 )) 256 257 self.register_model(ModelConfig( 258 tier=ModelTier.LARGE, 259 name="claude-sonnet-4", 260 endpoint="anthropic:claude-sonnet-4-20250514", 261 max_tokens=16384, 262 capabilities=["code_generation", "reasoning", "memory", "complex", "novel"], 263 cost_per_token=0.00001, 264 latency_ms=2000, 265 energy_cost=1.0, 266 recovery_time_ms=5000, # Need recovery after cortical processing 267 )) 268 269 def route(self, ctx: QueryContext) -> RoutingDecision: 270 """ 271 Route query using biological principles. 272 273 Routing order (like neural processing): 274 1. Check for startle/urgent → immediate local response 275 2. Check habituation memory → known good routing 276 3. Check priming → recent topics route lower 277 4. Check refractory state → prefer simpler after complex 278 5. Pattern match → determine required level 279 6. Apply arousal/energy modulation 280 """ 281 factors = {} 282 query = ctx.query.strip() 283 query_lower = query.lower() 284 285 # Update arousal from context 286 self._arousal = ctx.energy_state 287 288 # === 1. STARTLE RESPONSE === 289 # Urgent queries get immediate local handling (bypass cortex) 290 if self._is_startle(query_lower) or ctx.urgency > 0.8: 291 factors['startle'] = 1.0 292 self.stats['reflexive'] += 1 293 return self._make_decision( 294 ModelTier.SMALL, # Fast local response 295 "Startle response - urgent, handling locally", 296 factors, 297 pathway="reflex" 298 ) 299 300 # === 2. REFLEX CHECK === 301 # Very simple queries handled at spinal level 302 if self._is_reflex(query): 303 factors['reflex'] = 1.0 304 self.stats['reflexive'] += 1 305 return self._make_decision( 306 ModelTier.TINY, 307 "Reflex - simple pattern, spinal routing", 308 factors, 309 pathway="reflex" 310 ) 311 312 # === 3. HABITUATION CHECK === 313 # Have we successfully routed this before? 314 query_hash = self._hash_query(query_lower) 315 if query_hash in self._routing_memory: 316 memory = self._routing_memory[query_hash] 317 if memory.success_count >= 2: # Learned routing 318 factors['habituation'] = min(1.0, memory.success_count / 5) 319 self.stats['habituated'] += 1 320 return self._make_decision( 321 memory.successful_tier, 322 f"Habituated - {memory.success_count} successful routings", 323 factors, 324 pathway="habituated" 325 ) 326 327 # === 4. PRIMING CHECK === 328 # Recent topics lower the threshold 329 primed_tier = self._check_priming(query_lower, ctx.recent_topics) 330 if primed_tier: 331 factors['priming'] = 0.7 332 self.stats['primed'] += 1 333 334 # === 5. REFRACTORY CHECK === 335 # After complex processing, prefer simpler routing 336 refractory_penalty = self._get_refractory_penalty() 337 if refractory_penalty > 0: 338 factors['refractory'] = refractory_penalty 339 340 # === 6. PATTERN MATCHING === 341 # Determine required processing level 342 base_tier, pattern_reason = self._pattern_match(query_lower) 343 factors['pattern'] = 1.0 344 345 # === 7. CONTEXT MODULATION === 346 # Long history needs more context handling 347 if len(ctx.conversation_history) > 10: 348 if base_tier == ModelTier.TINY: 349 base_tier = ModelTier.SMALL 350 factors['history_length'] = len(ctx.conversation_history) / 20 351 352 # File context requires more capacity 353 if ctx.file_context: 354 total_context = sum(len(f) for f in ctx.file_context) 355 if total_context > 5000 and base_tier.value < ModelTier.MEDIUM.value: 356 base_tier = ModelTier.MEDIUM 357 factors['file_context'] = total_context / 10000 358 359 # === 8. AROUSAL MODULATION === 360 # Low energy → prefer lower tiers (like drowsy brain) 361 if self._arousal < 0.3: 362 if base_tier == ModelTier.LARGE: 363 base_tier = ModelTier.MEDIUM 364 factors['low_arousal'] = 1 - self._arousal 365 366 # === 9. PRIMING ADJUSTMENT === 367 # If primed, can go one tier lower 368 if primed_tier and base_tier.value > ModelTier.TINY.value: 369 tier_order = [ModelTier.TINY, ModelTier.SMALL, ModelTier.MEDIUM, ModelTier.LARGE] 370 current_idx = tier_order.index(base_tier) 371 base_tier = tier_order[max(0, current_idx - 1)] 372 373 # === 10. REFRACTORY ADJUSTMENT === 374 # If in refractory, prefer one tier lower 375 if refractory_penalty > 0.5 and base_tier == ModelTier.LARGE: 376 base_tier = ModelTier.MEDIUM 377 378 # === 11. USER OVERRIDE === 379 if ctx.user_preference: 380 base_tier = ctx.user_preference 381 factors['user_override'] = 1.0 382 383 # Determine pathway name 384 pathway = { 385 ModelTier.TINY: "reflex", 386 ModelTier.SMALL: "automatic", 387 ModelTier.MEDIUM: "contextual", 388 ModelTier.LARGE: "reasoning" 389 }.get(base_tier, "default") 390 391 return self._make_decision(base_tier, pattern_reason, factors, pathway) 392 393 def _is_startle(self, query: str) -> bool: 394 """Check if query triggers startle response.""" 395 for pattern in self.STARTLE_PATTERNS: 396 if re.search(pattern, query, re.IGNORECASE): 397 return True 398 return False 399 400 def _is_reflex(self, query: str) -> bool: 401 """Check if query is simple enough for reflex handling.""" 402 for pattern in self.REFLEX_PATTERNS: 403 if re.match(pattern, query, re.IGNORECASE): 404 return True 405 return len(query.split()) <= 3 and '?' not in query 406 407 def _check_priming(self, query: str, recent_topics: List[str]) -> Optional[ModelTier]: 408 """ 409 Check if query is primed by recent topics. 410 411 Organic parallel: Neural priming - recent activation 412 lowers the threshold for related neurons. 413 """ 414 # Clean old priming 415 now = datetime.now() 416 self._recent_topics = [ 417 (t, ts) for t, ts in self._recent_topics 418 if now - ts < self._priming_window 419 ] 420 421 # Add context topics 422 for topic in recent_topics: 423 self._recent_topics.append((topic.lower(), now)) 424 425 # Check if query matches primed topics 426 query_words = set(query.lower().split()) 427 for topic, _ in self._recent_topics: 428 if topic in query or topic in query_words: 429 return ModelTier.SMALL # Primed queries can go lower 430 431 return None 432 433 def _get_refractory_penalty(self) -> float: 434 """ 435 Calculate refractory penalty after complex processing. 436 437 Organic parallel: Neurons have a refractory period 438 after firing where they're less responsive. 439 """ 440 if not self._last_complex_query: 441 return 0.0 442 443 elapsed = datetime.now() - self._last_complex_query 444 if elapsed > self._refractory_duration: 445 return 0.0 446 447 # Linear decay 448 return 1.0 - (elapsed.total_seconds() / self._refractory_duration.total_seconds()) 449 450 def _pattern_match(self, query: str) -> Tuple[ModelTier, str]: 451 """ 452 Match query patterns to determine tier. 453 454 Like feature detectors in visual cortex, 455 each pattern detector responds to specific features. 456 """ 457 # Check reasoning patterns first (highest level) 458 for pattern, name in self.REASONING_PATTERNS: 459 if re.search(pattern, query, re.IGNORECASE): 460 return ModelTier.LARGE, f"Reasoning pattern detected: {name}" 461 462 # Check contextual patterns 463 for pattern, name in self.CONTEXTUAL_PATTERNS: 464 if re.search(pattern, query, re.IGNORECASE): 465 return ModelTier.MEDIUM, f"Contextual pattern detected: {name}" 466 467 # Check automatic patterns 468 for pattern, name in self.AUTOMATIC_PATTERNS: 469 if re.search(pattern, query, re.IGNORECASE): 470 return ModelTier.SMALL, f"Automatic pattern detected: {name}" 471 472 # Default based on query length/complexity 473 words = len(query.split()) 474 if words < 10: 475 return ModelTier.SMALL, "Short query, automatic processing" 476 elif words < 50: 477 return ModelTier.MEDIUM, "Medium query, contextual processing" 478 else: 479 return ModelTier.LARGE, "Long query, reasoning processing" 480 481 def _hash_query(self, query: str) -> str: 482 """Hash query for habituation memory lookup.""" 483 # Normalize: lowercase, remove punctuation, sort words 484 normalized = ' '.join(sorted(re.sub(r'[^\w\s]', '', query.lower()).split())) 485 return hashlib.md5(normalized.encode()).hexdigest()[:16] 486 487 def _make_decision( 488 self, 489 tier: ModelTier, 490 reason: str, 491 factors: Dict[str, float], 492 pathway: str 493 ) -> RoutingDecision: 494 """Create routing decision and update stats.""" 495 # Ensure we have this tier 496 if tier not in self.models: 497 # Escalate to next available 498 tier_order = [ModelTier.TINY, ModelTier.SMALL, ModelTier.MEDIUM, ModelTier.LARGE] 499 for t in tier_order: 500 if t.value >= tier.value and t in self.models: 501 tier = t 502 reason = f"Escalated: {reason}" 503 self.stats['escalations'] += 1 504 break 505 506 # Update stats 507 self.stats['queries'] += 1 508 self.stats['by_tier'][tier.value] += 1 509 self.stats['by_pathway'][pathway] += 1 510 511 # Mark complex query time for refractory 512 if tier == ModelTier.LARGE: 513 self._last_complex_query = datetime.now() 514 515 # Calculate confidence (lower for more factors involved) 516 confidence = max(0.5, 1.0 - len(factors) * 0.1) 517 518 return RoutingDecision( 519 tier=tier, 520 reason=reason, 521 confidence=confidence, 522 estimated_tokens=0, # Could estimate 523 capabilities_needed=[], # Could extract 524 factors=factors, 525 pathway=pathway 526 ) 527 528 def record_success(self, query: str, tier: ModelTier, latency_ms: float = 0): 529 """ 530 Record successful routing for habituation learning. 531 532 Organic parallel: Synaptic strengthening through 533 repeated successful activation. 534 """ 535 query_hash = self._hash_query(query.lower()) 536 537 if query_hash in self._routing_memory: 538 memory = self._routing_memory[query_hash] 539 if memory.successful_tier == tier: 540 memory.success_count += 1 541 memory.last_used = datetime.now() 542 memory.avg_latency_ms = (memory.avg_latency_ms + latency_ms) / 2 543 else: 544 self._routing_memory[query_hash] = RoutingMemory( 545 query_hash=query_hash, 546 successful_tier=tier, 547 avg_latency_ms=latency_ms 548 ) 549 550 # Prune old memories (forgetting) 551 self._prune_memories() 552 553 def record_failure(self, query: str, tier: ModelTier): 554 """ 555 Record failed routing to prevent future use. 556 557 Organic parallel: Synaptic weakening when 558 pathway doesn't lead to reward. 559 """ 560 query_hash = self._hash_query(query.lower()) 561 if query_hash in self._routing_memory: 562 memory = self._routing_memory[query_hash] 563 memory.success_count = max(0, memory.success_count - 2) 564 if memory.success_count == 0: 565 del self._routing_memory[query_hash] 566 567 def _prune_memories(self): 568 """ 569 Remove old routing memories. 570 571 Organic parallel: Synaptic pruning - unused 572 connections are removed over time. 573 """ 574 now = datetime.now() 575 to_remove = [] 576 577 for hash_key, memory in self._routing_memory.items(): 578 if now - memory.last_used > self._memory_max_age: 579 to_remove.append(hash_key) 580 581 for key in to_remove: 582 del self._routing_memory[key] 583 584 def prime_topic(self, topic: str): 585 """ 586 Prime a topic for faster routing. 587 588 Organic parallel: Semantic priming - hearing 589 a word makes related words easier to recognize. 590 """ 591 self._recent_topics.append((topic.lower(), datetime.now())) 592 593 def set_arousal(self, level: float): 594 """ 595 Set arousal/energy level. 596 597 Organic parallel: Arousal systems (reticular 598 activating system) modulate cortical responsiveness. 599 """ 600 self._arousal = max(0.0, min(1.0, level)) 601 602 def get_stats(self) -> Dict[str, Any]: 603 """Get routing statistics including biological metrics.""" 604 total = max(1, self.stats['queries']) 605 return { 606 **self.stats, 607 'local_ratio': ( 608 self.stats['by_tier']['tiny'] + 609 self.stats['by_tier']['small'] + 610 self.stats['by_tier']['medium'] 611 ) / total, 612 'habituation_rate': self.stats['habituated'] / total, 613 'priming_rate': self.stats['primed'] / total, 614 'reflex_rate': self.stats['reflexive'] / total, 615 'memories_stored': len(self._routing_memory), 616 'arousal_level': self._arousal, 617 } 618 619 async def query(self, ctx: QueryContext) -> Dict[str, Any]: 620 """Route and execute a query.""" 621 decision = self.route(ctx) 622 model = self.models.get(decision.tier) 623 624 if not model: 625 return { 626 'error': f'No model available for tier {decision.tier.value}', 627 'decision': decision 628 } 629 630 endpoint = model.endpoint 631 start_time = datetime.now() 632 633 if endpoint.startswith('ollama:'): 634 response = await self._query_ollama( 635 endpoint.split(':', 1)[1], 636 ctx.query, 637 ctx.conversation_history 638 ) 639 elif endpoint.startswith('anthropic:'): 640 response = await self._query_anthropic( 641 endpoint.split(':', 1)[1], 642 ctx.query, 643 ctx.conversation_history 644 ) 645 else: 646 response = {'error': f'Unknown endpoint type: {endpoint}'} 647 648 # Record success/failure for habituation 649 elapsed_ms = (datetime.now() - start_time).total_seconds() * 1000 650 if 'error' not in response: 651 self.record_success(ctx.query, decision.tier, elapsed_ms) 652 else: 653 self.record_failure(ctx.query, decision.tier) 654 655 return { 656 'response': response, 657 'decision': decision, 658 'model': model.name, 659 'tier': decision.tier.value, 660 'pathway': decision.pathway, 661 'latency_ms': elapsed_ms 662 } 663 664 async def _query_ollama( 665 self, 666 model: str, 667 query: str, 668 history: List[Dict] 669 ) -> Dict[str, Any]: 670 """Query Ollama API.""" 671 try: 672 messages = history + [{"role": "user", "content": query}] 673 data = json.dumps({ 674 "model": model, 675 "messages": messages, 676 "stream": False 677 }).encode('utf-8') 678 679 req = urllib.request.Request( 680 "http://localhost:11434/api/chat", 681 data=data, 682 headers={"Content-Type": "application/json"} 683 ) 684 685 with urllib.request.urlopen(req, timeout=60) as resp: 686 result = json.loads(resp.read().decode('utf-8')) 687 return { 688 'content': result.get('message', {}).get('content', ''), 689 'model': model, 690 'local': True 691 } 692 except Exception as e: 693 return {'error': str(e), 'model': model} 694 695 async def _query_anthropic( 696 self, 697 model: str, 698 query: str, 699 history: List[Dict] 700 ) -> Dict[str, Any]: 701 """Query Anthropic API - placeholder.""" 702 return { 703 'error': 'Anthropic API not implemented - use Claude Code directly', 704 'model': model, 705 'suggestion': 'Escalate to Claude Code for complex queries' 706 } 707 708 709 # Singleton instance 710 _router: Optional[BiomimeticRouter] = None 711 712 713 def get_router() -> BiomimeticRouter: 714 """Get or create the global router instance.""" 715 global _router 716 if _router is None: 717 _router = BiomimeticRouter() 718 _router.configure_defaults() 719 return _router 720 721 722 # Backwards compatibility 723 EdgeModelRouter = BiomimeticRouter 724 ComplexityEstimator = None # Deprecated, use BiomimeticRouter directly 725 726 727 if __name__ == "__main__": 728 import asyncio 729 730 async def main(): 731 router = get_router() 732 733 print("=== Biomimetic Router Test ===\n") 734 print("Testing neural routing principles:\n") 735 736 test_cases = [ 737 # Reflex (spinal) 738 ("yes", "Should be TINY - reflex"), 739 ("ok", "Should be TINY - reflex"), 740 741 # Automatic (brainstem) 742 ("What is Python?", "Should be SMALL - definition"), 743 ("List the files", "Should be SMALL - enumeration"), 744 ("Summarize this", "Should be SMALL - automatic"), 745 746 # Contextual (limbic) 747 ("Remember what we discussed earlier about attention?", "Should be MEDIUM - memory"), 748 749 # Reasoning (cortex) 750 ("Why does this algorithm have O(n²) complexity?", "Should be LARGE - causal"), 751 ("Implement a binary search tree with insert, delete, and balance operations", "Should be LARGE - generative"), 752 ("Compare Redux vs Context API for state management", "Should be LARGE - analytical"), 753 754 # Startle 755 ("URGENT: production is down!!!", "Should be SMALL - startle response"), 756 ] 757 758 for query, expected in test_cases: 759 ctx = QueryContext(query=query) 760 decision = router.route(ctx) 761 print(f"Query: {query[:50]}...") 762 print(f" → Tier: {decision.tier.value} | Pathway: {decision.pathway}") 763 print(f" → Reason: {decision.reason}") 764 print(f" → Factors: {decision.factors}") 765 print(f" → Expected: {expected}") 766 print() 767 768 # Test habituation 769 print("=== Testing Habituation ===\n") 770 for _ in range(3): 771 ctx = QueryContext(query="What is the weather?") 772 router.route(ctx) 773 router.record_success("What is the weather?", ModelTier.SMALL) 774 775 ctx = QueryContext(query="What is the weather?") 776 decision = router.route(ctx) 777 print(f"After 3 successful routings:") 778 print(f" → Tier: {decision.tier.value} | Pathway: {decision.pathway}") 779 print(f" → Should be habituated\n") 780 781 # Test priming 782 print("=== Testing Priming ===\n") 783 router.prime_topic("neural networks") 784 ctx = QueryContext(query="Tell me about neural networks") 785 decision = router.route(ctx) 786 print(f"After priming 'neural networks':") 787 print(f" → Tier: {decision.tier.value}") 788 print(f" → Factors: {decision.factors}\n") 789 790 # Stats 791 print("=== Routing Stats ===\n") 792 for key, value in router.get_stats().items(): 793 print(f" {key}: {value}") 794 795 asyncio.run(main())