# core/edge/model_router.py
"""
Edge Model Router - Biomimetic Neural Routing

Routes queries to appropriate models using patterns from the nervous system.

## Organic Parallel: Neural Processing Hierarchy

The nervous system routes signals based on complexity and urgency:

| Neural Level       | Function                       | Silicon Equivalent           |
|--------------------|--------------------------------|------------------------------|
| Spinal cord        | Reflexes, no conscious thought | TINY (<1B) - instant, local  |
| Brainstem          | Vital functions, automatic     | SMALL (1-7B) - routine       |
| Limbic system      | Emotion, context, memory       | MEDIUM (7-30B) - contextual  |
| Prefrontal cortex  | Novel reasoning, planning      | LARGE (Cloud) - complex      |

## Key Biological Principles Applied

1. **Most processing is unconscious** (95% never reaches cortex)
   → Default to local models, escalate only when necessary

2. **Escalation is metabolically expensive** (cortex uses 10x more glucose)
   → Track cost, prefer efficient routing

3. **Habituation** (repeated stimuli route lower)
   → Cache successful routings, demote familiar patterns

4. **Startle response** (urgent signals bypass cortex)
   → Emergency queries get immediate local response

5. **Refractory period** (neurons need recovery after firing)
   → After complex queries, prefer simpler routing briefly

6. **Priming** (recent activity lowers thresholds)
   → Recent topics route to lower tiers

7. **Arousal state** (alert vs drowsy affects routing)
   → Energy state (battery) affects tier selection

The router IS a Markov blanket - it separates query space (external)
from model space (internal), deciding what crosses.
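
## Example

A minimal usage sketch (the model names are whatever `configure_defaults`
registers; swap in your own with `register_model`):

    router = get_router()
    decision = router.route(QueryContext(query="Why is this query slow?"))
    # e.g. decision.tier == ModelTier.LARGE, decision.pathway == "reasoning"
    router.record_success("Why is this query slow?", decision.tier)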
"""

from dataclasses import dataclass, field
from datetime import datetime, timedelta
from enum import Enum
from typing import Optional, List, Dict, Any, Tuple
from collections import defaultdict
import json
import re
import hashlib
import urllib.request


class ModelTier(Enum):
    """
    Model tiers mapped to neural processing levels.

    SPINAL → TINY: Reflexive, no "thinking" needed
    BRAINSTEM → SMALL: Automatic, routine processing
    LIMBIC → MEDIUM: Contextual, memory-linked
    CORTEX → LARGE: Novel reasoning, planning
    """
    TINY = "tiny"       # Spinal - reflexes
    SMALL = "small"     # Brainstem - automatic
    MEDIUM = "medium"   # Limbic - contextual
    LARGE = "large"     # Cortex - reasoning


@dataclass
class ModelConfig:
    """Configuration for a model endpoint."""
    tier: ModelTier
    name: str
    endpoint: str
    max_tokens: int = 4096
    capabilities: List[str] = field(default_factory=list)
    cost_per_token: float = 0.0

    # Biological properties
    latency_ms: int = 100          # Response time
    energy_cost: float = 0.1       # Relative energy (0-1)
    recovery_time_ms: int = 0      # Refractory period


@dataclass
class RoutingDecision:
    """Result of routing with biological metadata."""
    tier: ModelTier
    reason: str
    confidence: float
    estimated_tokens: int
    capabilities_needed: List[str]

    # Biological factors that influenced decision
    factors: Dict[str, float] = field(default_factory=dict)

    # Neural pathway taken
    pathway: str = "default"  # reflex, automatic, contextual, reasoning


@dataclass
class QueryContext:
    """Context for routing decisions."""
    query: str
    conversation_history: List[Dict] = field(default_factory=list)
    phoenix_state: Optional[Dict] = None
    file_context: List[str] = field(default_factory=list)
    user_preference: Optional[ModelTier] = None

    # Biological state
    urgency: float = 0.5           # 0=relaxed, 1=urgent (affects startle)
    energy_state: float = 1.0      # 0=depleted, 1=full (battery/arousal)
    recent_topics: List[str] = field(default_factory=list)  # For priming


@dataclass
class RoutingMemory:
    """
    Memory of past routings for habituation.

    Organic parallel: Synaptic strengthening through repeated use.
    Queries that consistently succeed at a tier get "hardwired" there.
    """
    query_hash: str
    successful_tier: ModelTier
    success_count: int = 1
    last_used: datetime = field(default_factory=datetime.now)
    avg_latency_ms: float = 0


class BiomimeticRouter:
    """
    Routes queries using nervous system principles.

    The router maintains state analogous to neural state:
    - Arousal level (energy/alertness)
    - Refractory state (recovery after heavy processing)
    - Priming (recent topics lower thresholds)
    - Habituation memory (learned routings)
    """

    def __init__(self):
        self.models: Dict[ModelTier, ModelConfig] = {}

        # Neural state
        self._arousal: float = 1.0              # Energy/alertness level
        self._last_complex_query: Optional[datetime] = None
        self._refractory_duration = timedelta(seconds=30)

        # Habituation memory (learned routings)
        self._routing_memory: Dict[str, RoutingMemory] = {}
        self._memory_max_age = timedelta(hours=24)

        # Priming state (recent topics)
        self._recent_topics: List[Tuple[str, datetime]] = []
        self._priming_window = timedelta(minutes=10)

        # Statistics
        self.stats = {
            'queries': 0,
            'by_tier': {tier.value: 0 for tier in ModelTier},
            'by_pathway': defaultdict(int),
            'escalations': 0,
            'habituated': 0,
            'primed': 0,
            'reflexive': 0,
        }

        # Pattern recognition (like neural feature detectors)
        self._init_pattern_detectors()

    def _init_pattern_detectors(self):
        """
        Initialize pattern detectors.

        Organic parallel: Feature detectors in visual cortex.
        Each detector responds to specific patterns.
        """
        # Reflexive patterns (spinal - immediate response)
        self.REFLEX_PATTERNS = [
            r'^(yes|no|ok|sure|thanks|thank you|got it)[\.\!]?$',
            r'^(hi|hello|hey)[\.\!]?$',
            r'^\d+$',  # Just a number
            r'^[A-Za-z]$',  # Single letter
        ]

        # Automatic patterns (brainstem - routine)
        self.AUTOMATIC_PATTERNS = [
            (r'^what (is|are) ', 'definition'),
            (r'^list ', 'enumeration'),
            (r'^format ', 'formatting'),
            (r'^convert ', 'conversion'),
            (r'^translate ', 'translation'),
        ]

        # Contextual patterns (limbic - needs memory/emotion)
        self.CONTEXTUAL_PATTERNS = [
            (r'\b(remember|earlier|before|previously|we discussed)\b', 'memory'),
            (r'\b(feel|emotion|mood|tone)\b', 'affect'),
            (r'\b(context|situation|scenario)\b', 'situational'),
        ]

        # Reasoning patterns (cortex - novel thought)
        self.REASONING_PATTERNS = [
            (r'\b(why|because|therefore|hence|thus|reason)\b', 'causal'),
            (r'\b(implement|build|create|design|architect)\b', 'generative'),
            (r'\b(debug|fix|solve|troubleshoot)\b', 'diagnostic'),
            (r'\b(compare|contrast|analyze|evaluate|trade-?off)\b', 'analytical'),
            (r'\b(plan|strategy|approach|how should)\b', 'planning'),
            (r'```', 'code'),  # Code blocks
        ]

        # Urgency/startle patterns (bypass normal routing)
        self.STARTLE_PATTERNS = [
            r'\b(urgent|emergency|asap|immediately|critical|broken)\b',
            r'\!\!\!',
            r'^HELP',
        ]
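
        # Illustrative matches (informal, for orientation): "why is this slow?"
        # hits the causal REASONING pattern, "what is a mutex" hits the
        # definition AUTOMATIC pattern, and "URGENT: prod is broken" hits the
        # STARTLE patterns twice ("urgent", "broken").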

    def register_model(self, config: ModelConfig):
        """Register a model for a tier."""
        self.models[config.tier] = config

    def configure_defaults(self):
        """Configure default model endpoints."""
        self.register_model(ModelConfig(
            tier=ModelTier.TINY,
            name="qwen2.5:0.5b",
            endpoint="ollama:qwen2.5:0.5b",
            max_tokens=2048,
            capabilities=["classification", "extraction", "reflex"],
            latency_ms=50,
            energy_cost=0.05,
        ))

        self.register_model(ModelConfig(
            tier=ModelTier.SMALL,
            name="qwen2.5:3b",
            endpoint="ollama:qwen2.5:3b",
            max_tokens=4096,
            capabilities=["summarization", "qa", "formatting", "automatic"],
            latency_ms=200,
            energy_cost=0.15,
        ))

        self.register_model(ModelConfig(
            tier=ModelTier.MEDIUM,
            name="qwen2.5:7b",
            endpoint="ollama:qwen2.5:7b",
            max_tokens=8192,
            capabilities=["code_generation", "reasoning", "contextual"],
            latency_ms=500,
            energy_cost=0.4,
        ))

        self.register_model(ModelConfig(
            tier=ModelTier.LARGE,
            name="claude-sonnet-4",
            endpoint="anthropic:claude-sonnet-4-20250514",
            max_tokens=16384,
            capabilities=["code_generation", "reasoning", "memory", "complex", "novel"],
            cost_per_token=0.00001,
            latency_ms=2000,
            energy_cost=1.0,
            recovery_time_ms=5000,  # Need recovery after cortical processing
        ))
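
        # The names and endpoints above are assumptions about what is
        # installed locally; a caller can swap any tier after the fact, e.g.
        # (hypothetical model name):
        #
        #   get_router().register_model(ModelConfig(
        #       tier=ModelTier.MEDIUM,
        #       name="llama3.1:8b",
        #       endpoint="ollama:llama3.1:8b",
        #   ))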

    def route(self, ctx: QueryContext) -> RoutingDecision:
        """
        Route query using biological principles.

        Routing order (like neural processing):
        1. Check for startle/urgent → immediate local response
        2. Check habituation memory → known good routing
        3. Check priming → recent topics route lower
        4. Check refractory state → prefer simpler after complex
        5. Pattern match → determine required level
        6. Apply arousal/energy modulation
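
        Example of the ordering: "URGENT: fix the build!!!" trips the startle
        check and is answered locally at SMALL, even though "fix" would
        otherwise match a diagnostic reasoning pattern and route to LARGE.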
        """
        factors = {}
        query = ctx.query.strip()
        query_lower = query.lower()

        # Update arousal from context
        self._arousal = ctx.energy_state

        # === 1. STARTLE RESPONSE ===
        # Urgent queries get immediate local handling (bypass cortex)
        if self._is_startle(query_lower) or ctx.urgency > 0.8:
            factors['startle'] = 1.0
            self.stats['reflexive'] += 1
            return self._make_decision(
                ModelTier.SMALL,  # Fast local response
                "Startle response - urgent, handling locally",
                factors,
                pathway="reflex"
            )

        # === 2. REFLEX CHECK ===
        # Very simple queries handled at spinal level
        if self._is_reflex(query):
            factors['reflex'] = 1.0
            self.stats['reflexive'] += 1
            return self._make_decision(
                ModelTier.TINY,
                "Reflex - simple pattern, spinal routing",
                factors,
                pathway="reflex"
            )

        # === 3. HABITUATION CHECK ===
        # Have we successfully routed this before?
        query_hash = self._hash_query(query_lower)
        if query_hash in self._routing_memory:
            memory = self._routing_memory[query_hash]
            if memory.success_count >= 2:  # Learned routing
                factors['habituation'] = min(1.0, memory.success_count / 5)
                self.stats['habituated'] += 1
                return self._make_decision(
                    memory.successful_tier,
                    f"Habituated - {memory.success_count} successful routings",
                    factors,
                    pathway="habituated"
                )

        # === 4. PRIMING CHECK ===
        # Recent topics lower the threshold
        primed_tier = self._check_priming(query_lower, ctx.recent_topics)
        if primed_tier:
            factors['priming'] = 0.7
            self.stats['primed'] += 1

        # === 5. REFRACTORY CHECK ===
        # After complex processing, prefer simpler routing
        refractory_penalty = self._get_refractory_penalty()
        if refractory_penalty > 0:
            factors['refractory'] = refractory_penalty

        # === 6. PATTERN MATCHING ===
        # Determine required processing level
        base_tier, pattern_reason = self._pattern_match(query_lower)
        factors['pattern'] = 1.0

        # === 7. CONTEXT MODULATION ===
        # Long history needs more context handling
        if len(ctx.conversation_history) > 10:
            if base_tier == ModelTier.TINY:
                base_tier = ModelTier.SMALL
            factors['history_length'] = len(ctx.conversation_history) / 20

        # File context requires more capacity
        if ctx.file_context:
            total_context = sum(len(f) for f in ctx.file_context)
            if total_context > 5000 and base_tier in (ModelTier.TINY, ModelTier.SMALL):
                base_tier = ModelTier.MEDIUM
                factors['file_context'] = total_context / 10000

        # === 8. AROUSAL MODULATION ===
        # Low energy → prefer lower tiers (like drowsy brain)
        if self._arousal < 0.3:
            if base_tier == ModelTier.LARGE:
                base_tier = ModelTier.MEDIUM
                factors['low_arousal'] = 1 - self._arousal

        # === 9. PRIMING ADJUSTMENT ===
        # If primed, can go one tier lower
        if primed_tier and base_tier != ModelTier.TINY:
            tier_order = [ModelTier.TINY, ModelTier.SMALL, ModelTier.MEDIUM, ModelTier.LARGE]
            current_idx = tier_order.index(base_tier)
            base_tier = tier_order[max(0, current_idx - 1)]

        # === 10. REFRACTORY ADJUSTMENT ===
        # If in refractory, prefer one tier lower
        if refractory_penalty > 0.5 and base_tier == ModelTier.LARGE:
            base_tier = ModelTier.MEDIUM

        # === 11. USER OVERRIDE ===
        if ctx.user_preference:
            base_tier = ctx.user_preference
            factors['user_override'] = 1.0

        # Determine pathway name
        pathway = {
            ModelTier.TINY: "reflex",
            ModelTier.SMALL: "automatic",
            ModelTier.MEDIUM: "contextual",
            ModelTier.LARGE: "reasoning"
        }.get(base_tier, "default")

        return self._make_decision(base_tier, pattern_reason, factors, pathway)

    def _is_startle(self, query: str) -> bool:
        """Check if query triggers startle response."""
        for pattern in self.STARTLE_PATTERNS:
            if re.search(pattern, query, re.IGNORECASE):
                return True
        return False

    def _is_reflex(self, query: str) -> bool:
        """Check if query is simple enough for reflex handling."""
        for pattern in self.REFLEX_PATTERNS:
            if re.match(pattern, query, re.IGNORECASE):
                return True
        return len(query.split()) <= 3 and '?' not in query

    def _check_priming(self, query: str, recent_topics: List[str]) -> Optional[ModelTier]:
        """
        Check if query is primed by recent topics.

        Organic parallel: Neural priming - recent activation
        lowers the threshold for related neurons.
        """
        # Clean old priming
        now = datetime.now()
        self._recent_topics = [
            (t, ts) for t, ts in self._recent_topics
            if now - ts < self._priming_window
        ]

        # Add context topics
        for topic in recent_topics:
            self._recent_topics.append((topic.lower(), now))

        # Check if query matches primed topics
        query_words = set(query.lower().split())
        for topic, _ in self._recent_topics:
            if topic in query or topic in query_words:
                return ModelTier.SMALL  # Primed queries can go lower

        return None

    def _get_refractory_penalty(self) -> float:
        """
        Calculate refractory penalty after complex processing.

        Organic parallel: Neurons have a refractory period
        after firing where they're less responsive.
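
        Worked example: with the default 30 s window, 12 s after a LARGE
        query the penalty is 1 - 12/30 = 0.6; past 30 s it is 0.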
        """
        if not self._last_complex_query:
            return 0.0

        elapsed = datetime.now() - self._last_complex_query
        if elapsed > self._refractory_duration:
            return 0.0

        # Linear decay
        return 1.0 - (elapsed.total_seconds() / self._refractory_duration.total_seconds())

    def _pattern_match(self, query: str) -> Tuple[ModelTier, str]:
        """
        Match query patterns to determine tier.

        Like feature detectors in visual cortex,
        each pattern detector responds to specific features.
        """
        # Check reasoning patterns first (highest level)
        for pattern, name in self.REASONING_PATTERNS:
            if re.search(pattern, query, re.IGNORECASE):
                return ModelTier.LARGE, f"Reasoning pattern detected: {name}"

        # Check contextual patterns
        for pattern, name in self.CONTEXTUAL_PATTERNS:
            if re.search(pattern, query, re.IGNORECASE):
                return ModelTier.MEDIUM, f"Contextual pattern detected: {name}"

        # Check automatic patterns
        for pattern, name in self.AUTOMATIC_PATTERNS:
            if re.search(pattern, query, re.IGNORECASE):
                return ModelTier.SMALL, f"Automatic pattern detected: {name}"

        # Default based on query length/complexity
        words = len(query.split())
        if words < 10:
            return ModelTier.SMALL, "Short query, automatic processing"
        elif words < 50:
            return ModelTier.MEDIUM, "Medium query, contextual processing"
        else:
            return ModelTier.LARGE, "Long query, reasoning processing"

    def _hash_query(self, query: str) -> str:
        """Hash query for habituation memory lookup."""
        # Normalize: lowercase, remove punctuation, sort words
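        # e.g. "List the files!" and "the files... list?" both normalize to
        # "files list the" and therefore share a hash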
        normalized = ' '.join(sorted(re.sub(r'[^\w\s]', '', query.lower()).split()))
        return hashlib.md5(normalized.encode()).hexdigest()[:16]

    def _make_decision(
        self,
        tier: ModelTier,
        reason: str,
        factors: Dict[str, float],
        pathway: str
    ) -> RoutingDecision:
        """Create routing decision and update stats."""
        # Ensure we have this tier
        if tier not in self.models:
            # Escalate to the next available tier
            tier_order = [ModelTier.TINY, ModelTier.SMALL, ModelTier.MEDIUM, ModelTier.LARGE]
            for t in tier_order[tier_order.index(tier):]:
                if t in self.models:
                    tier = t
                    reason = f"Escalated: {reason}"
                    self.stats['escalations'] += 1
                    break

        # Update stats
        self.stats['queries'] += 1
        self.stats['by_tier'][tier.value] += 1
        self.stats['by_pathway'][pathway] += 1

        # Mark complex query time for refractory
        if tier == ModelTier.LARGE:
            self._last_complex_query = datetime.now()

        # Calculate confidence (lower for more factors involved)
        confidence = max(0.5, 1.0 - len(factors) * 0.1)

        return RoutingDecision(
            tier=tier,
            reason=reason,
            confidence=confidence,
            estimated_tokens=0,  # Could estimate
            capabilities_needed=[],  # Could extract
            factors=factors,
            pathway=pathway
        )

    def record_success(self, query: str, tier: ModelTier, latency_ms: float = 0):
        """
        Record successful routing for habituation learning.

        Organic parallel: Synaptic strengthening through
        repeated successful activation.
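
        Two or more recorded successes at the same tier are enough for
        route() to take the habituated shortcut for that (normalized) query.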
        """
        query_hash = self._hash_query(query.lower())

        if query_hash in self._routing_memory:
            memory = self._routing_memory[query_hash]
            if memory.successful_tier == tier:
                memory.success_count += 1
                memory.last_used = datetime.now()
                memory.avg_latency_ms = (memory.avg_latency_ms + latency_ms) / 2
        else:
            self._routing_memory[query_hash] = RoutingMemory(
                query_hash=query_hash,
                successful_tier=tier,
                avg_latency_ms=latency_ms
            )

        # Prune old memories (forgetting)
        self._prune_memories()

    def record_failure(self, query: str, tier: ModelTier):
        """
        Record failed routing to prevent future use.

        Organic parallel: Synaptic weakening when
        pathway doesn't lead to reward.
        """
        query_hash = self._hash_query(query.lower())
        if query_hash in self._routing_memory:
            memory = self._routing_memory[query_hash]
            memory.success_count = max(0, memory.success_count - 2)
            if memory.success_count == 0:
                del self._routing_memory[query_hash]

    def _prune_memories(self):
        """
        Remove old routing memories.

        Organic parallel: Synaptic pruning - unused
        connections are removed over time.
        """
        now = datetime.now()
        to_remove = []

        for hash_key, memory in self._routing_memory.items():
            if now - memory.last_used > self._memory_max_age:
                to_remove.append(hash_key)

        for key in to_remove:
            del self._routing_memory[key]

    def prime_topic(self, topic: str):
        """
        Prime a topic for faster routing.

        Organic parallel: Semantic priming - hearing
        a word makes related words easier to recognize.
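
        Example: after `prime_topic("docker")`, queries mentioning docker
        within the 10-minute priming window may be routed one tier lower.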
        """
        self._recent_topics.append((topic.lower(), datetime.now()))

    def set_arousal(self, level: float):
        """
        Set arousal/energy level.

        Organic parallel: Arousal systems (reticular
        activating system) modulate cortical responsiveness.
        """
        self._arousal = max(0.0, min(1.0, level))

    def get_stats(self) -> Dict[str, Any]:
        """Get routing statistics including biological metrics."""
        total = max(1, self.stats['queries'])
        return {
            **self.stats,
            'local_ratio': (
                self.stats['by_tier']['tiny'] +
                self.stats['by_tier']['small'] +
                self.stats['by_tier']['medium']
            ) / total,
            'habituation_rate': self.stats['habituated'] / total,
            'priming_rate': self.stats['primed'] / total,
            'reflex_rate': self.stats['reflexive'] / total,
            'memories_stored': len(self._routing_memory),
            'arousal_level': self._arousal,
        }
    async def query(self, ctx: QueryContext) -> Dict[str, Any]:
        """Route and execute a query."""
        decision = self.route(ctx)
        model = self.models.get(decision.tier)

        if not model:
            return {
                'error': f'No model available for tier {decision.tier.value}',
                'decision': decision
            }

        endpoint = model.endpoint
        start_time = datetime.now()

        if endpoint.startswith('ollama:'):
            response = await self._query_ollama(
                endpoint.split(':', 1)[1],
                ctx.query,
                ctx.conversation_history
            )
        elif endpoint.startswith('anthropic:'):
            response = await self._query_anthropic(
                endpoint.split(':', 1)[1],
                ctx.query,
                ctx.conversation_history
            )
        else:
            response = {'error': f'Unknown endpoint type: {endpoint}'}

        # Record success/failure for habituation
        elapsed_ms = (datetime.now() - start_time).total_seconds() * 1000
        if 'error' not in response:
            self.record_success(ctx.query, decision.tier, elapsed_ms)
        else:
            self.record_failure(ctx.query, decision.tier)

        return {
            'response': response,
            'decision': decision,
            'model': model.name,
            'tier': decision.tier.value,
            'pathway': decision.pathway,
            'latency_ms': elapsed_ms
        }

    async def _query_ollama(
        self,
        model: str,
        query: str,
        history: List[Dict]
    ) -> Dict[str, Any]:
        """Query the local Ollama chat API.

        Note: this uses a synchronous urllib call, which blocks the event
        loop while waiting for the response.
        """
        try:
            messages = history + [{"role": "user", "content": query}]
            data = json.dumps({
                "model": model,
                "messages": messages,
                "stream": False
            }).encode('utf-8')

            req = urllib.request.Request(
                "http://localhost:11434/api/chat",
                data=data,
                headers={"Content-Type": "application/json"}
            )

            with urllib.request.urlopen(req, timeout=60) as resp:
                result = json.loads(resp.read().decode('utf-8'))
                return {
                    'content': result.get('message', {}).get('content', ''),
                    'model': model,
                    'local': True
                }
        except Exception as e:
            return {'error': str(e), 'model': model}

    async def _query_anthropic(
        self,
        model: str,
        query: str,
        history: List[Dict]
    ) -> Dict[str, Any]:
        """Query Anthropic API - placeholder."""
        return {
            'error': 'Anthropic API not implemented - use Claude Code directly',
            'model': model,
            'suggestion': 'Escalate to Claude Code for complex queries'
        }


# Singleton instance
_router: Optional[BiomimeticRouter] = None


def get_router() -> BiomimeticRouter:
    """Get or create the global router instance."""
    global _router
    if _router is None:
        _router = BiomimeticRouter()
        _router.configure_defaults()
    return _router


# Backwards compatibility
EdgeModelRouter = BiomimeticRouter
ComplexityEstimator = None  # Deprecated, use BiomimeticRouter directly


if __name__ == "__main__":
    import asyncio

    async def main():
        router = get_router()

        print("=== Biomimetic Router Test ===\n")
        print("Testing neural routing principles:\n")

        test_cases = [
            # Reflex (spinal)
            ("yes", "Should be TINY - reflex"),
            ("ok", "Should be TINY - reflex"),

            # Automatic (brainstem)
            ("What is Python?", "Should be SMALL - definition"),
            ("List the files", "Should be SMALL - enumeration"),
            ("Summarize this", "Should be SMALL - automatic"),

            # Contextual (limbic)
            ("Remember what we discussed earlier about attention?", "Should be MEDIUM - memory"),

            # Reasoning (cortex)
            ("Why does this algorithm have O(n²) complexity?", "Should be LARGE - causal"),
            ("Implement a binary search tree with insert, delete, and balance operations", "Should be LARGE - generative"),
            ("Compare Redux vs Context API for state management", "Should be LARGE - analytical"),

            # Startle
            ("URGENT: production is down!!!", "Should be SMALL - startle response"),
        ]

        for query, expected in test_cases:
            ctx = QueryContext(query=query)
            decision = router.route(ctx)
            print(f"Query: {query[:50]}...")
            print(f"  → Tier: {decision.tier.value} | Pathway: {decision.pathway}")
            print(f"  → Reason: {decision.reason}")
            print(f"  → Factors: {decision.factors}")
            print(f"  → Expected: {expected}")
            print()

        # Test habituation
        print("=== Testing Habituation ===\n")
        for _ in range(3):
            ctx = QueryContext(query="What is the weather?")
            router.route(ctx)
            router.record_success("What is the weather?", ModelTier.SMALL)

        ctx = QueryContext(query="What is the weather?")
        decision = router.route(ctx)
        print("After 3 successful routings:")
        print(f"  → Tier: {decision.tier.value} | Pathway: {decision.pathway}")
        print("  → Should be habituated\n")

        # Test priming
        print("=== Testing Priming ===\n")
        router.prime_topic("neural networks")
        ctx = QueryContext(query="Tell me about neural networks")
        decision = router.route(ctx)
        print("After priming 'neural networks':")
        print(f"  → Tier: {decision.tier.value}")
        print(f"  → Factors: {decision.factors}\n")

        # Stats
        print("=== Routing Stats ===\n")
        for key, value in router.get_stats().items():
            print(f"  {key}: {value}")

    asyncio.run(main())