# working_orchestrator.py
#!/usr/bin/env python3
"""
WORKING AI ORCHESTRATOR - Fixed prompting and response handling

Tries answer sources in a fixed order — OpenRouter, then Groq (cycling
through several models), then a local llama.cpp model — and returns the
first non-empty, cleaned response as a uniform result dict.
"""

import os
import sys
import asyncio
import aiohttp
import json
import subprocess
import time
import re
from datetime import datetime


class WorkingOrchestrator:
    """Fallback chain over remote chat-completion APIs and a local model."""

    def __init__(self):
        # API keys come from the environment; a missing key just disables
        # that backend (the corresponding call_* method reports an error).
        self.openrouter_key = os.getenv('OPENROUTER_API_KEY')
        self.groq_key = os.getenv('GROQ_API_KEY')

        # Tested working Groq models, tried in order until one answers.
        self.groq_models = [
            "llama-3.3-70b-versatile",       # Primary
            "llama-3.2-90b-vision-preview",  # Backup
            "gemma2-9b-it"                   # Fallback
        ]

    def create_system_prompt(self) -> str:
        """Create an effective system prompt shared by all remote backends."""
        return """You are a helpful AI assistant. Follow these rules:
1. Answer questions directly and concisely
2. Do NOT use markdown, XML tags, or special formatting
3. Do NOT add prefixes like "Answer:" or "Response:"
4. Just provide the answer itself
5. For simple questions, give short answers
6. For complex questions, give detailed answers"""

    async def call_openrouter(self, query: str) -> dict:
        """Call OpenRouter's chat-completions endpoint.

        Returns a dict with "success": bool; on success it also carries
        "api", "response" (cleaned text) and "tokens"; on failure, "error".
        """
        if not self.openrouter_key:
            return {"success": False, "error": "No API key"}

        url = "https://openrouter.ai/api/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.openrouter_key}",
            "Content-Type": "application/json",
            "HTTP-Referer": "https://github.com/working-orchestrator"
        }

        messages = [
            {"role": "system", "content": self.create_system_prompt()},
            {"role": "user", "content": query}
        ]

        data = {
            "model": "mistralai/mistral-7b-instruct:free",
            "messages": messages,
            "max_tokens": 300,
            "temperature": 0.7,
            "top_p": 0.9
        }

        try:
            timeout = aiohttp.ClientTimeout(total=15)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(url, headers=headers, json=data) as response:
                    if response.status == 200:
                        result = await response.json()
                        answer = result['choices'][0]['message']['content']

                        # Strip tags/prefixes the model may have added.
                        answer = self.clean_response(answer)

                        # Reject empty / near-empty answers so the caller
                        # can fall through to the next backend.
                        if not answer or len(answer.strip()) < 2:
                            return {"success": False, "error": "Empty response"}

                        return {
                            "success": True,
                            "api": "openrouter",
                            "response": answer,
                            "tokens": result.get('usage', {}).get('total_tokens', 0)
                        }
                    else:
                        error_text = await response.text()
                        return {"success": False, "error": f"HTTP {response.status}: {error_text[:100]}"}
        except Exception as e:
            # Truncate so the error fits in a single status line.
            return {"success": False, "error": str(e)[:100]}

    async def call_groq(self, query: str) -> dict:
        """Call Groq, trying each model in self.groq_models until one answers.

        Same result-dict contract as call_openrouter, plus "model" on success.
        """
        if not self.groq_key:
            return {"success": False, "error": "No API key"}

        url = "https://api.groq.com/openai/v1/chat/completions"
        headers = {
            "Authorization": f"Bearer {self.groq_key}",
            "Content-Type": "application/json"
        }

        messages = [
            {"role": "system", "content": self.create_system_prompt()},
            {"role": "user", "content": query}
        ]

        for model in self.groq_models:
            data = {
                "model": model,
                "messages": messages,
                "max_tokens": 300,
                "temperature": 0.7
            }

            try:
                timeout = aiohttp.ClientTimeout(total=15)
                async with aiohttp.ClientSession(timeout=timeout) as session:
                    async with session.post(url, headers=headers, json=data) as response:
                        if response.status == 200:
                            result = await response.json()
                            answer = result['choices'][0]['message']['content']
                            answer = self.clean_response(answer)

                            if not answer or len(answer.strip()) < 2:
                                continue  # Try next model if empty

                            return {
                                "success": True,
                                "api": "groq",
                                "model": model,
                                "response": answer,
                                "tokens": result.get('usage', {}).get('total_tokens', 0)
                            }
            # Was a bare `except:` — narrowed to Exception so Ctrl-C
            # (KeyboardInterrupt) and SystemExit are not swallowed.
            except Exception:
                continue  # Try next model on error

        return {"success": False, "error": "All Groq models failed"}

    def call_local_fast(self, query: str) -> dict:
        """Run a local model (llama.cpp, then an orchestrator script) with timeouts.

        Blocking (uses subprocess.run); same result-dict contract as the
        async backends.
        """
        try:
            # Require the model file up front; everything below depends on it.
            model_path = os.path.expanduser("~/models/tinyllama.gguf")
            if not os.path.exists(model_path):
                return {"success": False, "error": "Model file not found"}

            # NOTE(fix): the original also wrote /tmp/ai_prompt.txt here but
            # never read it anywhere — dead side effect removed; the prompt
            # is passed directly via -p below.

            # First choice: call llama-cli directly if it is installed.
            llama_cli = os.path.expanduser("~/llama.cpp/build/bin/llama-cli")
            if os.path.exists(llama_cli):
                try:
                    result = subprocess.run(
                        [llama_cli, "-m", model_path, "-p", f"Q: {query}\nA:", "-n", "50", "-t", "4"],
                        capture_output=True,
                        text=True,
                        timeout=30
                    )

                    if result.returncode == 0:
                        # Extract the answer after "A:"
                        output = result.stdout
                        if "A:" in output:
                            answer = output.split("A:", 1)[1].strip()
                            answer = self.clean_response(answer)
                            # NOTE(fix): only accept a non-empty answer, for
                            # consistency with the remote backends; otherwise
                            # fall through to the orchestrator script.
                            if answer and len(answer.strip()) >= 2:
                                return {
                                    "success": True,
                                    "api": "local",
                                    "model": "tinyllama",
                                    "response": answer,
                                    "tokens": 0
                                }
                except subprocess.TimeoutExpired:
                    return {"success": False, "error": "Local model timeout"}

            # Fallback: delegate to ultimate_orchestrator.py with a shorter timeout.
            try:
                result = subprocess.run(
                    ["python3", "ultimate_orchestrator.py", "tinyllama", query],
                    capture_output=True,
                    text=True,
                    timeout=45  # Much shorter timeout
                )

                if result.returncode == 0:
                    # The script prints a "FINAL ANSWER" banner; the answer is
                    # the next non-empty, non-separator line after it.
                    # (The original also tested a mojibake variant of the
                    # banner, but that string contains "FINAL ANSWER" so the
                    # single check below already covers it.)
                    lines = result.stdout.split('\n')
                    for i, line in enumerate(lines):
                        if "FINAL ANSWER" in line:
                            for j in range(i + 1, len(lines)):
                                if lines[j].strip() and not lines[j].startswith("==="):
                                    answer = self.clean_response(lines[j].strip())
                                    return {
                                        "success": True,
                                        "api": "local",
                                        "model": "tinyllama",
                                        "response": answer,
                                        "tokens": 0
                                    }
            except subprocess.TimeoutExpired:
                return {"success": False, "error": "Local model timeout (45s)"}

            return {"success": False, "error": "Could not extract answer"}

        except Exception as e:
            return {"success": False, "error": str(e)[:100]}

    def clean_response(self, text: str) -> str:
        """Clean and validate AI responses.

        Strips common model tags, "Answer:"-style prefixes, and collapses
        all whitespace runs (including newlines) into single spaces.
        """
        if not text:
            return ""

        # Remove common tags
        text = re.sub(r'<s>|</s>|\[OUT\]|\[/OUT\]|\[/s\]|<\|.*?\|>', '', text)

        # Remove "Answer:" or similar prefixes
        text = re.sub(r'^(Answer|Response|A)\s*[:.-]\s*', '', text, flags=re.IGNORECASE)

        # Clean whitespace
        text = re.sub(r'\s+', ' ', text).strip()

        return text

    async def orchestrate(self, query: str) -> dict:
        """Try OpenRouter, then Groq, then the local model; return the first win.

        Returns a dict with "success", "query", timing metadata, and either
        the answer fields or an "error" string.
        """
        print(f"\n🤖 WORKING ORCHESTRATOR")
        print("=" * 60)
        print(f"Query: {query}")
        print("=" * 60)

        start_time = time.time()

        print("\n1. Trying OpenRouter...")
        result = await self.call_openrouter(query)

        if result["success"]:
            print(f"   ✅ OpenRouter: Success")
            source = "openrouter"
        else:
            print(f"   ❌ OpenRouter: {result['error']}")

            print("\n2. Trying Groq...")
            result = await self.call_groq(query)

            if result["success"]:
                print(f"   ✅ Groq: Success ({result.get('model', '')})")
                source = "groq"
            else:
                print(f"   ❌ Groq: {result['error']}")

                print("\n3. Trying local model...")
                # NOTE: call_local_fast is blocking; acceptable here because
                # this is the only task on the event loop.
                result = self.call_local_fast(query)

                if result["success"]:
                    print(f"   ✅ Local: Success ({result.get('model', '')})")
                    source = "local"
                else:
                    print(f"   ❌ Local: {result['error']}")

        elapsed = time.time() - start_time

        if result["success"]:
            return {
                "success": True,
                "query": query,
                "response": result["response"],
                "source": source,
                "model": result.get("model", ""),
                "tokens": result.get("tokens", 0),
                "time": elapsed,
                "timestamp": datetime.now().isoformat()
            }
        else:
            return {
                "success": False,
                "query": query,
                "error": "All methods failed",
                "time": elapsed,
                "timestamp": datetime.now().isoformat()
            }


async def main():
    """CLI entry point: orchestrate the query given on the command line."""
    if len(sys.argv) < 2:
        print("Usage: python working_orchestrator.py \"Your query\"")
        sys.exit(1)

    query = " ".join(sys.argv[1:])

    orchestrator = WorkingOrchestrator()
    result = await orchestrator.orchestrate(query)

    print("\n" + "=" * 60)
    if result["success"]:
        print("✅ SUCCESS")
        print("=" * 60)
        print(f"Source: {result['source']}")
        if result.get('model'):
            print(f"Model: {result['model']}")
        print(f"Time: {result['time']:.1f}s")

        print("\n🎯 ANSWER:")
        print("=" * 60)
        print(result['response'])
        print("=" * 60)

        # Save result for later inspection.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"working_result_{timestamp}.json"
        with open(filename, "w") as f:
            json.dump(result, f, indent=2)
        # FIX: original printed the literal placeholder "(unknown)" instead
        # of interpolating the actual output filename.
        print(f"\n💾 Saved to: {filename}")
    else:
        print("❌ FAILED")
        print("=" * 60)
        print(f"Error: {result['error']}")


if __name__ == "__main__":
    asyncio.run(main())