# ai_orchestrator_hf.py
#!/usr/bin/env python3
"""Multi-endpoint AI orchestrator.

Tries up to five LLM endpoints in order — OpenRouter, Groq, Hugging Face
Inference, a local llama.cpp GGUF server, and a custom OpenAI-compatible
API — and returns the first successful response.  Every handler returns a
dict with at least ``success`` (bool) plus either ``response``/``source``
or ``error``.
"""
import os
import re
import sys
import json
import asyncio
import subprocess
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Tuple

import aiohttp

# ====================== CONFIGURATION ======================
# ENDPOINT 1: OpenRouter (Cloud)
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
OPENROUTER_MODEL = "openai/gpt-3.5-turbo"

# ENDPOINT 2: Groq (Cloud - Fast)
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
GROQ_MODEL = "llama-3.1-8b-instant"

# ENDPOINT 3: Hugging Face Inference (Cloud/Local)
HF_API_KEY = os.getenv("HF_API_KEY", "")
HF_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"

# ENDPOINT 4: Local GGUF (llama.cpp server)
LOCAL_GGUF_URL = "http://localhost:8080/completion"
LOCAL_MODEL_PATH = "/home/unknown/models/tinyllama.gguf"

# ENDPOINT 5: Custom/io.net API
CUSTOM_API_URL = os.getenv("CUSTOM_API_URL", "")
CUSTOM_API_KEY = os.getenv("CUSTOM_API_KEY", "")
CUSTOM_MODEL = "gpt-oss-20b"

# Compiled once at module level (original re-imported `re` and rebuilt the
# raw pattern lists on every enhance_prompt() call).
_MATH_PATTERNS = [
    re.compile(r'\d+[\+\-\*\/]\d+'),
    re.compile(r'calculate'),
    re.compile(r'what is \d+'),
    re.compile(r'\d+\s*[\+\-\*\/]\s*\d+'),
]
_FACT_PATTERNS = [
    re.compile(r'what is'),
    re.compile(r'who is'),
    re.compile(r'when was'),
    re.compile(r'where is'),
    re.compile(r'capital of'),
]


# ====================== PROMPT ENGINEERING ======================
def enhance_prompt(query: str, endpoint_type: str = "general") -> str:
    """Rewrite *query* into a prompt better suited to *endpoint_type*.

    Smaller local/HF models get a terse ``Q: ... A:`` completion form for
    math queries; all models get an explicit instruction wrapper for math
    and factual questions.  Returns the query unchanged when nothing matches.
    """
    query_lower = query.lower()
    is_math = any(p.search(query_lower) for p in _MATH_PATTERNS)

    # Local/smaller models do better with a bare Q/A completion prompt.
    if endpoint_type in ("local", "hf") and is_math:
        return f"Q: {query}\nA: "

    if is_math:
        # BUG FIX: the original f-string was split across a raw newline
        # (a syntax error); the instruction is now a single literal.
        return f"Calculate this exactly: {query}. Give only the numerical answer."

    if any(p.search(query_lower) for p in _FACT_PATTERNS):
        return f"Answer this question directly: {query}"

    return query


# ====================== API HANDLERS ======================
async def _openai_chat(
    session: aiohttp.ClientSession,
    url: str,
    model: str,
    query: str,
    api_key: str = "",
    source: str = "unknown",
    timeout: Optional[float] = None,
) -> Dict[str, Any]:
    """POST an OpenAI-style chat completion and normalize the result.

    Shared by the OpenRouter, Groq and custom-API handlers, which were
    three copy-pasted bodies in the original.  Never raises: transport or
    parse errors come back as ``{"error": ..., "success": False}``.
    """
    headers = {"Content-Type": "application/json"}
    if api_key:
        headers["Authorization"] = f"Bearer {api_key}"
    payload = {
        "model": model,
        "messages": [{"role": "user", "content": query}],
        "temperature": 0.7,
        "max_tokens": 1000,
    }
    kwargs: Dict[str, Any] = {}
    if timeout is not None:
        kwargs["timeout"] = aiohttp.ClientTimeout(total=timeout)
    try:
        async with session.post(url, json=payload, headers=headers, **kwargs) as resp:
            if resp.status != 200:
                return {"error": f"HTTP {resp.status}", "success": False}
            data = await resp.json()
            return {
                "success": True,
                "response": data['choices'][0]['message']['content'],
                "source": source,
            }
    except Exception as e:  # network error, bad JSON, unexpected schema
        return {"error": str(e), "success": False}


async def try_openrouter(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
    """Endpoint 1: OpenRouter."""
    if not OPENROUTER_API_KEY:
        return {"error": "No API key", "success": False}
    return await _openai_chat(session, OPENROUTER_URL, OPENROUTER_MODEL, query,
                              api_key=OPENROUTER_API_KEY, source="openrouter")


async def try_groq(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
    """Endpoint 2: Groq."""
    if not GROQ_API_KEY:
        return {"error": "No API key", "success": False}
    return await _openai_chat(session, GROQ_URL, GROQ_MODEL, query,
                              api_key=GROQ_API_KEY, source="groq")


async def try_huggingface(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
    """Endpoint 3: Hugging Face Inference API."""
    if not HF_API_KEY:
        return {"error": "No HF API key", "success": False}

    headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json",
    }
    payload = {
        "inputs": enhance_prompt(query, "hf"),
        "parameters": {
            "max_new_tokens": 500,
            "temperature": 0.7,
            "return_full_text": False,
        },
    }
    try:
        async with session.post(HF_URL, json=payload, headers=headers) as resp:
            if resp.status != 200:
                return {"error": f"HTTP {resp.status}", "success": False}
            data = await resp.json()
            # HF returns a list of {"generated_text": ...} on success;
            # anything else is stringified as a fallback.
            if isinstance(data, list) and data:
                text = data[0].get('generated_text', '')
            else:
                text = str(data)
            return {"success": True, "response": text.strip(), "source": "huggingface"}
    except Exception as e:
        return {"error": str(e), "success": False}


async def _ensure_local_server(session: aiohttp.ClientSession) -> None:
    """Best-effort: start the llama.cpp server if its health check fails.

    Never raises; if the model file is missing or the launch fails the
    subsequent request will simply error out in try_local_gguf.
    """
    try:
        async with session.get("http://localhost:8080/health",
                               timeout=aiohttp.ClientTimeout(total=2)) as resp:
            if resp.status == 200:
                return  # already up
    except Exception:
        pass  # not running / unreachable — fall through and try to start it

    if not os.path.exists(LOCAL_MODEL_PATH):
        return
    print(" Starting llama.cpp server...")
    # BUG FIX: original passed an argv list together with shell=True and
    # nested a `python3 -c` wrapper, and '~' was never expanded — the
    # server could not actually start.  Launch the binary directly.
    server_bin = os.path.expanduser("~/llama.cpp/server")
    subprocess.Popen(
        [server_bin, "-m", LOCAL_MODEL_PATH, "--port", "8080", "-c", "2048", "-ngl", "20"],
        cwd=os.path.expanduser("~/llama.cpp"),
    )
    # BUG FIX: time.sleep(5) blocked the event loop inside async code.
    await asyncio.sleep(5)


async def try_local_gguf(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
    """Endpoint 4: local GGUF model served by llama.cpp."""
    await _ensure_local_server(session)

    payload = {
        "prompt": enhance_prompt(query, "local"),
        "n_predict": 500,
        "temperature": 0.7,
        "stop": ["\n", "###", "Q:"],
    }
    try:
        async with session.post(LOCAL_GGUF_URL, json=payload,
                                timeout=aiohttp.ClientTimeout(total=30)) as resp:
            if resp.status != 200:
                return {"error": f"HTTP {resp.status}", "success": False}
            data = await resp.json()
            return {"success": True, "response": data.get('content', ''), "source": "local-gguf"}
    except Exception as e:
        return {"error": str(e), "success": False}


async def try_custom_api(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
    """Endpoint 5: custom OpenAI-compatible API (e.g. io.net)."""
    if not CUSTOM_API_URL:
        return {"error": "No custom API URL", "success": False}
    return await _openai_chat(session, CUSTOM_API_URL, CUSTOM_MODEL, query,
                              api_key=CUSTOM_API_KEY, source="custom", timeout=60)


# ====================== ORCHESTRATOR ======================
async def get_ai_response(query: str, mode: str = "auto") -> Dict[str, Any]:
    """Try endpoints in order (per *mode*) and return the first success.

    mode: "cloud" (OpenRouter/Groq/HF), "local" (GGUF + custom API), or
    "auto" (all five, cloud first).  The returned dict always carries
    'success' and 'time' (elapsed seconds); on total failure it is the
    last endpoint's error dict.
    """
    print(f"Query: {query}")
    print(f"Mode: {mode}")
    print("=" * 60)

    cloud: List[Tuple[str, Callable]] = [
        ("OpenRouter", try_openrouter),
        ("Groq", try_groq),
        ("HuggingFace", try_huggingface),
    ]
    local: List[Tuple[str, Callable]] = [
        ("Local GGUF", try_local_gguf),
        ("Custom API", try_custom_api),
    ]
    if mode == "cloud":
        endpoints = cloud
    elif mode == "local":
        endpoints = local
    else:  # auto: everything, cloud first
        endpoints = cloud + local

    start_time = datetime.now()
    # BUG FIX: original referenced `result` after the loop, raising
    # NameError if the endpoint list were ever empty.
    result: Dict[str, Any] = {"error": "No endpoints tried", "success": False}

    async with aiohttp.ClientSession() as session:
        for name, endpoint_func in endpoints:
            print(f" Trying {name}...")
            result = await endpoint_func(session, query)
            if result['success']:
                break

    result['time'] = (datetime.now() - start_time).total_seconds()
    return result


# ====================== MAIN ======================
def _parse_args(args: List[str]) -> Tuple[str, str]:
    """Split CLI args into (query, mode); unknown ``--`` flags are ignored."""
    mode = "auto"
    query_parts: List[str] = []
    for arg in args:
        if arg.startswith("--mode="):
            mode = arg.split("=", 1)[1]
        elif arg == "--cloud":
            mode = "cloud"
        elif arg == "--local":
            mode = "local"
        elif arg.startswith("--"):
            continue  # ignore unrecognized flags, as before
        else:
            query_parts.append(arg)
    return " ".join(query_parts), mode


async def main() -> None:
    """CLI entry point: query the orchestrator and save a successful result."""
    if len(sys.argv) < 2:
        print("Usage: python3 ai_orchestrator_hf.py 'your query' [--mode auto|cloud|local]")
        print("Endpoints: OpenRouter, Groq, HuggingFace, Local GGUF, Custom API")
        sys.exit(1)

    query, mode = _parse_args(sys.argv[1:])
    result = await get_ai_response(query, mode)

    if result['success']:
        print("✅ SUCCESS")
        print("=" * 60)
        print(f"Source: {result['source']}")
        print(f"Time: {result['time']:.1f}s")
        print("\n🎯 ANSWER:")
        print("=" * 60)
        print(result['response'])
        print("=" * 60)

        # Persist the successful run for later inspection.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"ai_result_{timestamp}.json"
        with open(filename, "w") as f:
            json.dump({
                "query": query,
                "response": result['response'],
                "source": result['source'],
                "time": result['time'],
                "mode": mode,
                "success": True,
            }, f, indent=2)
        # BUG FIX: original printed a literal "(unknown)" placeholder here.
        print(f"\n💾 Saved to: {filename}")
    else:
        print("❌ ALL ENDPOINTS FAILED")
        print("=" * 60)
        print(f"Error: {result.get('error', 'Unknown error')}")
        print(f"Time: {result.get('time', 0):.1f}s")


if __name__ == "__main__":
    asyncio.run(main())