# ai_orchestrator_hf.py
  1  #!/usr/bin/env python3
  2  import os
  3  import sys
  4  import json
  5  import asyncio
  6  import aiohttp
  7  from datetime import datetime
  8  from typing import Dict, Any
  9  
# ====================== CONFIGURATION ======================
# All API keys/URLs come from environment variables; an empty string means
# the corresponding endpoint handler returns an error and is skipped.

# ENDPOINT 1: OpenRouter (Cloud)
OPENROUTER_API_KEY = os.getenv("OPENROUTER_API_KEY", "")
OPENROUTER_URL = "https://openrouter.ai/api/v1/chat/completions"
OPENROUTER_MODEL = "openai/gpt-3.5-turbo"

# ENDPOINT 2: Groq (Cloud - Fast)
GROQ_API_KEY = os.getenv("GROQ_API_KEY", "")
GROQ_URL = "https://api.groq.com/openai/v1/chat/completions"
GROQ_MODEL = "llama-3.1-8b-instant"

# ENDPOINT 3: Hugging Face Inference (Cloud/Local)
HF_API_KEY = os.getenv("HF_API_KEY", "")
HF_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.3"

# ENDPOINT 4: Local GGUF (Your downloaded model)
LOCAL_GGUF_URL = "http://localhost:8080/completion"  # llama.cpp server
LOCAL_MODEL_PATH = "/home/unknown/models/tinyllama.gguf"

# ENDPOINT 5: Custom/io.net API
CUSTOM_API_URL = os.getenv("CUSTOM_API_URL", "")
CUSTOM_API_KEY = os.getenv("CUSTOM_API_KEY", "")
CUSTOM_MODEL = "gpt-oss-20b"
 34  # ====================== PROMPT ENGINEERING ======================
 35  def enhance_prompt(query: str, endpoint_type: str = "general") -> str:
 36      """Improve prompts based on endpoint type"""
 37      query_lower = query.lower()
 38      
 39      # For local/smaller models, simplify complex queries
 40      if endpoint_type in ["local", "hf"]:
 41          # Math queries - be very direct
 42          math_patterns = [r'\d+[\+\-\*\/]\d+', r'calculate', r'what is \d+', r'\d+\s*[\+\-\*\/]\s*\d+']
 43          import re
 44          for pattern in math_patterns:
 45              if re.search(pattern, query_lower):
 46                  return f"Q: {query}\nA: "
 47      
 48      # For all models
 49      import re
 50      math_patterns = [r'\d+[\+\-\*\/]\d+', r'calculate', r'what is \d+', r'\d+\s*[\+\-\*\/]\s*\d+']
 51      for pattern in math_patterns:
 52          if re.search(pattern, query_lower):
 53              return f"Calculate this exactly: {query}. Give only the numerical answer."
 54      
 55      fact_patterns = [r'what is', r'who is', r'when was', r'where is', r'capital of']
 56      for pattern in fact_patterns:
 57          if re.search(pattern, query_lower):
 58              return f"Answer this question directly: {query}"
 59      
 60      return query
 61  
 62  # ====================== API HANDLERS ======================
 63  async def try_openrouter(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
 64      """Endpoint 1: OpenRouter"""
 65      if not OPENROUTER_API_KEY:
 66          return {"error": "No API key", "success": False}
 67      
 68      headers = {
 69          "Authorization": f"Bearer {OPENROUTER_API_KEY}",
 70          "Content-Type": "application/json"
 71      }
 72      
 73      payload = {
 74          "model": OPENROUTER_MODEL,
 75          "messages": [{"role": "user", "content": query}],
 76          "temperature": 0.7,
 77          "max_tokens": 1000
 78      }
 79      
 80      try:
 81          async with session.post(OPENROUTER_URL, json=payload, headers=headers) as resp:
 82              if resp.status == 200:
 83                  data = await resp.json()
 84                  return {
 85                      "success": True,
 86                      "response": data['choices'][0]['message']['content'],
 87                      "source": "openrouter"
 88                  }
 89              else:
 90                  return {"error": f"HTTP {resp.status}", "success": False}
 91      except Exception as e:
 92          return {"error": str(e), "success": False}
 93  
 94  async def try_groq(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
 95      """Endpoint 2: Groq"""
 96      if not GROQ_API_KEY:
 97          return {"error": "No API key", "success": False}
 98      
 99      headers = {
100          "Authorization": f"Bearer {GROQ_API_KEY}",
101          "Content-Type": "application/json"
102      }
103      
104      payload = {
105          "model": GROQ_MODEL,
106          "messages": [{"role": "user", "content": query}],
107          "temperature": 0.7,
108          "max_tokens": 1000
109      }
110      
111      try:
112          async with session.post(GROQ_URL, json=payload, headers=headers) as resp:
113              if resp.status == 200:
114                  data = await resp.json()
115                  return {
116                      "success": True,
117                      "response": data['choices'][0]['message']['content'],
118                      "source": "groq"
119                  }
120              else:
121                  return {"error": f"HTTP {resp.status}", "success": False}
122      except Exception as e:
123          return {"error": str(e), "success": False}
124  
async def try_huggingface(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
    """Endpoint 3: Hugging Face serverless Inference API.

    Returns a dict with "success" plus either "response"/"source" or "error".
    """
    if not HF_API_KEY:
        return {"error": "No HF API key", "success": False}

    auth_headers = {
        "Authorization": f"Bearer {HF_API_KEY}",
        "Content-Type": "application/json",
    }

    # HF text-generation endpoints take "inputs" + "parameters" rather than
    # an OpenAI-style messages list; the prompt is pre-simplified for hf.
    request_body = {
        "inputs": enhance_prompt(query, "hf"),
        "parameters": {
            "max_new_tokens": 500,
            "temperature": 0.7,
            "return_full_text": False,
        },
    }

    try:
        async with session.post(HF_URL, json=request_body,
                                headers=auth_headers) as resp:
            if resp.status != 200:
                return {"error": f"HTTP {resp.status}", "success": False}
            data = await resp.json()
            # Generations arrive as [{"generated_text": ...}]; anything
            # else is stringified as a best-effort fallback.
            if isinstance(data, list) and len(data) > 0:
                text = data[0].get('generated_text', '')
            else:
                text = str(data)
            return {"success": True, "response": text.strip(), "source": "huggingface"}
    except Exception as exc:
        return {"error": str(exc), "success": False}
164  
async def try_local_gguf(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
    """Endpoint 4: Local GGUF model served by a llama.cpp HTTP server.

    If the /health check fails (including connection refused) and both the
    model file and the server binary exist, launches the server in the
    background and waits briefly before issuing the completion request.

    Returns a dict with "success" plus either "response"/"source" or "error".
    """
    import subprocess

    server_bin = os.path.expanduser("~/llama.cpp/server")

    # Probe the server.  The original only attempted a start on a non-200
    # status; a down server raises a connection error instead, which was
    # swallowed by a bare `except:` and the start never happened.
    try:
        async with session.get("http://localhost:8080/health", timeout=2) as resp:
            healthy = resp.status == 200
    except Exception:
        healthy = False

    if not healthy and os.path.exists(LOCAL_MODEL_PATH) and os.path.exists(server_bin):
        print("  Starting llama.cpp server...")
        # Launch directly with an argv list and shell=False: the original
        # combined a list with shell=True, which makes the shell ignore
        # every element after the first; it also never expanded '~'.
        subprocess.Popen([
            server_bin, "-m", LOCAL_MODEL_PATH,
            "--port", "8080", "-c", "2048", "-ngl", "20",
        ], cwd=os.path.dirname(server_bin))
        # Non-blocking wait: time.sleep() would stall the event loop.
        await asyncio.sleep(5)

    payload = {
        "prompt": enhance_prompt(query, "local"),
        "n_predict": 500,
        "temperature": 0.7,
        "stop": ["\n", "###", "Q:"]
    }

    try:
        async with session.post(LOCAL_GGUF_URL, json=payload, timeout=30) as resp:
            if resp.status == 200:
                data = await resp.json()
                return {
                    "success": True,
                    "response": data.get('content', ''),
                    "source": "local-gguf"
                }
            return {"error": f"HTTP {resp.status}", "success": False}
    except Exception as e:
        return {"error": str(e), "success": False}
208  
async def try_custom_api(session: aiohttp.ClientSession, query: str) -> Dict[str, Any]:
    """Endpoint 5: user-configured OpenAI-compatible API (e.g. io.net).

    Returns a dict with "success" plus either "response"/"source" or "error".
    """
    if not CUSTOM_API_URL:
        return {"error": "No custom API URL", "success": False}

    hdrs = {"Content-Type": "application/json"}
    # Auth header is optional for self-hosted/open endpoints.
    if CUSTOM_API_KEY:
        hdrs["Authorization"] = f"Bearer {CUSTOM_API_KEY}"

    body = {
        "model": CUSTOM_MODEL,
        "messages": [{"role": "user", "content": query}],
        "temperature": 0.7,
        "max_tokens": 1000,
    }

    try:
        async with session.post(CUSTOM_API_URL, json=body,
                                headers=hdrs, timeout=60) as resp:
            if resp.status != 200:
                return {"error": f"HTTP {resp.status}", "success": False}
            data = await resp.json()
            return {
                "success": True,
                "response": data['choices'][0]['message']['content'],
                "source": "custom",
            }
    except Exception as exc:
        return {"error": str(exc), "success": False}
238  
239  # ====================== ORCHESTRATOR ======================
async def get_ai_response(query: str, mode: str = "auto") -> Dict[str, Any]:
    """Try the configured endpoints in priority order; return the first success.

    Args:
        query: User query forwarded to each endpoint handler.
        mode: "cloud" (cloud endpoints only), "local" (local + custom only),
            or anything else for "auto" (all five, cloud first).

    Returns:
        The first successful endpoint dict with an added "time" key (seconds),
        or, if every endpoint fails, the last failure dict augmented with
        "errors" mapping each endpoint name to its error message.
    """
    print(f"Query: {query}")
    print(f"Mode: {mode}")
    print("=" * 60)

    async with aiohttp.ClientSession() as session:
        start_time = datetime.now()

        if mode == "cloud":
            # Cloud endpoints only
            endpoints = [
                ("OpenRouter", try_openrouter),
                ("Groq", try_groq),
                ("HuggingFace", try_huggingface)
            ]
        elif mode == "local":
            # Local endpoints only
            endpoints = [
                ("Local GGUF", try_local_gguf),
                ("Custom API", try_custom_api)
            ]
        else:  # auto mode: everything, cloud first
            endpoints = [
                ("OpenRouter", try_openrouter),
                ("Groq", try_groq),
                ("HuggingFace", try_huggingface),
                ("Local GGUF", try_local_gguf),
                ("Custom API", try_custom_api)
            ]

        # Seed `result` so the failure path is well-defined even if the
        # endpoint list were ever empty.
        result: Dict[str, Any] = {"error": "No endpoints attempted", "success": False}
        errors: Dict[str, str] = {}

        for name, endpoint_func in endpoints:
            print(f"  Trying {name}...")
            result = await endpoint_func(session, query)
            if result['success']:
                result['time'] = (datetime.now() - start_time).total_seconds()
                return result
            errors[name] = result.get('error', 'unknown')

        # All failed: report every endpoint's error, not just the last one.
        result['errors'] = errors
        result['time'] = (datetime.now() - start_time).total_seconds()
        return result
282  
283  # ====================== MAIN ======================
async def main():
    """CLI entry point: parse argv, run the orchestrator, report and save.

    Recognized flags: --mode=auto|cloud|local, --cloud, --local.  All other
    non-flag arguments are joined into the query string.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 ai_orchestrator_hf.py 'your query' [--mode auto|cloud|local]")
        print("Endpoints: OpenRouter, Groq, HuggingFace, Local GGUF, Custom API")
        sys.exit(1)

    args = sys.argv[1:]
    mode = "auto"
    query_parts = []

    for arg in args:
        if arg.startswith("--mode="):
            # maxsplit=1 so a stray '=' in the value is not truncated.
            mode = arg.split("=", 1)[1]
        elif arg == "--cloud":
            mode = "cloud"
        elif arg == "--local":
            mode = "local"
        elif arg.startswith("--"):
            # Unknown flags are ignored rather than treated as query text.
            continue
        else:
            query_parts.append(arg)

    query = " ".join(query_parts)
    if not query:
        # Every argument was a flag; nothing to ask.
        print("Error: no query given.")
        sys.exit(1)

    result = await get_ai_response(query, mode)

    if result['success']:
        print("āœ… SUCCESS")
        print("=" * 60)
        print(f"Source: {result['source']}")
        print(f"Time: {result['time']:.1f}s")
        print("\nšŸŽÆ ANSWER:")
        print("=" * 60)
        print(result['response'])
        print("=" * 60)

        # Persist the run so results survive the terminal session.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"ai_result_{timestamp}.json"
        with open(filename, "w") as f:
            json.dump({
                "query": query,
                "response": result['response'],
                "source": result['source'],
                "time": result['time'],
                "mode": mode,
                "success": True
            }, f, indent=2)
        # Bug fix: the original printed the literal placeholder "(unknown)"
        # instead of the file it just wrote.
        print(f"\nšŸ’¾ Saved to: {filename}")
    else:
        print("āŒ ALL ENDPOINTS FAILED")
        print("=" * 60)
        print(f"Error: {result.get('error', 'Unknown error')}")
        print(f"Time: {result.get('time', 0):.1f}s")

if __name__ == "__main__":
    asyncio.run(main())