# working_orchestrator.py
  1  #!/usr/bin/env python3
  2  """
  3  WORKING AI ORCHESTRATOR - Fixed prompting and response handling
  4  """
  5  
  6  import os
  7  import sys
  8  import asyncio
  9  import aiohttp
 10  import json
 11  import subprocess
 12  import time
 13  import re
 14  from datetime import datetime
 15  
 16  class WorkingOrchestrator:
 17      def __init__(self):
 18          self.openrouter_key = os.getenv('OPENROUTER_API_KEY')
 19          self.groq_key = os.getenv('GROQ_API_KEY')
 20          
 21          # Tested working models
 22          self.groq_models = [
 23              "llama-3.3-70b-versatile",  # Primary
 24              "llama-3.2-90b-vision-preview",  # Backup
 25              "gemma2-9b-it"  # Fallback
 26          ]
 27      
 28      def create_system_prompt(self) -> str:
 29          """Create an effective system prompt"""
 30          return """You are a helpful AI assistant. Follow these rules:
 31  1. Answer questions directly and concisely
 32  2. Do NOT use markdown, XML tags, or special formatting
 33  3. Do NOT add prefixes like "Answer:" or "Response:"
 34  4. Just provide the answer itself
 35  5. For simple questions, give short answers
 36  6. For complex questions, give detailed answers"""
 37      
 38      async def call_openrouter(self, query: str) -> dict:
 39          """Call OpenRouter with optimized prompting"""
 40          if not self.openrouter_key:
 41              return {"success": False, "error": "No API key"}
 42          
 43          url = "https://openrouter.ai/api/v1/chat/completions"
 44          headers = {
 45              "Authorization": f"Bearer {self.openrouter_key}",
 46              "Content-Type": "application/json",
 47              "HTTP-Referer": "https://github.com/working-orchestrator"
 48          }
 49          
 50          messages = [
 51              {"role": "system", "content": self.create_system_prompt()},
 52              {"role": "user", "content": query}
 53          ]
 54          
 55          data = {
 56              "model": "mistralai/mistral-7b-instruct:free",
 57              "messages": messages,
 58              "max_tokens": 300,
 59              "temperature": 0.7,
 60              "top_p": 0.9
 61          }
 62          
 63          try:
 64              timeout = aiohttp.ClientTimeout(total=15)
 65              async with aiohttp.ClientSession(timeout=timeout) as session:
 66                  async with session.post(url, headers=headers, json=data) as response:
 67                      if response.status == 200:
 68                          result = await response.json()
 69                          answer = result['choices'][0]['message']['content']
 70                          
 71                          # Clean the response
 72                          answer = self.clean_response(answer)
 73                          
 74                          # Validate it's not empty
 75                          if not answer or len(answer.strip()) < 2:
 76                              return {"success": False, "error": "Empty response"}
 77                          
 78                          return {
 79                              "success": True,
 80                              "api": "openrouter",
 81                              "response": answer,
 82                              "tokens": result.get('usage', {}).get('total_tokens', 0)
 83                          }
 84                      else:
 85                          error_text = await response.text()
 86                          return {"success": False, "error": f"HTTP {response.status}: {error_text[:100]}"}
 87          except Exception as e:
 88              return {"success": False, "error": str(e)[:100]}
 89      
 90      async def call_groq(self, query: str) -> dict:
 91          """Call Groq with updated models"""
 92          if not self.groq_key:
 93              return {"success": False, "error": "No API key"}
 94          
 95          url = "https://api.groq.com/openai/v1/chat/completions"
 96          headers = {
 97              "Authorization": f"Bearer {self.groq_key}",
 98              "Content-Type": "application/json"
 99          }
100          
101          messages = [
102              {"role": "system", "content": self.create_system_prompt()},
103              {"role": "user", "content": query}
104          ]
105          
106          for model in self.groq_models:
107              data = {
108                  "model": model,
109                  "messages": messages,
110                  "max_tokens": 300,
111                  "temperature": 0.7
112              }
113              
114              try:
115                  timeout = aiohttp.ClientTimeout(total=15)
116                  async with aiohttp.ClientSession(timeout=timeout) as session:
117                      async with session.post(url, headers=headers, json=data) as response:
118                          if response.status == 200:
119                              result = await response.json()
120                              answer = result['choices'][0]['message']['content']
121                              answer = self.clean_response(answer)
122                              
123                              if not answer or len(answer.strip()) < 2:
124                                  continue  # Try next model if empty
125                              
126                              return {
127                                  "success": True,
128                                  "api": "groq",
129                                  "model": model,
130                                  "response": answer,
131                                  "tokens": result.get('usage', {}).get('total_tokens', 0)
132                              }
133              except:
134                  continue  # Try next model on error
135          
136          return {"success": False, "error": "All Groq models failed"}
137      
138      def call_local_fast(self, query: str) -> dict:
139          """Call local model with timeout and proper parsing"""
140          # First, try a quick test to see if local models work
141          try:
142              # Use a simpler approach - call tinyllama directly
143              model_path = os.path.expanduser("~/models/tinyllama.gguf")
144              if not os.path.exists(model_path):
145                  return {"success": False, "error": "Model file not found"}
146              
147              # Create a simple prompt file
148              prompt_file = "/tmp/ai_prompt.txt"
149              with open(prompt_file, "w") as f:
150                  f.write(f"Q: {query}\nA: ")
151              
152              # Try to use llama.cpp directly if available
153              llama_cli = os.path.expanduser("~/llama.cpp/build/bin/llama-cli")
154              if os.path.exists(llama_cli):
155                  try:
156                      result = subprocess.run(
157                          [llama_cli, "-m", model_path, "-p", f"Q: {query}\nA:", "-n", "50", "-t", "4"],
158                          capture_output=True,
159                          text=True,
160                          timeout=30
161                      )
162                      
163                      if result.returncode == 0:
164                          # Extract the answer after "A:"
165                          output = result.stdout
166                          if "A:" in output:
167                              answer = output.split("A:", 1)[1].strip()
168                              answer = self.clean_response(answer)
169                              return {
170                                  "success": True,
171                                  "api": "local",
172                                  "model": "tinyllama",
173                                  "response": answer,
174                                  "tokens": 0
175                              }
176                  except subprocess.TimeoutExpired:
177                      return {"success": False, "error": "Local model timeout"}
178              
179              # Fallback to ultimate_orchestrator with shorter timeout
180              try:
181                  result = subprocess.run(
182                      ["python3", "ultimate_orchestrator.py", "tinyllama", query],
183                      capture_output=True,
184                      text=True,
185                      timeout=45  # Much shorter timeout
186                  )
187                  
188                  if result.returncode == 0:
189                      # Simple extraction
190                      lines = result.stdout.split('\n')
191                      for i, line in enumerate(lines):
192                          if "FINAL ANSWER" in line or "šŸŽÆ FINAL ANSWER" in line:
193                              # Get next non-empty line
194                              for j in range(i+1, len(lines)):
195                                  if lines[j].strip() and not lines[j].startswith("==="):
196                                      answer = self.clean_response(lines[j].strip())
197                                      return {
198                                          "success": True,
199                                          "api": "local",
200                                          "model": "tinyllama",
201                                          "response": answer,
202                                          "tokens": 0
203                                      }
204              except subprocess.TimeoutExpired:
205                  return {"success": False, "error": "Local model timeout (45s)"}
206              
207              return {"success": False, "error": "Could not extract answer"}
208              
209          except Exception as e:
210              return {"success": False, "error": str(e)[:100]}
211      
212      def clean_response(self, text: str) -> str:
213          """Clean and validate AI responses"""
214          if not text:
215              return ""
216          
217          # Remove common tags
218          text = re.sub(r'<s>|</s>|\[OUT\]|\[/OUT\]|\[/s\]|<\|.*?\|>', '', text)
219          
220          # Remove "Answer:" or similar prefixes
221          text = re.sub(r'^(Answer|Response|A)\s*[:.-]\s*', '', text, flags=re.IGNORECASE)
222          
223          # Clean whitespace
224          text = re.sub(r'\s+', ' ', text).strip()
225          
226          return text
227      
228      async def orchestrate(self, query: str) -> dict:
229          """Simple, reliable orchestration"""
230          print(f"\nšŸ¤– WORKING ORCHESTRATOR")
231          print("="*60)
232          print(f"Query: {query}")
233          print("="*60)
234          
235          start_time = time.time()
236          
237          # Strategy: Try OpenRouter first, then Groq, then local
238          print("\n1. Trying OpenRouter...")
239          result = await self.call_openrouter(query)
240          
241          if result["success"]:
242              print(f"   āœ… OpenRouter: Success")
243              source = "openrouter"
244          else:
245              print(f"   āŒ OpenRouter: {result['error']}")
246              
247              print("\n2. Trying Groq...")
248              result = await self.call_groq(query)
249              
250              if result["success"]:
251                  print(f"   āœ… Groq: Success ({result.get('model', '')})")
252                  source = "groq"
253              else:
254                  print(f"   āŒ Groq: {result['error']}")
255                  
256                  print("\n3. Trying local model...")
257                  result = self.call_local_fast(query)
258                  
259                  if result["success"]:
260                      print(f"   āœ… Local: Success ({result.get('model', '')})")
261                      source = "local"
262                  else:
263                      print(f"   āŒ Local: {result['error']}")
264          
265          elapsed = time.time() - start_time
266          
267          if result["success"]:
268              return {
269                  "success": True,
270                  "query": query,
271                  "response": result["response"],
272                  "source": source,
273                  "model": result.get("model", ""),
274                  "tokens": result.get("tokens", 0),
275                  "time": elapsed,
276                  "timestamp": datetime.now().isoformat()
277              }
278          else:
279              return {
280                  "success": False,
281                  "query": query,
282                  "error": "All methods failed",
283                  "time": elapsed,
284                  "timestamp": datetime.now().isoformat()
285              }
286  
async def main():
    """CLI entry point: read the query from argv, orchestrate, print and save.

    Exits with status 1 when no query is given. On success the full result
    dict is written to a timestamped JSON file in the working directory.
    """
    if len(sys.argv) < 2:
        print("Usage: python working_orchestrator.py \"Your query\"")
        sys.exit(1)

    # Join all CLI words so the query needn't be quoted as one argument.
    query = " ".join(sys.argv[1:])

    orchestrator = WorkingOrchestrator()
    result = await orchestrator.orchestrate(query)

    print("\n" + "="*60)
    if result["success"]:
        print("āœ… SUCCESS")
        print("="*60)
        print(f"Source: {result['source']}")
        if result.get('model'):
            print(f"Model: {result['model']}")
        print(f"Time: {result['time']:.1f}s")

        print("\nšŸŽÆ ANSWER:")
        print("="*60)
        print(result['response'])
        print("="*60)

        # Save result to a timestamped JSON file.
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"working_result_{timestamp}.json"
        with open(filename, "w") as f:
            json.dump(result, f, indent=2)
        # Bug fix: previously printed a hard-coded "(unknown)" placeholder;
        # report the actual file the result was written to.
        print(f"\nšŸ’¾ Saved to: {filename}")
    else:
        print("āŒ FAILED")
        print("="*60)
        print(f"Error: {result['error']}")
321  
# Script entry point: drive the async CLI via the event loop.
if __name__ == "__main__":
    asyncio.run(main())