#!/usr/bin/env python3
import http.server
import json
import socketserver
import subprocess
import os
from datetime import datetime

# Use the dolphin-7b model if available, otherwise fall back to tinyllama
MODEL_PATH = os.path.expanduser("~/models/dolphin-7b.gguf")
FALLBACK_MODEL = os.path.expanduser("~/models/tinyllama.gguf")

LLAMA_CPP_PATH = os.path.expanduser("~/llama.cpp")

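# NOTE (assumption): depending on how llama.cpp was built, the CLI binary may be
# named "main" (older builds) or "llama-cli" (newer builds, often under build/bin/);
# generate_response() below probes for both.
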
class EnhancedAIHandler(http.server.BaseHTTPRequestHandler):
    def get_model_info(self):
        if os.path.exists(MODEL_PATH):
            return MODEL_PATH, "dolphin-2.2.1-mistral-7b"
        elif os.path.exists(FALLBACK_MODEL):
            return FALLBACK_MODEL, "tinyllama-1.1b"
        else:
            return None, "no-model"

    def log_message(self, format, *args):
        # Suppress the default per-request logging
        pass

    def do_POST(self):
        if self.path == '/completion':
            self.handle_completion()
        elif self.path == '/v1/chat/completions':
            self.handle_openai_completion()
        else:
            self.send_response(404)
            self.end_headers()

    def handle_completion(self):
        # A missing Content-Length header would make int(None) raise, so default to 0
        content_length = int(self.headers.get('Content-Length', 0))
        post_data = self.rfile.read(content_length)

        try:
            data = json.loads(post_data)
            prompt = data.get('prompt', 'Hello')
            model_path, model_name = self.get_model_info()

            if model_path:
                response_text = self.generate_response(prompt, model_path)
            else:
                response_text = "Error: No model file found. Please download a model."

            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({
                'content': response_text,
                'model': model_name,
                'success': True,
                'created': int(datetime.now().timestamp())
            }).encode())

        except Exception as e:
            self.send_response(500)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'error': str(e), 'success': False}).encode())

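    # The OpenAI-compatible endpoint expects a JSON body shaped like this
    # (illustrative example; only the last message's content is used as the prompt):
    #   {"messages": [{"role": "user", "content": "Hello"}]}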
    def handle_openai_completion(self):
        content_length = int(self.headers.get('Content-Length', 0))
        post_data = self.rfile.read(content_length)

        try:
            data = json.loads(post_data)
            messages = data.get('messages', [])
            prompt = messages[-1].get('content', 'Hello') if messages else 'Hello'
            model_path, model_name = self.get_model_info()

            if model_path:
                response_text = self.generate_response(prompt, model_path)
            else:
                response_text = "Error: No model available."

            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({
                'choices': [{
                    'message': {
                        'role': 'assistant',
                        'content': response_text
                    },
                    'finish_reason': 'stop',
                    'index': 0
                }],
                'model': model_name,
                'created': int(datetime.now().timestamp())
            }).encode())

        except Exception as e:
            self.send_response(500)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'error': str(e)}).encode())

    def generate_response(self, prompt, model_path):
        # Look for the llama.cpp CLI binary; older builds call it "main",
        # newer builds "llama-cli" (sometimes under build/bin/)
        llama_cli = None
        for name in ('main', 'llama-cli', os.path.join('build', 'bin', 'llama-cli')):
            candidate = os.path.join(LLAMA_CPP_PATH, name)
            if os.path.exists(candidate):
                llama_cli = candidate
                break

        if llama_cli and os.path.exists(model_path):
            try:
                # Use actual model inference; lower temperature for code generation
                temp = "0.7"
                if "code" in prompt.lower() or "program" in prompt.lower():
                    temp = "0.2"

                # Build command: -m model, -p prompt, -n max tokens,
                # -t CPU threads, --temp sampling temperature
                cmd = [llama_cli, '-m', model_path, '-p', prompt, '-n', '256', '-t', '4', '--temp', temp]

                # Run with a timeout so a stuck inference cannot hang the request forever
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
                output = result.stdout.strip()
                # llama.cpp echoes the prompt ahead of the completion; drop it if present
                if output.startswith(prompt):
                    output = output[len(prompt):].lstrip()
                return output
            except subprocess.TimeoutExpired:
                return "Response timeout - model is processing (120s limit)..."
            except Exception as e:
                return f"Model error: {str(e)}"
        else:
            # Fallback - report which model would have been used
            if "dolphin" in model_path:
                return f"[Dolphin 7B Model Loaded - Would process: '{prompt[:50]}...']"
            else:
                return f"[TinyLlama Model - Would process: '{prompt[:50]}...']"

    def do_GET(self):
        if self.path == '/health':
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()

            model_path, model_name = self.get_model_info()
            model_size = "Unknown"
            if model_path and os.path.exists(model_path):
                size_bytes = os.path.getsize(model_path)
                model_size = f"{size_bytes / (1024**3):.2f}GB"

            response = {
                'status': 'ok',
                'model': model_name,
                'model_size': model_size,
                'server': 'enhanced-local-7b',
                'endpoints': ['/completion', '/v1/chat/completions', '/health'],
                'message': 'True unrestricted AI server'
            }
            self.wfile.write(json.dumps(response).encode())
        elif self.path == '/':
            self.send_response(200)
            self.send_header('Content-type', 'text/html')
            self.end_headers()
            self.wfile.write(b'<h1>Local AI Server (Unrestricted)</h1><p>Dolphin 7B Model</p>')
        else:
            self.send_response(404)
            self.end_headers()

def get_model_info():
    """Helper function to get model info without a handler instance"""
    if os.path.exists(MODEL_PATH):
        return MODEL_PATH, "dolphin-2.2.1-mistral-7b"
    elif os.path.exists(FALLBACK_MODEL):
        return FALLBACK_MODEL, "tinyllama-1.1b"
    else:
        return None, "no-model"

if __name__ == '__main__':
    PORT = 8081  # Different port to avoid conflicts with other local servers

    model_path, model_name = get_model_info()

    print("🤖 ENHANCED LOCAL AI SERVER (7B MODEL)")
    print("======================================")

    if model_path:
        print(f"✅ Model: {model_name}")
        size_bytes = os.path.getsize(model_path) if os.path.exists(model_path) else 0
        print(f"✅ Size: {size_bytes / (1024**3):.2f}GB")
        print(f"✅ Path: {model_path}")
    else:
        print("⚠️  No model found. Using fallback responses.")

    print(f"✅ Port: {PORT}")
    print("✅ Endpoints:")
    print("  • GET  /health              - Server status")
    print("  • POST /completion          - Basic completion")
    print("  • POST /v1/chat/completions - OpenAI-compatible")
    print("======================================")
    print("🚀 Server starting... (Ctrl+C to stop)")

    # A threaded server keeps /health responsive while a long inference runs;
    # allow_reuse_address avoids "Address already in use" on quick restarts
    socketserver.ThreadingTCPServer.allow_reuse_address = True
    with socketserver.ThreadingTCPServer(("", PORT), EnhancedAIHandler) as httpd:
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            print("\n👋 Server stopped.")
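
# Example requests (assuming the server is running locally on port 8081):
#   curl http://localhost:8081/health
#   curl -X POST http://localhost:8081/completion \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Write a haiku about the sea"}'
#   curl -X POST http://localhost:8081/v1/chat/completions \
#        -H "Content-Type: application/json" \
#        -d '{"messages": [{"role": "user", "content": "Hello"}]}'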