#!/usr/bin/env python3
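"""Minimal local inference server that exposes a llama.cpp model over HTTP.

Serves a basic /completion endpoint, an OpenAI-style /v1/chat/completions
endpoint, and a /health status endpoint. Model and binary locations are
configured via the constants below.
"""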
import http.server
import json
import os
import socketserver
import subprocess
from datetime import datetime

# Prefer the dolphin-7b model if available; otherwise fall back to tinyllama.
MODEL_PATH = os.path.expanduser("~/models/dolphin-7b.gguf")
FALLBACK_MODEL = os.path.expanduser("~/models/tinyllama.gguf")

LLAMA_CPP_PATH = os.path.expanduser("~/llama.cpp")

class EnhancedAIHandler(http.server.BaseHTTPRequestHandler):
    def log_message(self, format, *args):
        # Suppress the default per-request logging.
        pass

    @staticmethod
    def get_model_path():
        """Return (path, name) for the first model file that exists on disk."""
        if os.path.exists(MODEL_PATH):
            return MODEL_PATH, "dolphin-2.2.1-mistral-7b"
        elif os.path.exists(FALLBACK_MODEL):
            return FALLBACK_MODEL, "tinyllama-1.1b"
        else:
            return None, "no-model"

    def do_POST(self):
        if self.path == '/completion':
            self.handle_completion()
        elif self.path == '/v1/chat/completions':
            self.handle_openai_completion()
        else:
            self.send_response(404)
            self.end_headers()

    def handle_completion(self):
        """Handle the simple /completion endpoint: {"prompt": "..."}."""
        content_length = int(self.headers.get('Content-Length', 0))
        post_data = self.rfile.read(content_length)

        try:
            data = json.loads(post_data)
            prompt = data.get('prompt', 'Hello')
            model_path, model_name = self.get_model_path()

            if model_path:
                response_text = self.generate_response(prompt, model_path)
            else:
                response_text = "Error: No model file found. Please download a model."

            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({
                'content': response_text,
                'model': model_name,
                'success': True,
                'created': int(datetime.now().timestamp())
            }).encode())

        except Exception as e:
            self.send_response(500)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'error': str(e), 'success': False}).encode())

    def handle_openai_completion(self):
        """Handle the OpenAI-style /v1/chat/completions endpoint."""
        content_length = int(self.headers.get('Content-Length', 0))
        post_data = self.rfile.read(content_length)

        try:
            data = json.loads(post_data)
            messages = data.get('messages', [])
            # Only the most recent message is used; earlier history is ignored.
            prompt = messages[-1]['content'] if messages else 'Hello'
            model_path, model_name = self.get_model_path()

            if model_path:
                response_text = self.generate_response(prompt, model_path)
            else:
                response_text = "Error: No model available."

            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({
                'choices': [{
                    'message': {
                        'role': 'assistant',
                        'content': response_text
                    },
                    'finish_reason': 'stop',
                    'index': 0
                }],
                'model': model_name,
                'created': int(datetime.now().timestamp())
            }).encode())

        except Exception as e:
            self.send_response(500)
            self.send_header('Content-type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'error': str(e)}).encode())

    def generate_response(self, prompt, model_path):
        """Run llama.cpp on the prompt and return its stdout, or a stub."""
        # Older llama.cpp builds ship a 'main' binary; newer ones use 'llama-cli'.
        llama_cli = os.path.join(LLAMA_CPP_PATH, 'main')
        if not os.path.exists(llama_cli):
            llama_cli = os.path.join(LLAMA_CPP_PATH, 'llama-cli')
        if os.path.exists(llama_cli) and os.path.exists(model_path):
            try:
                # Lower the temperature for code generation, where determinism
                # matters more than variety.
                temp = "0.7"
                if "code" in prompt.lower() or "program" in prompt.lower():
                    temp = "0.2"

                # -n: max tokens to generate, -t: CPU threads, --temp: sampling temperature
                cmd = [llama_cli, '-m', model_path, '-p', prompt,
                       '-n', '256', '-t', '4', '--temp', temp]
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
                return result.stdout.strip()
            except subprocess.TimeoutExpired:
                return "Error: model inference timed out after 60 seconds."
            except Exception as e:
                return f"Model error: {str(e)}"
        else:
            # No binary or model available: return canned placeholder responses.
            if "dolphin" in model_path:
                return f"[Dolphin 7B would process: {prompt[:50]}...]"
            else:
                return f"[TinyLlama would process: {prompt[:50]}...]"

    def do_GET(self):
        if self.path == '/health':
            self.send_response(200)
            self.send_header('Content-type', 'application/json')
            self.end_headers()

            model_path, model_name = self.get_model_path()
            model_size = "Unknown"
            if model_path and os.path.exists(model_path):
                model_size = f"{os.path.getsize(model_path) / (1024**3):.2f}GB"

            response = {
                'status': 'ok',
                'model': model_name,
                'model_size': model_size,
                'server': 'enhanced-local-7b',
                'endpoints': ['/completion', '/v1/chat/completions', '/health']
            }
            self.wfile.write(json.dumps(response).encode())
        elif self.path == '/':
            self.send_response(200)
            self.send_header('Content-type', 'text/html')
            self.end_headers()
            self.wfile.write(b'<h1>Local AI Server</h1><p>Uncensored model testing</p>')
        else:
            self.send_response(404)
            self.end_headers()

if __name__ == '__main__':
    PORT = 8081  # Different port to avoid conflict with other local servers

    print("🤖 ENHANCED LOCAL AI SERVER (7B MODEL)")
    print("======================================")

    model_path, model_name = EnhancedAIHandler.get_model_path()
    if model_path:
        print(f"Model: {model_name}")
        print(f"Path: {model_path}")
    else:
        print("⚠️  No model found. Using fallback responses.")

    print(f"Port: {PORT}")
    print("Endpoints:")
    print("  • GET  /health              - Server status")
    print("  • POST /completion          - Basic completion")
    print("  • POST /v1/chat/completions - OpenAI-compatible")
    print("======================================")

    # A threading server keeps /health responsive while a slow inference runs;
    # allow_reuse_address avoids "Address already in use" on quick restarts.
    socketserver.ThreadingTCPServer.allow_reuse_address = True
    with socketserver.ThreadingTCPServer(("", PORT), EnhancedAIHandler) as httpd:
        httpd.serve_forever()
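
# Quick smoke test (a sketch, assuming the server is running on localhost:8081;
# the prompts below are illustrative only):
#
#   curl http://localhost:8081/health
#
#   curl -X POST http://localhost:8081/completion \
#        -H 'Content-Type: application/json' \
#        -d '{"prompt": "Write a haiku about servers"}'
#
#   curl -X POST http://localhost:8081/v1/chat/completions \
#        -H 'Content-Type: application/json' \
#        -d '{"messages": [{"role": "user", "content": "Hello"}]}'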