# enhanced_local_server_7b.py
#!/usr/bin/env python3
"""Enhanced local AI HTTP server.

Serves llama.cpp-backed completions on two POST endpoints (a simple
``/completion`` API and an OpenAI-compatible ``/v1/chat/completions``)
plus ``/health`` and ``/`` GET endpoints.  Falls back to canned echo
responses when no model file or llama.cpp binary is present.
"""
import http.server
import json
import socketserver
import subprocess
import os
import threading
from datetime import datetime

# Use dolphin-7b model if available, otherwise fall back to tinyllama.
MODEL_PATH = os.path.expanduser("~/models/dolphin-7b.gguf")
FALLBACK_MODEL = os.path.expanduser("~/models/tinyllama.gguf")

LLAMA_CPP_PATH = os.path.expanduser("~/llama.cpp")


def resolve_model():
    """Return ``(model_path, model_name)``; ``(None, "no-model")`` if none exists.

    Module-level so it can be called both from request handlers and from the
    startup banner in ``__main__``.  The original code called
    ``EnhancedAIHandler().get_model_path()`` there, which raises ``TypeError``:
    ``BaseHTTPRequestHandler`` cannot be constructed without an active
    request/client/server triple.
    """
    if os.path.exists(MODEL_PATH):
        return MODEL_PATH, "dolphin-2.2.1-mistral-7b"
    if os.path.exists(FALLBACK_MODEL):
        return FALLBACK_MODEL, "tinyllama-1.1b"
    return None, "no-model"


class EnhancedAIHandler(http.server.BaseHTTPRequestHandler):
    """Request handler exposing completion, chat, and health endpoints."""

    def log_message(self, format, *args):
        # Suppress default per-request stderr logging.
        pass

    def get_model_path(self):
        """Backward-compatible wrapper; delegates to module-level resolve_model()."""
        return resolve_model()

    def _read_body(self):
        """Read the request body, tolerating a missing Content-Length header.

        The original ``int(self.headers['Content-Length'])`` raised
        ``TypeError`` on requests without a body.
        """
        content_length = int(self.headers.get('Content-Length') or 0)
        return self.rfile.read(content_length)

    def _send_json(self, status, payload):
        """Serialize *payload* as JSON and send it with the given HTTP status."""
        self.send_response(status)
        self.send_header('Content-type', 'application/json')
        self.end_headers()
        self.wfile.write(json.dumps(payload).encode())

    def do_POST(self):
        if self.path == '/completion':
            self.handle_completion()
        elif self.path == '/v1/chat/completions':
            self.handle_openai_completion()
        else:
            self.send_response(404)
            self.end_headers()

    def handle_completion(self):
        """Basic completion endpoint: ``{"prompt": ...}`` -> ``{"content": ...}``."""
        post_data = self._read_body()
        try:
            data = json.loads(post_data)
            prompt = data.get('prompt', 'Hello')
            model_path, model_name = self.get_model_path()

            if model_path:
                response_text = self.generate_response(prompt, model_path)
            else:
                response_text = "Error: No model file found. Please download a model."

            self._send_json(200, {
                'content': response_text,
                'model': model_name,
                'success': True,
                'created': int(datetime.now().timestamp()),
            })
        except Exception as e:
            self._send_json(500, {'error': str(e), 'success': False})

    def handle_openai_completion(self):
        """OpenAI-compatible chat endpoint; only the last message's content is used."""
        post_data = self._read_body()
        try:
            data = json.loads(post_data)
            messages = data.get('messages', [])
            prompt = messages[-1]['content'] if messages else 'Hello'
            model_path, model_name = self.get_model_path()

            if model_path:
                response_text = self.generate_response(prompt, model_path)
            else:
                response_text = "Error: No model available."

            self._send_json(200, {
                'choices': [{
                    'message': {
                        'role': 'assistant',
                        'content': response_text,
                    },
                    'finish_reason': 'stop',
                    'index': 0,
                }],
                'model': model_name,
                'created': int(datetime.now().timestamp()),
            })
        except Exception as e:
            self._send_json(500, {'error': str(e)})

    def generate_response(self, prompt, model_path):
        """Run llama.cpp inference; fall back to a canned echo when unavailable.

        Returns the model's stdout on success, a timeout/error message on
        failure, or a placeholder string keyed on the model filename when the
        llama.cpp binary or model file is missing.
        """
        llama_cli = os.path.join(LLAMA_CPP_PATH, 'main')
        if os.path.exists(llama_cli) and os.path.exists(model_path):
            try:
                # Lower temperature for code-like prompts (original heuristic).
                temp = "0.7"
                if "code" in prompt.lower() or "program" in prompt.lower():
                    temp = "0.2"

                cmd = [llama_cli, '-m', model_path, '-p', prompt,
                       '-n', '256', '-t', '4', '--temp', temp]
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=60)
                return result.stdout.strip()
            except subprocess.TimeoutExpired:
                return "Response timeout - model is processing..."
            except Exception as e:
                return f"Model error: {str(e)}"
        # Fallback canned responses keyed on the model filename.
        if "dolphin" in model_path:
            return f"[Dolphin 7B would process: {prompt[:50]}...]"
        return f"[TinyLlama would process: {prompt[:50]}...]"

    def do_GET(self):
        if self.path == '/health':
            model_path, model_name = self.get_model_path()
            model_size = "Unknown"
            if model_path and os.path.exists(model_path):
                model_size = f"{os.path.getsize(model_path) / (1024**3):.2f}GB"

            self._send_json(200, {
                'status': 'ok',
                'model': model_name,
                'model_size': model_size,
                'server': 'enhanced-local-7b',
                'endpoints': ['/completion', '/v1/chat/completions', '/health'],
            })
        elif self.path == '/':
            self.send_response(200)
            self.send_header('Content-type', 'text/html')
            self.end_headers()
            self.wfile.write(b'<h1>Local AI Server</h1><p>Uncensored model testing</p>')
        else:
            self.send_response(404)
            self.end_headers()


if __name__ == '__main__':
    PORT = 8081  # Different port to avoid conflict
    print(f"🤖 ENHANCED LOCAL AI SERVER (7B MODEL)")
    print(f"======================================")

    # Bug fix: the original called EnhancedAIHandler().get_model_path(), which
    # raises TypeError -- BaseHTTPRequestHandler requires request/client/server
    # arguments and immediately tries to process a request on construction.
    model_path, model_name = resolve_model()
    if model_path:
        print(f"Model: {model_name}")
        print(f"Path: {model_path}")
    else:
        print("⚠️ No model found. Using fallback responses.")

    print(f"Port: {PORT}")
    print(f"Endpoints:")
    print(f" • GET /health - Server status")
    print(f" • POST /completion - Basic completion")
    print(f" • POST /v1/chat/completions - OpenAI-compatible")
    print(f"======================================")

    # Allow quick restarts without "address already in use" errors.
    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.TCPServer(("", PORT), EnhancedAIHandler) as httpd:
        httpd.serve_forever()