#!/usr/bin/env python3
import http.server
import json
import socketserver
import subprocess
import os
from datetime import datetime

# Use the dolphin-7b model if available, otherwise fall back to tinyllama
MODEL_PATH = os.path.expanduser("~/models/dolphin-7b.gguf")
FALLBACK_MODEL = os.path.expanduser("~/models/tinyllama.gguf")

LLAMA_CPP_PATH = os.path.expanduser("~/llama.cpp")


class EnhancedAIHandler(http.server.BaseHTTPRequestHandler):
    def get_model_info(self):
        # Delegate to the module-level helper so the lookup logic lives in one place
        return get_model_info()

    def log_message(self, format, *args):
        # Suppress default request logging
        pass

    def do_POST(self):
        if self.path == '/completion':
            self.handle_completion()
        elif self.path == '/v1/chat/completions':
            self.handle_openai_completion()
        else:
            self.send_response(404)
            self.end_headers()

    def handle_completion(self):
        content_length = int(self.headers.get('Content-Length', 0))
        post_data = self.rfile.read(content_length)

        try:
            data = json.loads(post_data)
            prompt = data.get('prompt', 'Hello')
            model_path, model_name = self.get_model_info()

            if model_path:
                response_text = self.generate_response(prompt, model_path)
            else:
                response_text = "Error: No model file found. Please download a model."

            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({
                'content': response_text,
                'model': model_name,
                'success': True,
                'created': int(datetime.now().timestamp())
            }).encode())

        except Exception as e:
            self.send_response(500)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'error': str(e), 'success': False}).encode())

    def handle_openai_completion(self):
        content_length = int(self.headers.get('Content-Length', 0))
        post_data = self.rfile.read(content_length)

        try:
            data = json.loads(post_data)
            messages = data.get('messages', [])
            prompt = messages[-1]['content'] if messages else 'Hello'
            model_path, model_name = self.get_model_info()

            if model_path:
                response_text = self.generate_response(prompt, model_path)
            else:
                response_text = "Error: No model available."
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({
                'choices': [{
                    'message': {
                        'role': 'assistant',
                        'content': response_text
                    },
                    'finish_reason': 'stop',
                    'index': 0
                }],
                'model': model_name,
                'created': int(datetime.now().timestamp())
            }).encode())

        except Exception as e:
            self.send_response(500)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()
            self.wfile.write(json.dumps({'error': str(e)}).encode())

    def generate_response(self, prompt, model_path):
        # Look for the llama.cpp CLI binary; newer builds name it 'llama-cli',
        # older builds name it 'main'
        candidates = [os.path.join(LLAMA_CPP_PATH, 'llama-cli'),
                      os.path.join(LLAMA_CPP_PATH, 'main')]
        llama_cli = next((p for p in candidates if os.path.exists(p)), None)

        if llama_cli and os.path.exists(model_path):
            try:
                # Use actual model inference
                temp = "0.7"
                if "code" in prompt.lower() or "program" in prompt.lower():
                    temp = "0.2"  # Lower temperature for code generation

                # Build the command: 256 new tokens, 4 threads
                cmd = [llama_cli, '-m', model_path, '-p', prompt,
                       '-n', '256', '-t', '4', '--temp', temp]

                # Run with a timeout so a stuck model doesn't hang the request
                result = subprocess.run(cmd, capture_output=True, text=True, timeout=120)
                return result.stdout.strip()
            except subprocess.TimeoutExpired:
                return "Response timeout - the model is still processing (120s limit)."
            except Exception as e:
                return f"Model error: {str(e)}"
        else:
            # Fallback - report which model would have been used
            if "dolphin" in model_path:
                return f"[Dolphin 7B Model Loaded - Would process: '{prompt[:50]}...']"
            else:
                return f"[TinyLlama Model - Would process: '{prompt[:50]}...']"

    def do_GET(self):
        if self.path == '/health':
            self.send_response(200)
            self.send_header('Content-Type', 'application/json')
            self.end_headers()

            model_path, model_name = self.get_model_info()
            model_size = "Unknown"
            if model_path and os.path.exists(model_path):
                size_bytes = os.path.getsize(model_path)
                model_size = f"{size_bytes / (1024**3):.2f}GB"

            response = {
                'status': 'ok',
                'model': model_name,
                'model_size': model_size,
                'server': 'enhanced-local-7b',
                'endpoints': ['/completion', '/v1/chat/completions', '/health'],
                'message': 'True unrestricted AI server'
            }
            self.wfile.write(json.dumps(response).encode())
        elif self.path == '/':
            self.send_response(200)
            self.send_header('Content-Type', 'text/html')
            self.end_headers()
            self.wfile.write(b'<h1>Local AI Server (Unrestricted)</h1><p>Dolphin 7B Model</p>')
        else:
            self.send_response(404)
            self.end_headers()


def get_model_info():
    """Return (model_path, model_name) for the first model file that exists."""
    if os.path.exists(MODEL_PATH):
        return MODEL_PATH, "dolphin-2.2.1-mistral-7b"
    elif os.path.exists(FALLBACK_MODEL):
        return FALLBACK_MODEL, "tinyllama-1.1b"
    else:
        return None, "no-model"


if __name__ == '__main__':
    PORT = 8081  # Use a different port to avoid conflicts with other local servers

    model_path, model_name = get_model_info()

    print("🤖 ENHANCED LOCAL AI SERVER (7B MODEL)")
    print("======================================")

    if model_path:
        print(f"✅ Model: {model_name}")
        size_bytes = os.path.getsize(model_path) if os.path.exists(model_path) else 0
        print(f"✅ Size: {size_bytes / (1024**3):.2f}GB")
        print(f"✅ Path: {model_path}")
    else:
        print("⚠️ No model found. Using fallback responses.")

    print(f"✅ Port: {PORT}")
    print("✅ Endpoints:")
    print("   • GET  /health               - Server status")
    print("   • POST /completion           - Basic completion")
    print("   • POST /v1/chat/completions  - OpenAI-compatible")
    print("======================================")
    print("🚀 Server starting... (Ctrl+C to stop)")

    # Allow quick restarts without hitting 'Address already in use'
    socketserver.TCPServer.allow_reuse_address = True
    with socketserver.TCPServer(("", PORT), EnhancedAIHandler) as httpd:
        httpd.serve_forever()
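
# Example requests for manual testing - a rough sketch assuming the server is
# running locally on the PORT configured above (8081) and that curl is
# available; adjust host/port to match your setup. The 'prompt' and 'messages'
# payload fields correspond to what handle_completion() and
# handle_openai_completion() read from the request body.
#
#   curl http://localhost:8081/health
#
#   curl -X POST http://localhost:8081/completion \
#        -H 'Content-Type: application/json' \
#        -d '{"prompt": "Write a haiku about local LLMs"}'
#
#   curl -X POST http://localhost:8081/v1/chat/completions \
#        -H 'Content-Type: application/json' \
#        -d '{"messages": [{"role": "user", "content": "Hello"}]}'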