/ enhanced_local_server.py
enhanced_local_server.py
1 #!/usr/bin/env python3 2 import http.server 3 import json 4 import socketserver 5 import subprocess 6 import os 7 import threading 8 from datetime import datetime 9 10 MODEL_PATH = os.path.expanduser("~/models/tinyllama.gguf") 11 LLAMA_CPP_PATH = os.path.expanduser("~/llama.cpp") 12 13 class EnhancedAIHandler(http.server.BaseHTTPRequestHandler): 14 def log_message(self, format, *args): 15 # Suppress default logging 16 pass 17 18 def do_POST(self): 19 if self.path == '/completion': 20 self.handle_completion() 21 elif self.path == '/v1/chat/completions': 22 self.handle_openai_completion() 23 else: 24 self.send_response(404) 25 self.end_headers() 26 27 def handle_completion(self): 28 content_length = int(self.headers['Content-Length']) 29 post_data = self.rfile.read(content_length) 30 31 try: 32 data = json.loads(post_data) 33 prompt = data.get('prompt', 'Hello') 34 35 # Try to use llama.cpp if available 36 response_text = self.generate_response(prompt) 37 38 self.send_response(200) 39 self.send_header('Content-type', 'application/json') 40 self.end_headers() 41 self.wfile.write(json.dumps({ 42 'content': response_text, 43 'model': 'tinyllama-local', 44 'success': True, 45 'created': int(datetime.now().timestamp()) 46 }).encode()) 47 48 except Exception as e: 49 self.send_response(500) 50 self.end_headers() 51 self.wfile.write(json.dumps({'error': str(e), 'success': False}).encode()) 52 53 def handle_openai_completion(self): 54 content_length = int(self.headers['Content-Length']) 55 post_data = self.rfile.read(content_length) 56 57 try: 58 data = json.loads(post_data) 59 messages = data.get('messages', []) 60 prompt = messages[-1]['content'] if messages else 'Hello' 61 62 response_text = self.generate_response(prompt) 63 64 self.send_response(200) 65 self.send_header('Content-type', 'application/json') 66 self.end_headers() 67 self.wfile.write(json.dumps({ 68 'choices': [{ 69 'message': { 70 'role': 'assistant', 71 'content': response_text 72 }, 73 'finish_reason': 'stop', 74 'index': 0 75 }], 76 'model': 'tinyllama-local', 77 'created': int(datetime.now().timestamp()) 78 }).encode()) 79 80 except Exception as e: 81 self.send_response(500) 82 self.end_headers() 83 self.wfile.write(json.dumps({'error': str(e)}).encode()) 84 85 def generate_response(self, prompt): 86 # Check for llama.cpp main binary 87 llama_cli = os.path.join(LLAMA_CPP_PATH, 'main') 88 if os.path.exists(llama_cli) and os.path.exists(MODEL_PATH): 89 try: 90 # Try to use actual model 91 cmd = [llama_cli, '-m', MODEL_PATH, '-p', prompt, '-n', '100', '-t', '2', '--temp', '0.7'] 92 result = subprocess.run(cmd, capture_output=True, text=True, timeout=30) 93 return result.stdout.strip() 94 except: 95 # Fallback to echo 96 return f"Response to: {prompt} (using TinyLlama 1.1B)" 97 else: 98 # Enhanced echo for testing 99 responses = { 100 "hello": "Hello! I'm your local AI assistant running on TinyLlama.", 101 "help": "I can help with basic questions. Try asking about programming or general knowledge.", 102 "test": "Local server test successful! The AI system is working.", 103 "unrestricted": "This is a local model running without external API filters.", 104 } 105 106 prompt_lower = prompt.lower() 107 for key, response in responses.items(): 108 if key in prompt_lower: 109 return response 110 111 return f"Local AI received: '{prompt}'. This would be processed by TinyLlama 1.1B model." 112 113 def do_GET(self): 114 if self.path == '/health': 115 self.send_response(200) 116 self.send_header('Content-type', 'application/json') 117 self.end_headers() 118 response = { 119 'status': 'ok', 120 'model': 'tinyllama-1.1b', 121 'server': 'enhanced-local', 122 'endpoints': ['/completion', '/v1/chat/completions', '/health'] 123 } 124 self.wfile.write(json.dumps(response).encode()) 125 elif self.path == '/': 126 self.send_response(200) 127 self.send_header('Content-type', 'text/html') 128 self.end_headers() 129 self.wfile.write(b'<h1>Local AI Server</h1><p>TinyLlama 1.1B running locally</p>') 130 else: 131 self.send_response(404) 132 self.end_headers() 133 134 if __name__ == '__main__': 135 PORT = 8080 136 print(f"🤖 ENHANCED LOCAL AI SERVER") 137 print(f"=============================") 138 print(f"Port: {PORT}") 139 print(f"Model: TinyLlama 1.1B") 140 print(f"Endpoints:") 141 print(f" • GET /health - Server status") 142 print(f" • POST /completion - Basic completion") 143 print(f" • POST /v1/chat/completions - OpenAI-compatible") 144 print(f"=============================") 145 146 with socketserver.TCPServer(("", PORT), EnhancedAIHandler) as httpd: 147 httpd.serve_forever()