// rpc-server.cpp — standalone ggml RPC backend server
1 #ifdef GGML_USE_CUDA 2 #include "ggml-cuda.h" 3 #endif 4 5 #ifdef GGML_USE_METAL 6 #include "ggml-metal.h" 7 #endif 8 9 #include "ggml-rpc.h" 10 #ifdef _WIN32 11 # include <windows.h> 12 #else 13 # include <unistd.h> 14 #endif 15 #include <string> 16 #include <stdio.h> 17 18 struct rpc_server_params { 19 std::string host = "0.0.0.0"; 20 int port = 50052; 21 size_t backend_mem = 0; 22 }; 23 24 static void print_usage(int /*argc*/, char ** argv, rpc_server_params params) { 25 fprintf(stderr, "Usage: %s [options]\n\n", argv[0]); 26 fprintf(stderr, "options:\n"); 27 fprintf(stderr, " -h, --help show this help message and exit\n"); 28 fprintf(stderr, " -H HOST, --host HOST host to bind to (default: %s)\n", params.host.c_str()); 29 fprintf(stderr, " -p PORT, --port PORT port to bind to (default: %d)\n", params.port); 30 fprintf(stderr, " -m MEM, --mem MEM backend memory size (in MB)\n"); 31 fprintf(stderr, "\n"); 32 } 33 34 static bool rpc_server_params_parse(int argc, char ** argv, rpc_server_params & params) { 35 std::string arg; 36 for (int i = 1; i < argc; i++) { 37 arg = argv[i]; 38 if (arg == "-H" || arg == "--host") { 39 if (++i >= argc) { 40 return false; 41 } 42 params.host = argv[i]; 43 } else if (arg == "-p" || arg == "--port") { 44 if (++i >= argc) { 45 return false; 46 } 47 params.port = std::stoi(argv[i]); 48 if (params.port <= 0 || params.port > 65535) { 49 return false; 50 } 51 } else if (arg == "-m" || arg == "--mem") { 52 if (++i >= argc) { 53 return false; 54 } 55 params.backend_mem = std::stoul(argv[i]) * 1024 * 1024; 56 } else if (arg == "-h" || arg == "--help") { 57 print_usage(argc, argv, params); 58 exit(0); 59 } else { 60 fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); 61 print_usage(argc, argv, params); 62 exit(0); 63 } 64 } 65 return true; 66 } 67 68 static ggml_backend_t create_backend() { 69 ggml_backend_t backend = NULL; 70 #ifdef GGML_USE_CUDA 71 fprintf(stderr, "%s: using CUDA backend\n", __func__); 72 backend = 
ggml_backend_cuda_init(0); // init device 0 73 if (!backend) { 74 fprintf(stderr, "%s: ggml_backend_cuda_init() failed\n", __func__); 75 } 76 #elif GGML_USE_METAL 77 fprintf(stderr, "%s: using Metal backend\n", __func__); 78 backend = ggml_backend_metal_init(); 79 if (!backend) { 80 fprintf(stderr, "%s: ggml_backend_metal_init() failed\n", __func__); 81 } 82 #endif 83 84 // if there aren't GPU Backends fallback to CPU backend 85 if (!backend) { 86 fprintf(stderr, "%s: using CPU backend\n", __func__); 87 backend = ggml_backend_cpu_init(); 88 } 89 return backend; 90 } 91 92 static void get_backend_memory(size_t * free_mem, size_t * total_mem) { 93 #ifdef GGML_USE_CUDA 94 ggml_backend_cuda_get_device_memory(0, free_mem, total_mem); 95 #else 96 #ifdef _WIN32 97 MEMORYSTATUSEX status; 98 status.dwLength = sizeof(status); 99 GlobalMemoryStatusEx(&status); 100 *total_mem = status.ullTotalPhys; 101 *free_mem = status.ullAvailPhys; 102 #else 103 long pages = sysconf(_SC_PHYS_PAGES); 104 long page_size = sysconf(_SC_PAGE_SIZE); 105 *total_mem = pages * page_size; 106 *free_mem = *total_mem; 107 #endif 108 #endif 109 } 110 111 int main(int argc, char * argv[]) { 112 rpc_server_params params; 113 if (!rpc_server_params_parse(argc, argv, params)) { 114 fprintf(stderr, "Invalid parameters\n"); 115 return 1; 116 } 117 ggml_backend_t backend = create_backend(); 118 if (!backend) { 119 fprintf(stderr, "Failed to create backend\n"); 120 return 1; 121 } 122 std::string endpoint = params.host + ":" + std::to_string(params.port); 123 size_t free_mem, total_mem; 124 if (params.backend_mem > 0) { 125 free_mem = params.backend_mem; 126 total_mem = params.backend_mem; 127 } else { 128 get_backend_memory(&free_mem, &total_mem); 129 } 130 printf("Starting RPC server on %s, backend memory: %zu MB\n", endpoint.c_str(), free_mem / (1024 * 1024)); 131 start_rpc_server(backend, endpoint.c_str(), free_mem, total_mem); 132 ggml_backend_free(backend); 133 return 0; 134 }