/ Makefile
Makefile
# Define the default target now so that it is always the first target
BUILD_TARGETS = \
    libllava.a \
    llama-batched \
    llama-batched-bench \
    llama-bench \
    llama-cli \
    llama-convert-llama2c-to-ggml \
    llama-embedding \
    llama-eval-callback \
    llama-export-lora \
    llama-gbnf-validator \
    llama-gguf \
    llama-gguf-hash \
    llama-gguf-split \
    llama-gritlm \
    llama-imatrix \
    llama-infill \
    llama-llava-cli \
    llama-minicpmv-cli \
    llama-lookahead \
    llama-lookup \
    llama-lookup-create \
    llama-lookup-merge \
    llama-lookup-stats \
    llama-parallel \
    llama-passkey \
    llama-perplexity \
    llama-q8dot \
    llama-quantize \
    llama-quantize-stats \
    llama-retrieval \
    llama-save-load-state \
    llama-server \
    llama-simple \
    llama-simple-chat \
    llama-run \
    llama-speculative \
    llama-tokenize \
    llama-vdot \
    llama-cvector-generator \
    llama-gen-docs \
    tests/test-c.o

# Binaries only useful for tests
TEST_TARGETS = \
    tests/test-arg-parser \
    tests/test-autorelease \
    tests/test-backend-ops \
    tests/test-chat-template \
    tests/test-double-float \
    tests/test-grammar-integration \
    tests/test-grammar-parser \
    tests/test-json-schema-to-grammar \
    tests/test-llama-grammar \
    tests/test-log \
    tests/test-model-load-cancel \
    tests/test-quantize-fns \
    tests/test-quantize-perf \
    tests/test-rope \
    tests/test-sampling \
    tests/test-tokenizer-0 \
    tests/test-tokenizer-1-bpe \
    tests/test-tokenizer-1-spm
# Currently disabled: tests/test-opt
# (no trailing backslash here: a comment ending in `\` swallows the next line)

# Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \
    simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
    retrieval speculative infill tokenize parallel export-lora lookahead lookup passkey gritlm

# Legacy build targets that were renamed in #7809. We still build binaries for them,
# but those binaries only print a deprecation warning when people try to use them.
# We don't want to clutter things too much, so we only build replacements for the most commonly used binaries.
LEGACY_TARGETS_BUILD = main quantize perplexity embedding server

# Deprecation aliases: map the old LLAMA_* switches onto their GGML_* replacements
# and remember that a warning has to be printed later.
ifdef LLAMA_CUBLAS
$(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)
endif

ifdef LLAMA_CUDA
GGML_CUDA := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_KOMPUTE
GGML_KOMPUTE := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_METAL
GGML_METAL := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_RPC
GGML_RPC := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_SYCL
GGML_SYCL := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_SYCL_F16
GGML_SYCL_F16 := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_OPENBLAS
GGML_OPENBLAS := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_OPENBLAS64
GGML_OPENBLAS64 := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_BLIS
GGML_BLIS := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_NO_LLAMAFILE
GGML_NO_LLAMAFILE := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_NO_ACCELERATE
GGML_NO_ACCELERATE := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_NO_OPENMP
GGML_NO_OPENMP := 1
DEPRECATE_WARNING := 1
endif

ifdef LLAMA_NO_METAL
GGML_NO_METAL := 1
DEPRECATE_WARNING := 1
endif

# Options that no longer exist at all: only a removal notice is printed for them.
ifdef LLAMA_DISABLE_LOGS
REMOVE_WARNING := 1
endif

ifdef LLAMA_SERVER_VERBOSE
REMOVE_WARNING := 1
endif

# Host identification (each value can be preset by the caller, e.g. for cross builds)
ifndef UNAME_S
UNAME_S := $(shell uname -s)
endif

ifndef UNAME_P
UNAME_P := $(shell uname -p)
endif

ifndef UNAME_M
UNAME_M := $(shell uname -m)
endif

# In GNU make default CXX is g++ instead of c++. Let's fix that so that users
# of non-gcc compilers don't have to provide g++ alias or wrapper.
DEFCC  := cc
DEFCXX := c++
ifeq ($(origin CC),default)
CC  := $(DEFCC)
endif
ifeq ($(origin CXX),default)
CXX := $(DEFCXX)
endif

# Mac OS + Arm can report x86_64
# ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
ifeq ($(UNAME_S),Darwin)
    ifndef GGML_NO_METAL
        GGML_METAL := 1
    endif

    GGML_NO_OPENMP := 1

    ifneq ($(UNAME_P),arm)
        SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
        ifeq ($(SYSCTL_M),1)
            # UNAME_P := arm
            # UNAME_M := arm64
            warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
        endif
    endif
endif

ifdef GGML_METAL
GGML_METAL_EMBED_LIBRARY := 1
endif

ifdef GGML_RPC
BUILD_TARGETS += rpc-server
endif

ifdef GGML_VULKAN
BUILD_TARGETS += vulkan-shaders-gen
endif

default: $(BUILD_TARGETS) $(LEGACY_TARGETS_BUILD)

# Run every test binary and count failures.
# test-tokenizer-0 is run once per vocabulary; the runs are chained with `&&` so that
# a failure on ANY vocabulary is reported (with `;` only the last run's status was seen).
# The test-tokenizer-1-* binaries are built but not executed here.
test: $(TEST_TARGETS)
	@failures=0; \
	for test_target in $(TEST_TARGETS); do \
		if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
			./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf && \
			./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf && \
			./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf && \
			./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf && \
			./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf && \
			./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf && \
			./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf && \
			./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
		elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
			continue; \
		elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
			continue; \
		else \
			echo "Running test $$test_target..."; \
			./$$test_target; \
		fi; \
		if [ $$? -ne 0 ]; then \
			printf 'Test %s FAILED!\n\n' $$test_target; \
			failures=$$(( failures + 1 )); \
		else \
			printf 'Test %s passed.\n\n' $$test_target; \
		fi; \
	done; \
	if [ $$failures -gt 0 ]; then \
		printf '\n%s tests failed.\n' $$failures; \
		exit 1; \
	fi
	@echo 'All tests passed.'

all: $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_BUILD)

# These names are commands, not files; never let a stray file shadow them.
.PHONY: default test all

ifdef RISCV_CROSS_COMPILE
CC  := riscv64-unknown-linux-gnu-gcc
CXX := riscv64-unknown-linux-gnu-g++
endif

#
# Compile flags
#

# keep standard at C11 and C++17
MK_CPPFLAGS  = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -DGGML_USE_CPU
MK_CFLAGS    = -std=c11 -fPIC
MK_CXXFLAGS  = -std=c++17 -fPIC
MK_NVCCFLAGS = -std=c++17

ifdef LLAMA_NO_CCACHE
GGML_NO_CCACHE := 1
DEPRECATE_WARNING := 1
endif

# Prefix the compilers with ccache when it is installed (opt out with GGML_NO_CCACHE).
ifndef GGML_NO_CCACHE
CCACHE := $(shell which ccache)
ifdef CCACHE
export CCACHE_SLOPPINESS = time_macros
$(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE.)
CC  := $(CCACHE) $(CC)
CXX := $(CCACHE) $(CXX)
else
$(info I ccache not found. Consider installing it for faster compilation.)
endif # CCACHE
endif # GGML_NO_CCACHE

# clock_gettime came in POSIX.1b (1993)
# CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
# posix_memalign came in POSIX.1-2001 / SUSv3
# M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
MK_CPPFLAGS += -D_XOPEN_SOURCE=600

# Somehow in OpenBSD whenever POSIX conformance is specified
# some string functions rely on locale_t availability,
# which was introduced in POSIX.1-2008, forcing us to go higher
ifeq ($(UNAME_S),OpenBSD)
    MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
endif

# Data types, macros and functions related to controlling CPU affinity and
# some memory allocation are available on Linux through GNU extensions in libc
ifeq ($(UNAME_S),Linux)
    MK_CPPFLAGS += -D_GNU_SOURCE
    MK_LDFLAGS  += -ldl
endif

# RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
# and on macOS its availability depends on enabling Darwin extensions
# similarly on DragonFly, enabling BSD extensions is necessary
ifeq ($(UNAME_S),Darwin)
    MK_CPPFLAGS += -D_DARWIN_C_SOURCE
endif
ifeq ($(UNAME_S),DragonFly)
    MK_CPPFLAGS += -D__BSD_VISIBLE
endif

# alloca is a non-standard interface that is not visible on BSDs when
# POSIX conformance is specified, but not all of them provide a clean way
# to enable it in such cases
ifeq ($(UNAME_S),FreeBSD)
    MK_CPPFLAGS += -D__BSD_VISIBLE
endif
ifeq ($(UNAME_S),NetBSD)
    MK_CPPFLAGS += -D_NETBSD_SOURCE
endif
ifeq ($(UNAME_S),OpenBSD)
    MK_CPPFLAGS += -D_BSD_SOURCE
endif

ifdef GGML_SCHED_MAX_COPIES
    MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES)
endif

# Debug builds disable optimisation; release builds define NDEBUG and use -O3.
# Debug info (-g) is emitted in both configurations.
ifdef LLAMA_DEBUG
    MK_CFLAGS    += -O0 -g
    MK_CXXFLAGS  += -O0 -g
    MK_LDFLAGS   += -g
    MK_NVCCFLAGS += -O0 -g

    ifeq ($(UNAME_S),Linux)
        MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
    endif
else
    MK_CPPFLAGS   += -DNDEBUG
    MK_CFLAGS     += -O3 -g
    MK_CXXFLAGS   += -O3 -g
    MK_NVCCFLAGS  += -O3 -g
endif

ifdef LLAMA_SANITIZE_THREAD
    MK_CFLAGS   += -fsanitize=thread -g
    MK_CXXFLAGS += -fsanitize=thread -g
    MK_LDFLAGS  += -fsanitize=thread -g
endif

ifdef LLAMA_SANITIZE_ADDRESS
    MK_CFLAGS   += -fsanitize=address -fno-omit-frame-pointer -g
    MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
    MK_LDFLAGS  += -fsanitize=address -fno-omit-frame-pointer -g
endif

ifdef LLAMA_SANITIZE_UNDEFINED
    MK_CFLAGS   += -fsanitize=undefined -g
    MK_CXXFLAGS += -fsanitize=undefined -g
    MK_LDFLAGS  += -fsanitize=undefined -g
endif

ifdef LLAMA_SERVER_SSL
    MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
    MK_LDFLAGS  += -lssl -lcrypto
endif

ifndef GGML_NO_CPU_AARCH64
    MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
endif

# warnings
WARN_FLAGS = \
    -Wall \
    -Wextra \
    -Wpedantic \
    -Wcast-qual \
    -Wno-unused-function

MK_CFLAGS += \
    $(WARN_FLAGS) \
    -Wshadow \
    -Wstrict-prototypes \
    -Wpointer-arith \
    -Wmissing-prototypes \
    -Werror=implicit-int \
    -Werror=implicit-function-declaration

MK_CXXFLAGS += \
    $(WARN_FLAGS) \
    -Wmissing-declarations \
    -Wmissing-noreturn

ifeq ($(LLAMA_FATAL_WARNINGS),1)
    MK_CFLAGS   += -Werror
    MK_CXXFLAGS += -Werror
endif

# this version of Apple ld64 is buggy
ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
    MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
endif

# OS specific
# TODO: support Windows
ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
    MK_CFLAGS   += -pthread
    MK_CXXFLAGS += -pthread
endif

# detect Windows
ifneq ($(findstring _NT,$(UNAME_S)),)
    _WIN32 := 1
endif

# library name prefix
ifneq ($(_WIN32),1)
    LIB_PRE := lib
endif

# Dynamic Shared Object extension
ifneq ($(_WIN32),1)
    DSO_EXT := .so
else
    DSO_EXT := .dll
endif

# Windows Sockets 2 (Winsock) for network-capable apps
ifeq ($(_WIN32),1)
    LWINSOCK2 := -lws2_32
endif

ifdef LLAMA_GPROF
    MK_CFLAGS   += -pg
    MK_CXXFLAGS += -pg
endif

# Architecture specific
# TODO: probably these flags need to be tweaked on some architectures
#       feel free to update the Makefile for your architecture and send a pull request or issue

ifndef RISCV_CROSS_COMPILE

ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
    # Use all CPU extensions that are available.
    # The C++ flags go to HOST_CXXFLAGS (not MK_CXXFLAGS); HOST_CXXFLAGS is merged
    # into CXXFLAGS later in this file.
    MK_CFLAGS     += -march=native -mtune=native
    HOST_CXXFLAGS += -march=native -mtune=native

    # Usage AVX-only
    #MK_CFLAGS   += -mfma -mf16c -mavx
    #MK_CXXFLAGS += -mfma -mf16c -mavx

    # Usage SSSE3-only (note: SSSE3, not SSE3!)
    #MK_CFLAGS   += -mssse3
    #MK_CXXFLAGS += -mssse3
endif

ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
    # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
    # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
    # https://github.com/ggerganov/llama.cpp/issues/2922
    MK_CFLAGS   += -Xassembler -muse-unaligned-vector-move
    MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move

    # Target Windows 8 for PrefetchVirtualMemory
    MK_CPPFLAGS += -D_WIN32_WINNT=0x602
endif

ifneq ($(filter aarch64%,$(UNAME_M)),)
    # Apple M1, M2, etc.
    # Raspberry Pi 3, 4, Zero 2 (64-bit)
    # Nvidia Jetson
    MK_CFLAGS   += -mcpu=native
    MK_CXXFLAGS += -mcpu=native
    # Simply expanded (`:=`) on purpose: `ifdef` does not expand its argument, so with a
    # recursive `=` the test below would be true even when jetson_release prints nothing.
    JETSON_RELEASE_INFO := $(shell jetson_release)
    ifdef JETSON_RELEASE_INFO
        ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
            JETSON_EOL_MODULE_DETECT = 1
            CC  = aarch64-unknown-linux-gnu-gcc
            # was lowercase `cxx`, which left the C++ compiler unchanged
            CXX = aarch64-unknown-linux-gnu-g++
        endif
    endif
endif

ifneq ($(filter armv6%,$(UNAME_M)),)
    # Raspberry Pi 1, Zero
    MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
    MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter armv7%,$(UNAME_M)),)
    # Raspberry Pi 2
    MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
    MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif

ifneq ($(filter armv8%,$(UNAME_M)),)
    # Raspberry Pi 3, 4, Zero 2 (32-bit)
    MK_CFLAGS   += -mfp16-format=ieee -mno-unaligned-access
    MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
endif

ifneq ($(filter ppc64%,$(UNAME_M)),)
    POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
    ifneq (,$(findstring POWER9,$(POWER9_M)))
        MK_CFLAGS   += -mcpu=power9
        MK_CXXFLAGS += -mcpu=power9
    endif
endif

ifneq ($(filter ppc64le%,$(UNAME_M)),)
    MK_CFLAGS   += -mcpu=powerpc64le
    MK_CXXFLAGS += -mcpu=powerpc64le
    CUDA_POWER_ARCH = 1
endif

ifneq ($(filter loongarch64%,$(UNAME_M)),)
    MK_CFLAGS   += -mlasx
    MK_CXXFLAGS += -mlasx
endif

ifneq ($(filter riscv64%,$(UNAME_M)),)
    MK_CFLAGS   += -march=rv64gcv -mabi=lp64d
    MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
endif

else # RISC-V CROSS COMPILATION
    MK_CFLAGS   += -march=rv64gcv -mabi=lp64d
    MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
endif

ifndef GGML_NO_ACCELERATE
    # Mac OS - include Accelerate framework.
    # `-framework Accelerate` works both with Apple Silicon and Mac Intel
    ifeq ($(UNAME_S),Darwin)
        MK_CPPFLAGS  += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
        MK_CPPFLAGS  += -DACCELERATE_NEW_LAPACK
        MK_CPPFLAGS  += -DACCELERATE_LAPACK_ILP64
        MK_LDFLAGS   += -framework Accelerate
        OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
    endif
endif # GGML_NO_ACCELERATE

ifndef GGML_NO_OPENMP
    MK_CPPFLAGS += -DGGML_USE_OPENMP
    MK_CFLAGS   += -fopenmp
    MK_CXXFLAGS += -fopenmp
endif # GGML_NO_OPENMP

ifdef GGML_OPENBLAS
    MK_CPPFLAGS  += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
    MK_CFLAGS    += $(shell pkg-config --cflags-only-other openblas)
    MK_LDFLAGS   += $(shell pkg-config --libs openblas)
    OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
endif # GGML_OPENBLAS

ifdef GGML_OPENBLAS64
    MK_CPPFLAGS  += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
    MK_CFLAGS    += $(shell pkg-config --cflags-only-other openblas64)
    MK_LDFLAGS   += $(shell pkg-config --libs openblas64)
    OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
endif # GGML_OPENBLAS64

ifdef GGML_BLIS
    MK_CPPFLAGS  += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis
    MK_LDFLAGS   += -lblis -L/usr/local/lib
    OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
endif # GGML_BLIS

ifdef GGML_NVPL
    MK_CPPFLAGS  += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas
    MK_LDFLAGS   += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp
    OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
endif # GGML_NVPL

ifndef GGML_NO_LLAMAFILE
    MK_CPPFLAGS  += -DGGML_USE_LLAMAFILE
    OBJ_GGML_EXT += ggml/src/ggml-cpu/llamafile/sgemm.o
endif

ifndef GGML_NO_AMX
    MK_CPPFLAGS  += -DGGML_USE_AMX
    OBJ_GGML_EXT += ggml/src/ggml-cpu/amx/amx.o ggml/src/ggml-cpu/amx/mmq.o
endif

# only necessary for the CPU backend files
MK_CPPFLAGS += -Iggml/src/ggml-cpu

ifdef GGML_RPC
    MK_CPPFLAGS  += -DGGML_USE_RPC
    OBJ_GGML_EXT += ggml/src/ggml-rpc.o
endif # GGML_RPC

# CUDA template instances (shared by the CUDA, HIP and MUSA backends below)
OBJ_CUDA_TMPL  = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))

ifdef GGML_CUDA_FA_ALL_QUANTS
    OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu))
else
    OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
    OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
    OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
endif # GGML_CUDA_FA_ALL_QUANTS

#
# CUDA backend
#
ifdef GGML_CUDA
    ifneq ('', '$(wildcard /opt/cuda)')
        CUDA_PATH ?= /opt/cuda
    else
        CUDA_PATH ?= /usr/local/cuda
    endif

    MK_CPPFLAGS  += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
    MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
    MK_NVCCFLAGS += -use_fast_math

    OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
    OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
    OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)

ifdef LLAMA_FATAL_WARNINGS
    MK_NVCCFLAGS += -Werror all-warnings
endif # LLAMA_FATAL_WARNINGS

ifndef JETSON_EOL_MODULE_DETECT
    MK_NVCCFLAGS += --forward-unknown-to-host-compiler
endif # JETSON_EOL_MODULE_DETECT

ifdef LLAMA_DEBUG
    MK_NVCCFLAGS += -lineinfo
endif # LLAMA_DEBUG

ifdef GGML_CUDA_DEBUG
    MK_NVCCFLAGS += --device-debug
endif # GGML_CUDA_DEBUG

ifdef GGML_CUDA_NVCC
    NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
else
    NVCC = $(CCACHE) nvcc
endif # GGML_CUDA_NVCC

ifdef CUDA_DOCKER_ARCH
    MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
else ifndef CUDA_POWER_ARCH
    MK_NVCCFLAGS += -arch=native
endif # CUDA_DOCKER_ARCH

ifdef GGML_CUDA_FORCE_MMQ
    MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
endif # GGML_CUDA_FORCE_MMQ

ifdef GGML_CUDA_FORCE_CUBLAS
    MK_NVCCFLAGS += -DGGML_CUDA_FORCE_CUBLAS
endif # GGML_CUDA_FORCE_CUBLAS

ifdef GGML_CUDA_F16
    MK_NVCCFLAGS += -DGGML_CUDA_F16
endif # GGML_CUDA_F16

# GGML_CUDA_DMMV_F16 defines the same macro as GGML_CUDA_F16
ifdef GGML_CUDA_DMMV_F16
    MK_NVCCFLAGS += -DGGML_CUDA_F16
endif # GGML_CUDA_DMMV_F16

ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
    MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
else
    MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
endif # GGML_CUDA_PEER_MAX_BATCH_SIZE

ifdef GGML_CUDA_NO_PEER_COPY
    MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
endif # GGML_CUDA_NO_PEER_COPY

ifdef GGML_CUDA_CCBIN
    MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN)
endif # GGML_CUDA_CCBIN

ifdef GGML_CUDA_FA_ALL_QUANTS
    MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
endif # GGML_CUDA_FA_ALL_QUANTS

# NVCC_COMPILE is the recipe body used by both CUDA object rules below.
# When JETSON_EOL_MODULE_DETECT is set, a fixed set of include paths and -std=c++11 is used.
ifdef JETSON_EOL_MODULE_DETECT
define NVCC_COMPILE
$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
else
define NVCC_COMPILE
$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
endef # NVCC_COMPILE
endif # JETSON_EOL_MODULE_DETECT

ggml/src/ggml-cuda/%.o: \
	ggml/src/ggml-cuda/%.cu \
	ggml/include/ggml.h \
	ggml/src/ggml-common.h \
	ggml/src/ggml-cuda/common.cuh
	$(NVCC_COMPILE)

ggml/src/ggml-cuda/ggml-cuda.o: \
	ggml/src/ggml-cuda/ggml-cuda.cu \
	ggml/include/ggml-cuda.h \
	ggml/include/ggml.h \
	ggml/include/ggml-backend.h \
	ggml/src/ggml-backend-impl.h \
	ggml/src/ggml-common.h \
	$(wildcard ggml/src/ggml-cuda/*.cuh)
	$(NVCC_COMPILE)
endif # GGML_CUDA

#
# Vulkan backend
#
ifdef GGML_VULKAN
    MK_CPPFLAGS  += -DGGML_USE_VULKAN
    MK_LDFLAGS   += $(shell pkg-config --libs vulkan)
    OBJ_GGML_EXT += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o

ifdef GGML_VULKAN_CHECK_RESULTS
    MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
endif

ifdef GGML_VULKAN_DEBUG
    MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
endif

ifdef GGML_VULKAN_MEMORY_DEBUG
    MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
endif

ifdef GGML_VULKAN_PERF
    MK_CPPFLAGS += -DGGML_VULKAN_PERF
endif

ifdef GGML_VULKAN_VALIDATE
    MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
endif

ifdef GGML_VULKAN_RUN_TESTS
    MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
endif

GLSLC_CMD = glslc
# $(CURDIR) is make's own absolute working directory; no shell needs to be spawned.
_ggml_vk_genshaders_cmd = $(CURDIR)/vulkan-shaders-gen
_ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp
_ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp
_ggml_vk_input_dir = ggml/src/ggml-vulkan/vulkan-shaders
# Every *.comp shader is a prerequisite of the generated sources.
# (This used `$(echo ...)`, which is not a make function and expanded to nothing,
# so editing a shader never triggered regeneration.)
_ggml_vk_shader_deps = $(wildcard $(_ggml_vk_input_dir)/*.comp)

ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
	$(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@

# The header is produced as a side effect of generating the .cpp file.
$(_ggml_vk_header): $(_ggml_vk_source)

$(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
	$(_ggml_vk_genshaders_cmd) \
		--glslc      $(GLSLC_CMD) \
		--input-dir  $(_ggml_vk_input_dir) \
		--target-hpp $(_ggml_vk_header) \
		--target-cpp $(_ggml_vk_source)

vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
	$(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) $<

endif # GGML_VULKAN

#
# HIP (ROCm) backend - compiles the CUDA sources with hipcc
#
ifdef GGML_HIP
    ifeq ($(wildcard /opt/rocm),)
        ROCM_PATH      ?= /usr
        AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
    else
        ROCM_PATH      ?= /opt/rocm
        AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
    endif

    MK_CPPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA

ifdef GGML_HIP_UMA
    MK_CPPFLAGS += -DGGML_HIP_UMA
endif # GGML_HIP_UMA

    MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
    MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
    MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas

    HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc

    HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))

ifdef GGML_CUDA_FORCE_MMQ
    HIPFLAGS += -DGGML_CUDA_FORCE_MMQ
endif # GGML_CUDA_FORCE_MMQ

ifdef GGML_CUDA_FORCE_CUBLAS
    HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS
endif # GGML_CUDA_FORCE_CUBLAS

ifdef GGML_CUDA_NO_PEER_COPY
    HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
endif # GGML_CUDA_NO_PEER_COPY

    OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
    OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
    OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)

ggml/src/ggml-cuda/ggml-cuda.o: \
	ggml/src/ggml-cuda/ggml-cuda.cu \
	ggml/include/ggml-cuda.h \
	ggml/include/ggml.h \
	ggml/include/ggml-backend.h \
	ggml/src/ggml-backend-impl.h \
	ggml/src/ggml-common.h \
	$(wildcard ggml/src/ggml-cuda/*.cuh)
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<

ggml/src/ggml-cuda/%.o: \
	ggml/src/ggml-cuda/%.cu \
	ggml/include/ggml.h \
	ggml/src/ggml-common.h \
	ggml/src/ggml-cuda/common.cuh
	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
endif # GGML_HIP

#
# MUSA backend - compiles the CUDA sources with mcc
#
ifdef GGML_MUSA
    ifeq ($(wildcard /opt/musa),)
        MUSA_PATH ?= /usr/local/musa
    else
        MUSA_PATH ?= /opt/musa
    endif
    MUSA_ARCHITECTURES ?= 21;22

    MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
    MK_LDFLAGS  += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
    MK_LDFLAGS  += -lmusa -lmusart -lmublas

    ifndef GGML_NO_OPENMP
        # For Ubuntu Focal
        MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
        MK_LDFLAGS  += -L/usr/lib/llvm-10/lib
        # For Ubuntu Jammy
        MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include
        MK_LDFLAGS  += -L/usr/lib/llvm-14/lib
    endif # GGML_NO_OPENMP

    CC  := $(MUSA_PATH)/bin/clang
    CXX := $(MUSA_PATH)/bin/clang++
    MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc

    MUSAFLAGS  = -x musa -mtgpu
    # one --cuda-gpu-arch=mp_<N> per entry of the ';'-separated MUSA_ARCHITECTURES list
    MUSAFLAGS += $(foreach arch,$(subst ;, ,$(MUSA_ARCHITECTURES)),--cuda-gpu-arch=mp_$(arch))

ifdef GGML_CUDA_FORCE_MMQ
    MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
endif # GGML_CUDA_FORCE_MMQ

ifdef GGML_CUDA_FORCE_CUBLAS
    MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS
endif # GGML_CUDA_FORCE_CUBLAS

ifdef GGML_CUDA_F16
    MUSAFLAGS += -DGGML_CUDA_F16
endif # GGML_CUDA_F16

ifdef GGML_CUDA_DMMV_F16
    MUSAFLAGS += -DGGML_CUDA_F16
endif # GGML_CUDA_DMMV_F16

ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
    MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
else
    MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
endif # GGML_CUDA_PEER_MAX_BATCH_SIZE

ifdef GGML_CUDA_NO_PEER_COPY
    MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY
endif # GGML_CUDA_NO_PEER_COPY

ifdef GGML_CUDA_FA_ALL_QUANTS
    MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
endif # GGML_CUDA_FA_ALL_QUANTS

    OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
    OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
    OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)

ggml/src/ggml-cuda/ggml-cuda.o: \
	ggml/src/ggml-cuda/ggml-cuda.cu \
	ggml/include/ggml-cuda.h \
	ggml/include/ggml.h \
	ggml/include/ggml-backend.h \
	ggml/src/ggml-backend-impl.h \
	ggml/src/ggml-common.h \
	$(wildcard ggml/src/ggml-cuda/*.cuh)
	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $<

ggml/src/ggml-cuda/%.o: \
	ggml/src/ggml-cuda/%.cu \
	ggml/include/ggml.h \
	ggml/src/ggml-common.h \
	ggml/src/ggml-cuda/common.cuh
	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $<
endif # GGML_MUSA

#
# Metal backend
#
ifdef GGML_METAL
    MK_CPPFLAGS  += -DGGML_USE_METAL
    MK_LDFLAGS   += -framework Foundation -framework Metal -framework MetalKit
    OBJ_GGML_EXT += ggml/src/ggml-metal/ggml-metal.o

ifdef GGML_METAL_USE_BF16
    MK_CPPFLAGS += -DGGML_METAL_USE_BF16
endif # GGML_METAL_USE_BF16
ifdef GGML_METAL_NDEBUG
    MK_CPPFLAGS += -DGGML_METAL_NDEBUG
endif
ifdef GGML_METAL_EMBED_LIBRARY
    MK_CPPFLAGS  += -DGGML_METAL_EMBED_LIBRARY
    OBJ_GGML_EXT += ggml/src/ggml-metal-embed.o
endif
endif # GGML_METAL

ifdef GGML_METAL
ggml/src/ggml-metal/ggml-metal.o: \
	ggml/src/ggml-metal/ggml-metal.m \
	ggml/src/ggml-metal/ggml-metal-impl.h \
	ggml/include/ggml-metal.h \
	ggml/include/ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

ifdef GGML_METAL_EMBED_LIBRARY
# Embed the Metal shader source into an object file:
#   1. inline ggml-common.h and ggml-metal-impl.h into a copy of ggml-metal.metal,
#   2. write a small assembly stub that .incbin's the result between two global symbols,
#   3. assemble the stub into $@ and remove the temporary directory.
ggml/src/ggml-metal-embed.o: \
	ggml/src/ggml-metal/ggml-metal.metal \
	ggml/src/ggml-metal/ggml-metal-impl.h \
	ggml/src/ggml-common.h
	@echo "Embedding Metal library"
	@sed -e '/__embed_ggml-common.h__/r ggml/src/ggml-common.h' -e '/__embed_ggml-common.h__/d' < ggml/src/ggml-metal/ggml-metal.metal > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
	@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
	$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
	@echo ".section __DATA, __ggml_metallib"                       >  $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo ".globl _ggml_metallib_start"                            >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo "_ggml_metallib_start:"                                  >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo ".globl _ggml_metallib_end"                              >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo "_ggml_metallib_end:"                                    >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
	@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
	@rmdir ${TEMP_ASSEMBLY}
endif
endif # GGML_METAL

#
# Object and library lists
#
DIR_GGML   = ggml
DIR_LLAMA  = src
DIR_COMMON = common

OBJ_GGML = \
    $(DIR_GGML)/src/ggml.o \
    $(DIR_GGML)/src/ggml-aarch64.o \
    $(DIR_GGML)/src/ggml-alloc.o \
    $(DIR_GGML)/src/ggml-backend.o \
    $(DIR_GGML)/src/ggml-backend-reg.o \
    $(DIR_GGML)/src/ggml-opt.o \
    $(DIR_GGML)/src/ggml-quants.o \
    $(DIR_GGML)/src/ggml-threading.o \
    $(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
    $(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o \
    $(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
    $(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
    $(OBJ_GGML_EXT)

OBJ_LLAMA = \
    $(DIR_LLAMA)/llama.o \
    $(DIR_LLAMA)/llama-vocab.o \
    $(DIR_LLAMA)/llama-grammar.o \
    $(DIR_LLAMA)/llama-sampling.o \
    $(DIR_LLAMA)/unicode.o \
    $(DIR_LLAMA)/unicode-data.o

OBJ_COMMON = \
    $(DIR_COMMON)/common.o \
    $(DIR_COMMON)/arg.o \
    $(DIR_COMMON)/log.o \
    $(DIR_COMMON)/console.o \
    $(DIR_COMMON)/ngram-cache.o \
    $(DIR_COMMON)/sampling.o \
    $(DIR_COMMON)/speculative.o \
    $(DIR_COMMON)/build-info.o \
    $(DIR_COMMON)/json-schema-to-grammar.o

OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)

LIB_GGML   = $(LIB_PRE)ggml$(DSO_EXT)
LIB_GGML_S = $(LIB_PRE)ggml.a

LIB_LLAMA   = $(LIB_PRE)llama$(DSO_EXT)
LIB_LLAMA_S = $(LIB_PRE)llama.a

LIB_COMMON   = $(LIB_PRE)common$(DSO_EXT)
LIB_COMMON_S = $(LIB_PRE)common.a

LIB_ALL   = $(LIB_GGML)   $(LIB_LLAMA)   $(LIB_COMMON)
LIB_ALL_S = $(LIB_GGML_S) $(LIB_LLAMA_S) $(LIB_COMMON_S)

# scripts/get-flags.mk takes GF_CC as input; GF_CFLAGS / GF_CXXFLAGS are consumed below.
GF_CC := $(CC)
include scripts/get-flags.mk

# combine build flags with cmdline overrides
override CPPFLAGS  := $(MK_CPPFLAGS) $(CPPFLAGS)
override CFLAGS    := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
BASE_CXXFLAGS      := $(MK_CXXFLAGS) $(CXXFLAGS)
override CXXFLAGS  := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
override LDFLAGS   := $(MK_LDFLAGS) $(LDFLAGS)

# identify CUDA host compiler
ifdef GGML_CUDA
GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
include scripts/get-flags.mk
CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
endif

ifdef LLAMA_CURL
override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
override LDFLAGS  := $(LDFLAGS) -lcurl
endif

#
# Print build information
#

$(info I llama.cpp build info: )
$(info I UNAME_S: $(UNAME_S))
$(info I UNAME_P: $(UNAME_P))
$(info I UNAME_M: $(UNAME_M))
$(info I CFLAGS: $(CFLAGS))
$(info I CXXFLAGS: $(CXXFLAGS))
$(info I NVCCFLAGS: $(NVCCFLAGS))
$(info I LDFLAGS: $(LDFLAGS))
$(info I CC: $(shell $(CC) --version | head -n 1))
$(info I CXX: $(shell $(CXX) --version | head -n 1))
ifdef GGML_CUDA
$(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
# awk prints 1 when the detected CUDA version is older than 11.7
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)

ifndef CUDA_DOCKER_ARCH
ifndef CUDA_POWER_ARCH
$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
endif # CUDA_POWER_ARCH
endif # CUDA_DOCKER_ARCH

endif # CUDA_VERSION < 11.7
endif # GGML_CUDA
$(info )

ifdef DEPRECATE_WARNING
$(info !!! DEPRECATION WARNING !!!)
$(info The following LLAMA_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead)
$(info - LLAMA_CUDA)
$(info - LLAMA_METAL)
$(info - LLAMA_METAL_EMBED_LIBRARY)
$(info - LLAMA_OPENMP)
$(info - LLAMA_RPC)
$(info - LLAMA_SYCL)
$(info - LLAMA_SYCL_F16)
$(info - LLAMA_OPENBLAS)
$(info - LLAMA_OPENBLAS64)
$(info - LLAMA_BLIS)
$(info - LLAMA_NO_LLAMAFILE)
$(info - LLAMA_NO_ACCELERATE)
$(info - LLAMA_NO_OPENMP)
$(info - LLAMA_NO_METAL)
$(info - LLAMA_NO_CCACHE)
$(info )
endif

ifdef REMOVE_WARNING
$(info !!! REMOVAL WARNING !!!)
$(info The following LLAMA_ options have been removed and are no longer supported)
$(info - LLAMA_DISABLE_LOGS (https://github.com/ggerganov/llama.cpp/pull/9418))
$(info - LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418))
$(info )
endif

#
# Build libraries
#

# Libraries
# NOTE(review): these assignments replace the $(LIB_PRE)/$(DSO_EXT)-based definitions made
# earlier in this file, so the shared libraries are named lib*.so even when DSO_EXT is
# .dll - confirm whether that is intended before removing either set.
LIB_GGML   = libggml.so
LIB_GGML_S = libggml.a

LIB_LLAMA   = libllama.so
LIB_LLAMA_S = libllama.a

LIB_COMMON   = libcommon.so
LIB_COMMON_S = libcommon.a

# Targets
# NOTE(review): prerequisites are expanded when a rule is read, so this addition affects
# only rules that appear after this line (the `all` rule below), not the earlier
# `default` rule.
BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S)

# Dependency files
DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)

# Default target
all: $(BUILD_TARGETS)

# Note: need this exception because `ggml-cpu.c` and `ggml-cpu.cpp` both produce the same obj/dep files
#   g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o: \
	ggml/src/ggml-cpu/ggml-cpu.cpp \
	ggml/include/ggml-backend.h \
	ggml/include/ggml.h \
	ggml/include/ggml-alloc.h \
	ggml/src/ggml-backend-impl.h \
	ggml/include/ggml-cpu.h \
	ggml/src/ggml-impl.h
	$(CXX) $(CXXFLAGS) -c $< -o $@

# Rules for building object files (-MMD writes a .d dependency file next to each object)
$(DIR_GGML)/%.o: $(DIR_GGML)/%.c
	$(CC) $(CFLAGS) -MMD -c $< -o $@

$(DIR_GGML)/%.o: $(DIR_GGML)/%.cpp
	$(CXX) $(CXXFLAGS) -MMD -c $< -o $@

$(DIR_LLAMA)/%.o: $(DIR_LLAMA)/%.cpp
	$(CXX) $(CXXFLAGS) -MMD -c $< -o $@

$(DIR_COMMON)/%.o: $(DIR_COMMON)/%.cpp
	$(CXX) $(CXXFLAGS) -MMD -c $< -o $@

# Rules for building libraries
$(LIB_GGML): $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

# $(AR) defaults to `ar` and can be overridden for cross toolchains
$(LIB_GGML_S): $(OBJ_GGML)
	$(AR) rcs $@ $^

$(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC
-o $@ $^ $(LDFLAGS) 1131 1132 $(LIB_LLAMA_S): $(OBJ_LLAMA) 1133 ar rcs $(LIB_LLAMA_S) $^ 1134 1135 $(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML) 1136 $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) 1137 1138 $(LIB_COMMON_S): $(OBJ_COMMON) 1139 ar rcs $(LIB_COMMON_S) $^ 1140 1141 # Include dependency files 1142 -include $(DEP_FILES) 1143 1144 # Clean rule 1145 clean: 1146 rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS) 1147 rm -rvf *.a *.dll *.so *.dot 1148 find ggml src common tests examples pocs -type f -name "*.o" -delete 1149 find ggml src common tests examples pocs -type f -name "*.d" -delete 1150 1151 # 1152 # Examples 1153 # 1154 1155 # $< is the first prerequisite, i.e. the source file. 1156 # Explicitly compile this to an object file so that it can be cached with ccache. 1157 # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead. 1158 1159 # Helper function that replaces .c, .cpp, and .cu file endings with .o: 1160 GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1)))) 1161 1162 llama-cli: examples/main/main.cpp \ 1163 $(OBJ_ALL) 1164 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1165 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1166 @echo 1167 @echo '==== Run ./llama-cli -h for help. 
====' 1168 @echo 1169 1170 llama-infill: examples/infill/infill.cpp \ 1171 $(OBJ_ALL) 1172 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1173 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1174 1175 llama-run: examples/run/run.cpp \ 1176 $(OBJ_ALL) 1177 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1178 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1179 1180 llama-simple: examples/simple/simple.cpp \ 1181 $(OBJ_ALL) 1182 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1183 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1184 1185 llama-simple-chat: examples/simple-chat/simple-chat.cpp \ 1186 $(OBJ_ALL) 1187 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1188 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1189 1190 llama-tokenize: examples/tokenize/tokenize.cpp \ 1191 $(OBJ_ALL) 1192 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1193 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1194 1195 llama-batched: examples/batched/batched.cpp \ 1196 $(OBJ_ALL) 1197 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1198 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1199 1200 llama-batched-bench: examples/batched-bench/batched-bench.cpp \ 1201 $(OBJ_ALL) 1202 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1203 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1204 1205 llama-quantize: examples/quantize/quantize.cpp \ 1206 $(OBJ_ALL) 1207 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1208 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1209 1210 llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp \ 1211 $(OBJ_ALL) 1212 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1213 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) 
$(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1214 1215 llama-perplexity: examples/perplexity/perplexity.cpp \ 1216 $(OBJ_ALL) 1217 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1218 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1219 1220 llama-imatrix: examples/imatrix/imatrix.cpp \ 1221 $(OBJ_ALL) 1222 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1223 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1224 1225 llama-embedding: examples/embedding/embedding.cpp \ 1226 $(OBJ_ALL) 1227 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1228 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1229 1230 llama-gritlm: examples/gritlm/gritlm.cpp \ 1231 $(OBJ_ALL) 1232 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1233 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1234 1235 llama-save-load-state: examples/save-load-state/save-load-state.cpp \ 1236 $(OBJ_ALL) 1237 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1238 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1239 1240 llama-gguf: examples/gguf/gguf.cpp \ 1241 $(OBJ_GGML) 1242 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1243 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1244 1245 examples/gguf-hash/deps/sha1/sha1.o: \ 1246 examples/gguf-hash/deps/sha1/sha1.c 1247 $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ 1248 1249 examples/gguf-hash/deps/xxhash/xxhash.o: \ 1250 examples/gguf-hash/deps/xxhash/xxhash.c 1251 $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ 1252 1253 examples/gguf-hash/deps/sha256/sha256.o: \ 1254 examples/gguf-hash/deps/sha256/sha256.c 1255 $(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@ 1256 1257 llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp examples/gguf-hash/deps/sha1/sha1.o examples/gguf-hash/deps/xxhash/xxhash.o 
examples/gguf-hash/deps/sha256/sha256.o\ 1258 $(OBJ_ALL) 1259 $(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(call GET_OBJ_FILE, $<) 1260 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1261 1262 llama-gguf-split: examples/gguf-split/gguf-split.cpp \ 1263 $(OBJ_ALL) 1264 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1265 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1266 1267 llama-eval-callback: examples/eval-callback/eval-callback.cpp \ 1268 $(OBJ_ALL) 1269 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1270 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1271 1272 llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp \ 1273 $(OBJ_ALL) 1274 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1275 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1276 1277 llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp \ 1278 $(OBJ_ALL) 1279 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1280 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1281 1282 llama-bench: examples/llama-bench/llama-bench.cpp \ 1283 $(OBJ_ALL) 1284 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1285 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1286 1287 llama-export-lora: examples/export-lora/export-lora.cpp \ 1288 $(OBJ_ALL) 1289 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1290 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1291 1292 llama-retrieval: examples/retrieval/retrieval.cpp \ 1293 $(OBJ_ALL) 1294 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1295 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1296 1297 llama-speculative: examples/speculative/speculative.cpp \ 1298 $(OBJ_ALL) 1299 $(CXX) 
$(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1300 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1301 1302 llama-parallel: examples/parallel/parallel.cpp \ 1303 $(OBJ_ALL) 1304 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1305 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1306 1307 llama-lookahead: examples/lookahead/lookahead.cpp \ 1308 $(OBJ_ALL) 1309 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1310 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1311 1312 llama-lookup: examples/lookup/lookup.cpp \ 1313 $(OBJ_ALL) 1314 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1315 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1316 1317 llama-lookup-create: examples/lookup/lookup-create.cpp \ 1318 $(OBJ_ALL) 1319 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1320 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1321 1322 llama-lookup-merge: examples/lookup/lookup-merge.cpp \ 1323 $(OBJ_ALL) 1324 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1325 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1326 1327 llama-lookup-stats: examples/lookup/lookup-stats.cpp \ 1328 $(OBJ_ALL) 1329 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1330 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1331 1332 llama-passkey: examples/passkey/passkey.cpp \ 1333 $(OBJ_ALL) 1334 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1335 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1336 1337 llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp \ 1338 $(OBJ_ALL) 1339 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1340 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1341 1342 ifdef GGML_RPC 1343 rpc-server: 
examples/rpc/rpc-server.cpp \ 1344 $(OBJ_GGML) 1345 $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) 1346 endif # GGML_RPC 1347 1348 llama-server: \ 1349 examples/server/server.cpp \ 1350 examples/server/utils.hpp \ 1351 examples/server/httplib.h \ 1352 examples/server/index.html.hpp \ 1353 examples/server/completion.js.hpp \ 1354 examples/server/loading.html.hpp \ 1355 examples/server/deps_daisyui.min.css.hpp \ 1356 examples/server/deps_markdown-it.js.hpp \ 1357 examples/server/deps_tailwindcss.js.hpp \ 1358 examples/server/deps_vue.esm-browser.js.hpp \ 1359 common/json.hpp \ 1360 common/stb_image.h \ 1361 $(OBJ_ALL) 1362 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1363 $(CXX) $(CXXFLAGS) $(filter-out %.h %.hpp $<,$^) -Iexamples/server $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2) 1364 1365 # Portable equivalent of `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`: 1366 examples/server/%.hpp: examples/server/public/% Makefile 1367 @( export NAME=$(subst .,_,$(subst -,_,$(notdir $<))) && \ 1368 echo "unsigned char $${NAME}[] = {" && \ 1369 cat $< | od -v -t x1 -An | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \ 1370 echo "};" && \ 1371 echo "unsigned int $${NAME}_len = $(shell cat $< | wc -c );" \ 1372 ) > $@ 1373 1374 llama-gen-docs: examples/gen-docs/gen-docs.cpp \ 1375 $(OBJ_ALL) 1376 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1377 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1378 1379 libllava.a: examples/llava/llava.cpp \ 1380 examples/llava/llava.h \ 1381 examples/llava/clip.cpp \ 1382 examples/llava/clip.h \ 1383 common/stb_image.h \ 1384 common/base64.hpp \ 1385 $(OBJ_ALL) 1386 $(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual 1387 1388 llama-llava-cli: examples/llava/llava-cli.cpp \ 1389 examples/llava/llava.cpp \ 1390 examples/llava/llava.h \ 1391 examples/llava/clip.cpp \ 1392 examples/llava/clip.h \ 1393 $(OBJ_ALL) 1394 $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ 
$(LDFLAGS) -Wno-cast-qual 1395 1396 llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \ 1397 examples/llava/llava.cpp \ 1398 examples/llava/llava.h \ 1399 examples/llava/clip.cpp \ 1400 examples/llava/clip.h \ 1401 $(OBJ_ALL) 1402 $(CXX) $(CXXFLAGS) $< $(filter-out %.h $<,$^) -o $@ $(LDFLAGS) -Wno-cast-qual 1403 1404 ifeq ($(UNAME_S),Darwin) 1405 swift: examples/batched.swift 1406 (cd examples/batched.swift; make build) 1407 endif 1408 1409 common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh 1410 @sh scripts/build-info.sh "$(CC)" > $@.tmp 1411 @if ! cmp -s $@.tmp $@; then \ 1412 mv $@.tmp $@; \ 1413 else \ 1414 rm $@.tmp; \ 1415 fi 1416 1417 common/build-info.o: common/build-info.cpp 1418 $(CXX) $(CXXFLAGS) -c $(filter-out %.h,$^) -o $@ 1419 1420 # 1421 # Tests 1422 # 1423 1424 tests: $(TEST_TARGETS) 1425 1426 tests/test-arg-parser: tests/test-arg-parser.cpp \ 1427 $(OBJ_ALL) 1428 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1429 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1430 1431 tests/test-llama-grammar: tests/test-llama-grammar.cpp \ 1432 $(OBJ_ALL) 1433 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1434 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1435 1436 tests/test-log: tests/test-log.cpp \ 1437 $(OBJ_ALL) 1438 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1439 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1440 1441 tests/test-grammar-parser: tests/test-grammar-parser.cpp \ 1442 $(OBJ_ALL) 1443 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1444 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1445 1446 tests/test-grammar-integration: tests/test-grammar-integration.cpp \ 1447 $(OBJ_ALL) 1448 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1449 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1450 1451 
tests/test-double-float: tests/test-double-float.cpp 1452 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1453 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1454 1455 tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp \ 1456 $(OBJ_ALL) 1457 $(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(call GET_OBJ_FILE, $<) 1458 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1459 1460 tests/test-opt: tests/test-opt.cpp \ 1461 $(OBJ_GGML) 1462 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1463 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1464 1465 tests/test-quantize-fns: tests/test-quantize-fns.cpp \ 1466 $(OBJ_GGML) 1467 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1468 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1469 1470 tests/test-quantize-perf: tests/test-quantize-perf.cpp \ 1471 $(OBJ_GGML) 1472 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1473 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1474 1475 tests/test-sampling: tests/test-sampling.cpp \ 1476 $(OBJ_ALL) 1477 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1478 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1479 1480 tests/test-tokenizer-0: tests/test-tokenizer-0.cpp \ 1481 $(OBJ_ALL) 1482 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1483 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1484 1485 tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp \ 1486 $(OBJ_ALL) 1487 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1488 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1489 1490 tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp \ 1491 $(OBJ_ALL) 1492 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1493 $(CXX) $(CXXFLAGS) 
$(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1494 1495 tests/test-rope: tests/test-rope.cpp ggml/src/ggml.o \ 1496 $(OBJ_GGML) 1497 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1498 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1499 1500 tests/test-c.o: tests/test-c.c include/llama.h 1501 $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@ 1502 1503 tests/test-backend-ops: tests/test-backend-ops.cpp \ 1504 $(OBJ_GGML) 1505 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1506 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1507 1508 tests/test-model-load-cancel: tests/test-model-load-cancel.cpp tests/get-model.cpp \ 1509 $(OBJ_ALL) 1510 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1511 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1512 1513 tests/test-autorelease: tests/test-autorelease.cpp tests/get-model.cpp \ 1514 $(OBJ_ALL) 1515 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1516 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1517 1518 tests/test-chat-template: tests/test-chat-template.cpp \ 1519 $(OBJ_ALL) 1520 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1521 $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1522 1523 # 1524 # PoCs 1525 # 1526 1527 llama-vdot: pocs/vdot/vdot.cpp ggml/src/ggml.o \ 1528 $(OBJ_GGML) 1529 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1530 $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1531 1532 llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o \ 1533 $(OBJ_GGML) 1534 $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<) 1535 $(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) 1536 1537 # 1538 # Deprecated binaries that we want to keep around long enough for people to migrate to the new filenames, then these can be removed. 
1539 # 1540 # Mark legacy binary targets as .PHONY so that they are always checked. 1541 .PHONY: main quantize perplexity embedding server 1542 1543 # Define the object file target 1544 examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp 1545 $(CXX) $(CXXFLAGS) -c $< -o $@ 1546 1547 # NOTE: We currently will always build the deprecation-warning `main` and `server` binaries to help users migrate. 1548 # Eventually we will want to remove these target from building all the time. 1549 main: examples/deprecation-warning/deprecation-warning.o 1550 $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) 1551 @echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead." 1552 1553 server: examples/deprecation-warning/deprecation-warning.o 1554 $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) 1555 @echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead." 1556 1557 quantize: examples/deprecation-warning/deprecation-warning.o 1558 ifneq (,$(wildcard quantize)) 1559 $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) 1560 @echo "#########" 1561 @echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead." 1562 @echo " Remove the 'quantize' binary to remove this warning." 1563 @echo "#########" 1564 endif 1565 1566 perplexity: examples/deprecation-warning/deprecation-warning.o 1567 ifneq (,$(wildcard perplexity)) 1568 $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) 1569 @echo "#########" 1570 @echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead." 1571 @echo " Remove the 'perplexity' binary to remove this warning." 1572 @echo "#########" 1573 endif 1574 1575 embedding: examples/deprecation-warning/deprecation-warning.o 1576 ifneq (,$(wildcard embedding)) 1577 $(CXX) $(CXXFLAGS) $< -o $@ $(LDFLAGS) 1578 @echo "#########" 1579 @echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead." 
1580 @echo " Remove the 'embedding' binary to remove this warning." 1581 @echo "#########" 1582 endif