/ Makefile
Makefile
   1  # Define the default target now so that it is always the first target
   2  BUILD_TARGETS = \
   3  	libllava.a \
   4  	llama-batched \
   5  	llama-batched-bench \
   6  	llama-bench \
   7  	llama-cli \
   8  	llama-convert-llama2c-to-ggml \
   9  	llama-embedding \
  10  	llama-eval-callback \
  11  	llama-export-lora \
  12  	llama-gbnf-validator \
  13  	llama-gguf \
  14  	llama-gguf-hash \
  15  	llama-gguf-split \
  16  	llama-gritlm \
  17  	llama-imatrix \
  18  	llama-infill \
  19  	llama-llava-cli \
  20  	llama-minicpmv-cli\
  21  	llama-lookahead \
  22  	llama-lookup \
  23  	llama-lookup-create \
  24  	llama-lookup-merge \
  25  	llama-lookup-stats \
  26  	llama-parallel \
  27  	llama-passkey \
  28  	llama-perplexity \
  29  	llama-q8dot \
  30  	llama-quantize \
  31  	llama-quantize-stats \
  32  	llama-retrieval \
  33  	llama-save-load-state \
  34  	llama-server \
  35  	llama-simple \
  36  	llama-simple-chat \
  37  	llama-run \
  38  	llama-speculative \
  39  	llama-tokenize \
  40  	llama-vdot \
  41  	llama-cvector-generator \
  42  	llama-gen-docs \
  43  	tests/test-c.o
  44  
  45  # Binaries only useful for tests
  46  TEST_TARGETS = \
  47  	tests/test-arg-parser \
  48  	tests/test-autorelease \
  49  	tests/test-backend-ops \
  50  	tests/test-chat-template \
  51  	tests/test-double-float \
  52  	tests/test-grammar-integration \
  53  	tests/test-grammar-parser \
  54  	tests/test-json-schema-to-grammar \
  55  	tests/test-llama-grammar \
  56  	tests/test-log \
  57  	tests/test-model-load-cancel \
  58  	tests/test-quantize-fns \
  59  	tests/test-quantize-perf \
  60  	tests/test-rope \
  61  	tests/test-sampling \
  62  	tests/test-tokenizer-0 \
  63  	tests/test-tokenizer-1-bpe \
  64  	tests/test-tokenizer-1-spm
  65  #	tests/test-opt \
  66  
  67  # Legacy build targets that were renamed in #7809, but should still be removed when the project is cleaned
  68  LEGACY_TARGETS_CLEAN = main quantize quantize-stats perplexity imatrix embedding vdot q8dot convert-llama2c-to-ggml \
  69  	simple batched batched-bench save-load-state server gguf gguf-split eval-callback llama-bench libllava.a llava-cli baby-llama \
  70  	retrieval speculative infill tokenize parallel export-lora lookahead lookup passkey gritlm
  71  
  72  # Legacy build targets that were renamed in #7809, but we want to build binaries that for them that output a deprecation warning if people try to use them.
  73  #  We don't want to clutter things too much, so we only build replacements for the most commonly used binaries.
  74  LEGACY_TARGETS_BUILD = main quantize perplexity embedding server
  75  
  76  # Deprecation aliases
  77  ifdef LLAMA_CUBLAS
  78  $(error LLAMA_CUBLAS is removed. Use GGML_CUDA instead.)
  79  endif
  80  
  81  ifdef LLAMA_CUDA
  82  GGML_CUDA := 1
  83  DEPRECATE_WARNING := 1
  84  endif
  85  
  86  ifdef LLAMA_KOMPUTE
  87  GGML_KOMPUTE := 1
  88  DEPRECATE_WARNING := 1
  89  endif
  90  
  91  ifdef LLAMA_METAL
  92  GGML_METAL := 1
  93  DEPRECATE_WARNING := 1
  94  endif
  95  
  96  ifdef LLAMA_RPC
  97  GGML_RPC := 1
  98  DEPRECATE_WARNING := 1
  99  endif
 100  
 101  ifdef LLAMA_SYCL
 102  GGML_SYCL := 1
 103  DEPRECATE_WARNING := 1
 104  endif
 105  
 106  ifdef LLAMA_SYCL_F16
 107  GGML_SYCL_F16 := 1
 108  DEPRECATE_WARNING := 1
 109  endif
 110  
 111  ifdef LLAMA_OPENBLAS
 112  GGML_OPENBLAS := 1
 113  DEPRECATE_WARNING := 1
 114  endif
 115  
 116  ifdef LLAMA_OPENBLAS64
 117  GGML_OPENBLAS64 := 1
 118  DEPRECATE_WARNING := 1
 119  endif
 120  
 121  ifdef LLAMA_BLIS
 122  GGML_BLIS := 1
 123  DEPRECATE_WARNING := 1
 124  endif
 125  
 126  ifdef LLAMA_NO_LLAMAFILE
 127  GGML_NO_LLAMAFILE := 1
 128  DEPRECATE_WARNING := 1
 129  endif
 130  
 131  ifdef LLAMA_NO_ACCELERATE
 132  GGML_NO_ACCELERATE := 1
 133  DEPRECATE_WARNING := 1
 134  endif
 135  
 136  ifdef LLAMA_NO_OPENMP
 137  GGML_NO_OPENMP := 1
 138  DEPRECATE_WARNING := 1
 139  endif
 140  
 141  ifdef LLAMA_NO_METAL
 142  GGML_NO_METAL := 1
 143  DEPRECATE_WARNING := 1
 144  endif
 145  
 146  ifdef LLAMA_DISABLE_LOGS
 147  REMOVE_WARNING := 1
 148  endif
 149  
 150  ifdef LLAMA_SERVER_VERBOSE
 151  REMOVE_WARNING := 1
 152  endif
 153  
 154  ifndef UNAME_S
 155  UNAME_S := $(shell uname -s)
 156  endif
 157  
 158  ifndef UNAME_P
 159  UNAME_P := $(shell uname -p)
 160  endif
 161  
 162  ifndef UNAME_M
 163  UNAME_M := $(shell uname -m)
 164  endif
 165  
 166  # In GNU make default CXX is g++ instead of c++.  Let's fix that so that users
 167  # of non-gcc compilers don't have to provide g++ alias or wrapper.
 168  DEFCC  := cc
 169  DEFCXX := c++
 170  ifeq ($(origin CC),default)
 171  CC  := $(DEFCC)
 172  endif
 173  ifeq ($(origin CXX),default)
 174  CXX := $(DEFCXX)
 175  endif
 176  
 177  # Mac OS + Arm can report x86_64
 178  # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
 179  ifeq ($(UNAME_S),Darwin)
 180  	ifndef GGML_NO_METAL
 181  		GGML_METAL := 1
 182  	endif
 183  
 184  	GGML_NO_OPENMP := 1
 185  
 186  	ifneq ($(UNAME_P),arm)
 187  		SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
 188  		ifeq ($(SYSCTL_M),1)
 189  			# UNAME_P := arm
 190  			# UNAME_M := arm64
 191  			warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
 192  		endif
 193  	endif
 194  endif
 195  
 196  ifdef GGML_METAL
 197  	GGML_METAL_EMBED_LIBRARY := 1
 198  endif
 199  
 200  ifdef GGML_RPC
 201  	BUILD_TARGETS += rpc-server
 202  endif
 203  
 204  ifdef GGML_VULKAN
 205  	BUILD_TARGETS += vulkan-shaders-gen
 206  endif
 207  
 208  default: $(BUILD_TARGETS) $(LEGACY_TARGETS_BUILD)
 209  
 210  test: $(TEST_TARGETS)
 211  	@failures=0; \
 212  	for test_target in $(TEST_TARGETS); do \
 213  		if [ "$$test_target" = "tests/test-tokenizer-0" ]; then \
 214  			./$$test_target $(CURDIR)/models/ggml-vocab-llama-spm.gguf; \
 215  			./$$test_target $(CURDIR)/models/ggml-vocab-llama-bpe.gguf; \
 216  			./$$test_target $(CURDIR)/models/ggml-vocab-phi-3.gguf; \
 217  			./$$test_target $(CURDIR)/models/ggml-vocab-falcon.gguf; \
 218  			./$$test_target $(CURDIR)/models/ggml-vocab-bert-bge.gguf; \
 219  			./$$test_target $(CURDIR)/models/ggml-vocab-starcoder.gguf; \
 220  			./$$test_target $(CURDIR)/models/ggml-vocab-gpt-2.gguf; \
 221  			./$$test_target $(CURDIR)/models/ggml-vocab-refact.gguf; \
 222  		elif [ "$$test_target" = "tests/test-tokenizer-1-spm" ]; then \
 223  			continue; \
 224  		elif [ "$$test_target" = "tests/test-tokenizer-1-bpe" ]; then \
 225  			continue; \
 226  		else \
 227  			echo "Running test $$test_target..."; \
 228  			./$$test_target; \
 229  		fi; \
 230  		if [ $$? -ne 0 ]; then \
 231  			printf 'Test %s FAILED!\n\n' $$test_target; \
 232  			failures=$$(( failures + 1 )); \
 233  		else \
 234  			printf 'Test %s passed.\n\n' $$test_target; \
 235  		fi; \
 236  	done; \
 237  	if [ $$failures -gt 0 ]; then \
 238  		printf '\n%s tests failed.\n' $$failures; \
 239  		exit 1; \
 240  	fi
 241  	@echo 'All tests passed.'
 242  
 243  all: $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_BUILD)
 244  
 245  ifdef RISCV_CROSS_COMPILE
 246  CC	:= riscv64-unknown-linux-gnu-gcc
 247  CXX	:= riscv64-unknown-linux-gnu-g++
 248  endif
 249  
 250  #
 251  # Compile flags
 252  #
 253  
 254  # keep standard at C11 and C++17
 255  MK_CPPFLAGS  = -Iggml/include -Iggml/src -Iinclude -Isrc -Icommon -DGGML_USE_CPU
 256  MK_CFLAGS    = -std=c11   -fPIC
 257  MK_CXXFLAGS  = -std=c++17 -fPIC
 258  MK_NVCCFLAGS = -std=c++17
 259  
 260  ifdef LLAMA_NO_CCACHE
 261  GGML_NO_CCACHE := 1
 262  DEPRECATE_WARNING := 1
 263  endif
 264  
 265  ifndef GGML_NO_CCACHE
 266  CCACHE := $(shell which ccache)
 267  ifdef CCACHE
 268  export CCACHE_SLOPPINESS = time_macros
 269  $(info I ccache found, compilation results will be cached. Disable with GGML_NO_CCACHE.)
 270  CC    := $(CCACHE) $(CC)
 271  CXX   := $(CCACHE) $(CXX)
 272  else
 273  $(info I ccache not found. Consider installing it for faster compilation.)
 274  endif # CCACHE
 275  endif # GGML_NO_CCACHE
 276  
 277  # clock_gettime came in POSIX.1b (1993)
 278  # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
 279  # posix_memalign came in POSIX.1-2001 / SUSv3
 280  # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
 281  MK_CPPFLAGS += -D_XOPEN_SOURCE=600
 282  
 283  # Somehow in OpenBSD whenever POSIX conformance is specified
 284  # some string functions rely on locale_t availability,
 285  # which was introduced in POSIX.1-2008, forcing us to go higher
 286  ifeq ($(UNAME_S),OpenBSD)
 287  	MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
 288  endif
 289  
 290  # Data types, macros and functions related to controlling CPU affinity and
 291  # some memory allocation are available on Linux through GNU extensions in libc
 292  ifeq ($(UNAME_S),Linux)
 293  	MK_CPPFLAGS += -D_GNU_SOURCE
 294  	MK_LDFLAGS  += -ldl
 295  endif
 296  
 297  # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
 298  # and on macOS its availability depends on enabling Darwin extensions
 299  # similarly on DragonFly, enabling BSD extensions is necessary
 300  ifeq ($(UNAME_S),Darwin)
 301  	MK_CPPFLAGS += -D_DARWIN_C_SOURCE
 302  endif
 303  ifeq ($(UNAME_S),DragonFly)
 304  	MK_CPPFLAGS += -D__BSD_VISIBLE
 305  endif
 306  
 307  # alloca is a non-standard interface that is not visible on BSDs when
 308  # POSIX conformance is specified, but not all of them provide a clean way
 309  # to enable it in such cases
 310  ifeq ($(UNAME_S),FreeBSD)
 311  	MK_CPPFLAGS += -D__BSD_VISIBLE
 312  endif
 313  ifeq ($(UNAME_S),NetBSD)
 314  	MK_CPPFLAGS += -D_NETBSD_SOURCE
 315  endif
 316  ifeq ($(UNAME_S),OpenBSD)
 317  	MK_CPPFLAGS += -D_BSD_SOURCE
 318  endif
 319  
 320  ifdef GGML_SCHED_MAX_COPIES
 321  	MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES)
 322  endif
 323  
 324  ifdef LLAMA_DEBUG
 325  	MK_CFLAGS    += -O0 -g
 326  	MK_CXXFLAGS  += -O0 -g
 327  	MK_LDFLAGS   += -g
 328  	MK_NVCCFLAGS += -O0 -g
 329  
 330  	ifeq ($(UNAME_S),Linux)
 331  		MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
 332  	endif
 333  else
 334  	MK_CPPFLAGS   += -DNDEBUG
 335  	MK_CFLAGS     += -O3 -g
 336  	MK_CXXFLAGS   += -O3 -g
 337  	MK_NVCCFLAGS  += -O3 -g
 338  endif
 339  
 340  ifdef LLAMA_SANITIZE_THREAD
 341  	MK_CFLAGS   += -fsanitize=thread -g
 342  	MK_CXXFLAGS += -fsanitize=thread -g
 343  	MK_LDFLAGS  += -fsanitize=thread -g
 344  endif
 345  
 346  ifdef LLAMA_SANITIZE_ADDRESS
 347  	MK_CFLAGS   += -fsanitize=address -fno-omit-frame-pointer -g
 348  	MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
 349  	MK_LDFLAGS  += -fsanitize=address -fno-omit-frame-pointer -g
 350  endif
 351  
 352  ifdef LLAMA_SANITIZE_UNDEFINED
 353  	MK_CFLAGS   += -fsanitize=undefined -g
 354  	MK_CXXFLAGS += -fsanitize=undefined -g
 355  	MK_LDFLAGS  += -fsanitize=undefined -g
 356  endif
 357  
 358  ifdef LLAMA_SERVER_SSL
 359  	MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
 360  	MK_LDFLAGS += -lssl -lcrypto
 361  endif
 362  
 363  ifndef GGML_NO_CPU_AARCH64
 364  	MK_CPPFLAGS += -DGGML_USE_CPU_AARCH64
 365  endif
 366  
 367  # warnings
 368  WARN_FLAGS = \
 369  	-Wall \
 370  	-Wextra \
 371  	-Wpedantic \
 372  	-Wcast-qual \
 373  	-Wno-unused-function
 374  
 375  MK_CFLAGS += \
 376  	$(WARN_FLAGS) \
 377  	-Wshadow \
 378  	-Wstrict-prototypes \
 379  	-Wpointer-arith \
 380  	-Wmissing-prototypes \
 381  	-Werror=implicit-int \
 382  	-Werror=implicit-function-declaration
 383  
 384  MK_CXXFLAGS += \
 385  	$(WARN_FLAGS) \
 386  	-Wmissing-declarations \
 387  	-Wmissing-noreturn
 388  
 389  ifeq ($(LLAMA_FATAL_WARNINGS),1)
 390  	MK_CFLAGS   += -Werror
 391  	MK_CXXFLAGS += -Werror
 392  endif
 393  
 394  # this version of Apple ld64 is buggy
 395  ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
 396  	MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
 397  endif
 398  
 399  # OS specific
 400  # TODO: support Windows
 401  ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
 402  	MK_CFLAGS   += -pthread
 403  	MK_CXXFLAGS += -pthread
 404  endif
 405  
 406  # detect Windows
 407  ifneq ($(findstring _NT,$(UNAME_S)),)
 408  	_WIN32 := 1
 409  endif
 410  
 411  # library name prefix
 412  ifneq ($(_WIN32),1)
 413  	LIB_PRE := lib
 414  endif
 415  
 416  # Dynamic Shared Object extension
 417  ifneq ($(_WIN32),1)
 418  	DSO_EXT := .so
 419  else
 420  	DSO_EXT := .dll
 421  endif
 422  
 423  # Windows Sockets 2 (Winsock) for network-capable apps
 424  ifeq ($(_WIN32),1)
 425  	LWINSOCK2 := -lws2_32
 426  endif
 427  
 428  ifdef LLAMA_GPROF
 429  	MK_CFLAGS   += -pg
 430  	MK_CXXFLAGS += -pg
 431  endif
 432  
 433  # Architecture specific
 434  # TODO: probably these flags need to be tweaked on some architectures
 435  #       feel free to update the Makefile for your architecture and send a pull request or issue
 436  
 437  ifndef RISCV_CROSS_COMPILE
 438  
 439  ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
 440  	# Use all CPU extensions that are available:
 441  	MK_CFLAGS     += -march=native -mtune=native
 442  	HOST_CXXFLAGS += -march=native -mtune=native
 443  
 444  	# Usage AVX-only
 445  	#MK_CFLAGS   += -mfma -mf16c -mavx
 446  	#MK_CXXFLAGS += -mfma -mf16c -mavx
 447  
 448  	# Usage SSSE3-only (Not is SSE3!)
 449  	#MK_CFLAGS   += -mssse3
 450  	#MK_CXXFLAGS += -mssse3
 451  endif
 452  
 453  ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
 454  	# The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
 455  	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
 456  	# https://github.com/ggerganov/llama.cpp/issues/2922
 457  	MK_CFLAGS   += -Xassembler -muse-unaligned-vector-move
 458  	MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
 459  
 460  	# Target Windows 8 for PrefetchVirtualMemory
 461  	MK_CPPFLAGS += -D_WIN32_WINNT=0x602
 462  endif
 463  
 464  ifneq ($(filter aarch64%,$(UNAME_M)),)
 465  	# Apple M1, M2, etc.
 466  	# Raspberry Pi 3, 4, Zero 2 (64-bit)
 467  	# Nvidia Jetson
 468  	MK_CFLAGS   += -mcpu=native
 469  	MK_CXXFLAGS += -mcpu=native
 470  	JETSON_RELEASE_INFO = $(shell jetson_release)
 471  	ifdef JETSON_RELEASE_INFO
 472  		ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
 473  			JETSON_EOL_MODULE_DETECT = 1
 474  			CC = aarch64-unknown-linux-gnu-gcc
 475  			cxx = aarch64-unknown-linux-gnu-g++
 476  		endif
 477  	endif
 478  endif
 479  
 480  ifneq ($(filter armv6%,$(UNAME_M)),)
 481  	# Raspberry Pi 1, Zero
 482  	MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
 483  	MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
 484  endif
 485  
 486  ifneq ($(filter armv7%,$(UNAME_M)),)
 487  	# Raspberry Pi 2
 488  	MK_CFLAGS   += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
 489  	MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
 490  endif
 491  
 492  ifneq ($(filter armv8%,$(UNAME_M)),)
 493  	# Raspberry Pi 3, 4, Zero 2 (32-bit)
 494  	MK_CFLAGS   += -mfp16-format=ieee -mno-unaligned-access
 495  	MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
 496  endif
 497  
 498  ifneq ($(filter ppc64%,$(UNAME_M)),)
 499  	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
 500  	ifneq (,$(findstring POWER9,$(POWER9_M)))
 501  		MK_CFLAGS   += -mcpu=power9
 502  		MK_CXXFLAGS += -mcpu=power9
 503  	endif
 504  endif
 505  
 506  ifneq ($(filter ppc64le%,$(UNAME_M)),)
 507  	MK_CFLAGS   += -mcpu=powerpc64le
 508  	MK_CXXFLAGS += -mcpu=powerpc64le
 509  	CUDA_POWER_ARCH = 1
 510  endif
 511  
 512  ifneq ($(filter loongarch64%,$(UNAME_M)),)
 513  	MK_CFLAGS   += -mlasx
 514  	MK_CXXFLAGS += -mlasx
 515  endif
 516  
 517  ifneq ($(filter riscv64%,$(UNAME_M)),)
 518  	MK_CFLAGS   += -march=rv64gcv -mabi=lp64d
 519  	MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
 520  endif
 521  
 522  else # RISC-V CROSS COMPILATION
 523  	MK_CFLAGS   += -march=rv64gcv -mabi=lp64d
 524  	MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
 525  endif
 526  
 527  ifndef GGML_NO_ACCELERATE
 528  	# Mac OS - include Accelerate framework.
 529  	# `-framework Accelerate` works both with Apple Silicon and Mac Intel
 530  	ifeq ($(UNAME_S),Darwin)
 531  		MK_CPPFLAGS  += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS -DGGML_BLAS_USE_ACCELERATE
 532  		MK_CPPFLAGS  += -DACCELERATE_NEW_LAPACK
 533  		MK_CPPFLAGS  += -DACCELERATE_LAPACK_ILP64
 534  		MK_LDFLAGS   += -framework Accelerate
 535  		OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
 536  	endif
 537  endif # GGML_NO_ACCELERATE
 538  
 539  ifndef GGML_NO_OPENMP
 540  	MK_CPPFLAGS += -DGGML_USE_OPENMP
 541  	MK_CFLAGS   += -fopenmp
 542  	MK_CXXFLAGS += -fopenmp
 543  endif # GGML_NO_OPENMP
 544  
 545  ifdef GGML_OPENBLAS
 546  	MK_CPPFLAGS  += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
 547  	MK_CFLAGS    += $(shell pkg-config --cflags-only-other openblas)
 548  	MK_LDFLAGS   += $(shell pkg-config --libs openblas)
 549  	OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
 550  endif # GGML_OPENBLAS
 551  
 552  ifdef GGML_OPENBLAS64
 553  	MK_CPPFLAGS  += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
 554  	MK_CFLAGS    += $(shell pkg-config --cflags-only-other openblas64)
 555  	MK_LDFLAGS   += $(shell pkg-config --libs openblas64)
 556  	OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
 557  endif # GGML_OPENBLAS64
 558  
 559  ifdef GGML_BLIS
 560  	MK_CPPFLAGS  += -DGGML_USE_BLAS -DGGML_BLAS_USE_BLIS -I/usr/local/include/blis -I/usr/include/blis
 561  	MK_LDFLAGS   += -lblis -L/usr/local/lib
 562  	OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
 563  endif # GGML_BLIS
 564  
 565  ifdef GGML_NVPL
 566  	MK_CPPFLAGS  += -DGGML_USE_BLAS -DGGML_BLAS_USE_NVPL -DNVPL_ILP64 -I/usr/local/include/nvpl_blas -I/usr/include/nvpl_blas
 567  	MK_LDFLAGS   += -L/usr/local/lib -lnvpl_blas_core -lnvpl_blas_ilp64_gomp
 568  	OBJ_GGML_EXT += ggml/src/ggml-blas/ggml-blas.o
 569  endif # GGML_NVPL
 570  
 571  ifndef GGML_NO_LLAMAFILE
 572  	MK_CPPFLAGS  += -DGGML_USE_LLAMAFILE
 573  	OBJ_GGML_EXT += ggml/src/ggml-cpu/llamafile/sgemm.o
 574  endif
 575  
 576  ifndef GGML_NO_AMX
 577  	MK_CPPFLAGS += -DGGML_USE_AMX
 578  	OBJ_GGML_EXT += ggml/src/ggml-cpu/amx/amx.o ggml/src/ggml-cpu/amx/mmq.o
 579  endif
 580  
 581  # only necessary for the CPU backend files
 582  MK_CPPFLAGS += -Iggml/src/ggml-cpu
 583  
 584  ifdef GGML_RPC
 585  	MK_CPPFLAGS  += -DGGML_USE_RPC
 586  	OBJ_GGML_EXT += ggml/src/ggml-rpc.o
 587  endif # GGML_RPC
 588  
 589  OBJ_CUDA_TMPL      = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
 590  OBJ_CUDA_TMPL     += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
 591  
 592  ifdef GGML_CUDA_FA_ALL_QUANTS
 593  	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu))
 594  else
 595  	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
 596  	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
 597  	OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
 598  endif # GGML_CUDA_FA_ALL_QUANTS
 599  
 600  ifdef GGML_CUDA
 601  	ifneq ('', '$(wildcard /opt/cuda)')
 602  		CUDA_PATH ?= /opt/cuda
 603  	else
 604  		CUDA_PATH ?= /usr/local/cuda
 605  	endif
 606  
 607  	MK_CPPFLAGS  += -DGGML_USE_CUDA -DGGML_CUDA_USE_GRAPHS -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
 608  	MK_LDFLAGS   += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
 609  	MK_NVCCFLAGS += -use_fast_math
 610  
 611  	OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
 612  	OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
 613  	OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)
 614  
 615  ifdef LLAMA_FATAL_WARNINGS
 616  	MK_NVCCFLAGS += -Werror all-warnings
 617  endif # LLAMA_FATAL_WARNINGS
 618  
 619  ifndef JETSON_EOL_MODULE_DETECT
 620  	MK_NVCCFLAGS += --forward-unknown-to-host-compiler
 621  endif # JETSON_EOL_MODULE_DETECT
 622  
 623  ifdef LLAMA_DEBUG
 624  	MK_NVCCFLAGS += -lineinfo
 625  endif # LLAMA_DEBUG
 626  
 627  ifdef GGML_CUDA_DEBUG
 628  	MK_NVCCFLAGS += --device-debug
 629  endif # GGML_CUDA_DEBUG
 630  
 631  ifdef GGML_CUDA_NVCC
 632  	NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
 633  else
 634  	NVCC = $(CCACHE) nvcc
 635  endif # GGML_CUDA_NVCC
 636  
 637  ifdef CUDA_DOCKER_ARCH
 638  	MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
 639  else ifndef CUDA_POWER_ARCH
 640  	MK_NVCCFLAGS += -arch=native
 641  endif # CUDA_DOCKER_ARCH
 642  
 643  ifdef GGML_CUDA_FORCE_MMQ
 644  	MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
 645  endif # GGML_CUDA_FORCE_MMQ
 646  
 647  ifdef GGML_CUDA_FORCE_CUBLAS
 648  	MK_NVCCFLAGS += -DGGML_CUDA_FORCE_CUBLAS
 649  endif # GGML_CUDA_FORCE_CUBLAS
 650  
 651  ifdef GGML_CUDA_F16
 652  	MK_NVCCFLAGS += -DGGML_CUDA_F16
 653  endif # GGML_CUDA_F16
 654  
 655  ifdef GGML_CUDA_DMMV_F16
 656  	MK_NVCCFLAGS += -DGGML_CUDA_F16
 657  endif # GGML_CUDA_DMMV_F16
 658  
 659  ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
 660  	MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
 661  else
 662  	MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
 663  endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
 664  
 665  ifdef GGML_CUDA_NO_PEER_COPY
 666  	MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
 667  endif # GGML_CUDA_NO_PEER_COPY
 668  
 669  ifdef GGML_CUDA_CCBIN
 670  	MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN)
 671  endif # GGML_CUDA_CCBIN
 672  
 673  ifdef GGML_CUDA_FA_ALL_QUANTS
 674  	MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
 675  endif # GGML_CUDA_FA_ALL_QUANTS
 676  
 677  ifdef JETSON_EOL_MODULE_DETECT
 678  define NVCC_COMPILE
 679  	$(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
 680  endef # NVCC_COMPILE
 681  else
 682  define NVCC_COMPILE
 683  	$(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
 684  endef # NVCC_COMPILE
 685  endif # JETSON_EOL_MODULE_DETECT
 686  
 687  ggml/src/ggml-cuda/%.o: \
 688  	ggml/src/ggml-cuda/%.cu \
 689  	ggml/include/ggml.h \
 690  	ggml/src/ggml-common.h \
 691  	ggml/src/ggml-cuda/common.cuh
 692  	$(NVCC_COMPILE)
 693  
 694  ggml/src/ggml-cuda/ggml-cuda.o: \
 695  	ggml/src/ggml-cuda/ggml-cuda.cu \
 696  	ggml/include/ggml-cuda.h \
 697  	ggml/include/ggml.h \
 698  	ggml/include/ggml-backend.h \
 699  	ggml/src/ggml-backend-impl.h \
 700  	ggml/src/ggml-common.h \
 701  	$(wildcard ggml/src/ggml-cuda/*.cuh)
 702  	$(NVCC_COMPILE)
 703  endif # GGML_CUDA
 704  
 705  ifdef GGML_VULKAN
 706  	MK_CPPFLAGS  += -DGGML_USE_VULKAN
 707  	MK_LDFLAGS   += $(shell pkg-config --libs vulkan)
 708  	OBJ_GGML_EXT += ggml/src/ggml-vulkan.o ggml/src/ggml-vulkan-shaders.o
 709  
 710  ifdef GGML_VULKAN_CHECK_RESULTS
 711  	MK_CPPFLAGS  += -DGGML_VULKAN_CHECK_RESULTS
 712  endif
 713  
 714  ifdef GGML_VULKAN_DEBUG
 715  	MK_CPPFLAGS  += -DGGML_VULKAN_DEBUG
 716  endif
 717  
 718  ifdef GGML_VULKAN_MEMORY_DEBUG
 719  	MK_CPPFLAGS  += -DGGML_VULKAN_MEMORY_DEBUG
 720  endif
 721  
 722  ifdef GGML_VULKAN_PERF
 723  	MK_CPPFLAGS  += -DGGML_VULKAN_PERF
 724  endif
 725  
 726  ifdef GGML_VULKAN_VALIDATE
 727  	MK_CPPFLAGS  += -DGGML_VULKAN_VALIDATE
 728  endif
 729  
 730  ifdef GGML_VULKAN_RUN_TESTS
 731  	MK_CPPFLAGS  += -DGGML_VULKAN_RUN_TESTS
 732  endif
 733  
 734  GLSLC_CMD  = glslc
 735  _ggml_vk_genshaders_cmd = $(shell pwd)/vulkan-shaders-gen
 736  _ggml_vk_header = ggml/src/ggml-vulkan-shaders.hpp
 737  _ggml_vk_source = ggml/src/ggml-vulkan-shaders.cpp
 738  _ggml_vk_input_dir = ggml/src/ggml-vulkan/vulkan-shaders
 739  _ggml_vk_shader_deps = $(echo $(_ggml_vk_input_dir)/*.comp)
 740  
 741  ggml/src/ggml-vulkan.o: ggml/src/ggml-vulkan/ggml-vulkan.cpp ggml/include/ggml-vulkan.h $(_ggml_vk_header) $(_ggml_vk_source)
 742  	$(CXX) $(CXXFLAGS) $(shell pkg-config --cflags vulkan) -c $< -o $@
 743  
 744  $(_ggml_vk_header): $(_ggml_vk_source)
 745  
 746  $(_ggml_vk_source): $(_ggml_vk_shader_deps) vulkan-shaders-gen
 747  	$(_ggml_vk_genshaders_cmd) \
 748  		--glslc      $(GLSLC_CMD) \
 749  		--input-dir  $(_ggml_vk_input_dir) \
 750  		--target-hpp $(_ggml_vk_header) \
 751  		--target-cpp $(_ggml_vk_source)
 752  
 753  vulkan-shaders-gen: ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
 754  	$(CXX) $(CXXFLAGS) -o $@ $(LDFLAGS) ggml/src/ggml-vulkan/vulkan-shaders/vulkan-shaders-gen.cpp
 755  
 756  endif # GGML_VULKAN
 757  
 758  ifdef GGML_HIP
 759  	ifeq ($(wildcard /opt/rocm),)
 760  		ROCM_PATH      ?= /usr
 761  		AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
 762  	else
 763  		ROCM_PATH	?= /opt/rocm
 764  		AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
 765  	endif
 766  
 767  	MK_CPPFLAGS += -DGGML_USE_HIP -DGGML_USE_CUDA
 768  
 769  ifdef GGML_HIP_UMA
 770  	MK_CPPFLAGS += -DGGML_HIP_UMA
 771  endif # GGML_HIP_UMA
 772  
 773  	MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
 774  	MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
 775  	MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
 776  
 777  	HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
 778  
 779  	HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
 780  
 781  ifdef GGML_CUDA_FORCE_MMQ
 782  	HIPFLAGS += -DGGML_CUDA_FORCE_MMQ
 783  endif # GGML_CUDA_FORCE_MMQ
 784  
 785  ifdef GGML_CUDA_FORCE_CUBLAS
 786  	HIPFLAGS += -DGGML_CUDA_FORCE_CUBLAS
 787  endif # GGML_CUDA_FORCE_CUBLAS
 788  
 789  ifdef GGML_CUDA_NO_PEER_COPY
 790  	HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
 791  endif # GGML_CUDA_NO_PEER_COPY
 792  
 793  	OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
 794  	OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
 795  	OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)
 796  
 797  ggml/src/ggml-cuda/ggml-cuda.o: \
 798  	ggml/src/ggml-cuda/ggml-cuda.cu \
 799  	ggml/include/ggml-cuda.h \
 800  	ggml/include/ggml.h \
 801  	ggml/include/ggml-backend.h \
 802  	ggml/src/ggml-backend-impl.h \
 803  	ggml/src/ggml-common.h \
 804  	$(wildcard ggml/src/ggml-cuda/*.cuh)
 805  	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 806  
 807  ggml/src/ggml-cuda/%.o: \
 808  	ggml/src/ggml-cuda/%.cu \
 809  	ggml/include/ggml.h \
 810  	ggml/src/ggml-common.h \
 811  	ggml/src/ggml-cuda/common.cuh
 812  	$(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
 813  endif # GGML_HIP
 814  
 815  ifdef GGML_MUSA
 816  	ifeq ($(wildcard /opt/musa),)
 817  		MUSA_PATH ?= /usr/local/musa
 818  	else
 819  		MUSA_PATH ?= /opt/musa
 820  	endif
 821  	MUSA_ARCHITECTURES ?= 21;22
 822  
 823  	MK_CPPFLAGS += -DGGML_USE_MUSA -DGGML_USE_CUDA
 824  	MK_LDFLAGS += -L$(MUSA_PATH)/lib -Wl,-rpath=$(MUSA_PATH)/lib
 825  	MK_LDFLAGS += -lmusa -lmusart -lmublas
 826  
 827  	ifndef GGML_NO_OPENMP
 828  		# For Ubuntu Focal
 829  		MK_CPPFLAGS += -I/usr/lib/llvm-10/include/openmp
 830  		MK_LDFLAGS  += -L/usr/lib/llvm-10/lib
 831  		# For Ubuntu Jammy
 832  		MK_CPPFLAGS += -I/usr/lib/llvm-14/lib/clang/14.0.0/include
 833  		MK_LDFLAGS  += -L/usr/lib/llvm-14/lib
 834  	endif # GGML_NO_OPENMP
 835  
 836  	CC  := $(MUSA_PATH)/bin/clang
 837  	CXX := $(MUSA_PATH)/bin/clang++
 838  	MCC := $(CCACHE) $(MUSA_PATH)/bin/mcc
 839  
 840  	MUSAFLAGS  = -x musa -mtgpu
 841  	MUSAFLAGS += $(foreach arch,$(subst ;, ,$(MUSA_ARCHITECTURES)),--cuda-gpu-arch=mp_$(arch))
 842  
 843  ifdef GGML_CUDA_FORCE_MMQ
 844  	MUSAFLAGS += -DGGML_CUDA_FORCE_MMQ
 845  endif # GGML_CUDA_FORCE_MMQ
 846  
 847  ifdef GGML_CUDA_FORCE_CUBLAS
 848  	MUSAFLAGS += -DGGML_CUDA_FORCE_CUBLAS
 849  endif # GGML_CUDA_FORCE_CUBLAS
 850  
 851  ifdef GGML_CUDA_F16
 852  	MUSAFLAGS += -DGGML_CUDA_F16
 853  endif # GGML_CUDA_F16
 854  
 855  ifdef GGML_CUDA_DMMV_F16
 856  	MUSAFLAGS += -DGGML_CUDA_F16
 857  endif # GGML_CUDA_DMMV_F16
 858  
 859  ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
 860  	MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
 861  else
 862  	MUSAFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
 863  endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
 864  
 865  ifdef GGML_CUDA_NO_PEER_COPY
 866  	MUSAFLAGS += -DGGML_CUDA_NO_PEER_COPY
 867  endif # GGML_CUDA_NO_PEER_COPY
 868  
 869  ifdef GGML_CUDA_FA_ALL_QUANTS
 870  	MUSAFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
 871  endif # GGML_CUDA_FA_ALL_QUANTS
 872  
 873  	OBJ_GGML_EXT += ggml/src/ggml-cuda/ggml-cuda.o
 874  	OBJ_GGML_EXT += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
 875  	OBJ_GGML_EXT += $(OBJ_CUDA_TMPL)
 876  
 877  ggml/src/ggml-cuda/ggml-cuda.o: \
 878  	ggml/src/ggml-cuda/ggml-cuda.cu \
 879  	ggml/include/ggml-cuda.h \
 880  	ggml/include/ggml.h \
 881  	ggml/include/ggml-backend.h \
 882  	ggml/src/ggml-backend-impl.h \
 883  	ggml/src/ggml-common.h \
 884  	$(wildcard ggml/src/ggml-cuda/*.cuh)
 885  	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $<
 886  
 887  ggml/src/ggml-cuda/%.o: \
 888  	ggml/src/ggml-cuda/%.cu \
 889  	ggml/include/ggml.h \
 890  	ggml/src/ggml-common.h \
 891  	ggml/src/ggml-cuda/common.cuh
 892  	$(MCC) $(CXXFLAGS) $(MUSAFLAGS) -c -o $@ $<
 893  endif # GGML_MUSA
 894  
 895  ifdef GGML_METAL
 896  	MK_CPPFLAGS  += -DGGML_USE_METAL
 897  	MK_LDFLAGS   += -framework Foundation -framework Metal -framework MetalKit
 898  	OBJ_GGML_EXT += ggml/src/ggml-metal/ggml-metal.o
 899  
 900  ifdef GGML_METAL_USE_BF16
 901  	MK_CPPFLAGS += -DGGML_METAL_USE_BF16
 902  endif # GGML_METAL_USE_BF16
 903  ifdef GGML_METAL_NDEBUG
 904  	MK_CPPFLAGS += -DGGML_METAL_NDEBUG
 905  endif
 906  ifdef GGML_METAL_EMBED_LIBRARY
 907  	MK_CPPFLAGS  += -DGGML_METAL_EMBED_LIBRARY
 908  	OBJ_GGML_EXT += ggml/src/ggml-metal-embed.o
 909  endif
 910  endif # GGML_METAL
 911  
# Rules for the Metal backend objects; only defined when GGML_METAL is set.
ifdef GGML_METAL
# Compile the Objective-C backend source. The headers are listed only so that a
# header change triggers a rebuild; just the first prerequisite ($<) is compiled.
ggml/src/ggml-metal/ggml-metal.o: \
	ggml/src/ggml-metal/ggml-metal.m \
	ggml/src/ggml-metal/ggml-metal-impl.h \
	ggml/include/ggml-metal.h \
	ggml/include/ggml.h
	$(CC) $(CFLAGS) -c $< -o $@

ifdef GGML_METAL_EMBED_LIBRARY
# Embed the Metal shader source into an object file:
#   1. The first sed inserts ggml-common.h after the `__embed_ggml-common.h__` marker
#      line and deletes the marker, writing ggml-metal-embed.metal.tmp.
#   2. The second sed does the same for the `#include "ggml-metal-impl.h"` line,
#      writing ggml-metal-embed.metal.
#   3. An assembly stub is written into a directory created with `mktemp -d`; it
#      `.incbin`s ggml-metal-embed.metal between the symbols
#      _ggml_metallib_start and _ggml_metallib_end.
#   4. The stub is assembled into $@, then the stub and its directory are removed.
# NOTE(review): ggml-metal-embed.metal.tmp and ggml-metal-embed.metal are written
# next to the sources and are not removed by this recipe.
ggml/src/ggml-metal-embed.o: \
	ggml/src/ggml-metal/ggml-metal.metal \
	ggml/src/ggml-metal/ggml-metal-impl.h \
	ggml/src/ggml-common.h
	@echo "Embedding Metal library"
	@sed -e '/__embed_ggml-common.h__/r      ggml/src/ggml-common.h'                -e '/__embed_ggml-common.h__/d'      < ggml/src/ggml-metal/ggml-metal.metal           > ggml/src/ggml-metal/ggml-metal-embed.metal.tmp
	@sed -e '/#include "ggml-metal-impl.h"/r ggml/src/ggml-metal/ggml-metal-impl.h' -e '/#include "ggml-metal-impl.h"/d' < ggml/src/ggml-metal/ggml-metal-embed.metal.tmp > ggml/src/ggml-metal/ggml-metal-embed.metal
	$(eval TEMP_ASSEMBLY=$(shell mktemp -d))
	@echo ".section __DATA, __ggml_metallib"                       >  $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo ".globl _ggml_metallib_start"                            >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo "_ggml_metallib_start:"                                  >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo ".incbin \"ggml/src/ggml-metal/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo ".globl _ggml_metallib_end"                              >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	@echo "_ggml_metallib_end:"                                    >> $(TEMP_ASSEMBLY)/ggml-metal-embed.s
	$(CC) $(CFLAGS) -c $(TEMP_ASSEMBLY)/ggml-metal-embed.s -o $@
	@rm -f ${TEMP_ASSEMBLY}/ggml-metal-embed.s
	@rmdir ${TEMP_ASSEMBLY}
endif
endif # GGML_METAL
 940  
 941  DIR_GGML = ggml
 942  DIR_LLAMA = src
 943  DIR_COMMON = common
 944  
 945  OBJ_GGML = \
 946  	$(DIR_GGML)/src/ggml.o \
 947  	$(DIR_GGML)/src/ggml-aarch64.o \
 948  	$(DIR_GGML)/src/ggml-alloc.o \
 949  	$(DIR_GGML)/src/ggml-backend.o \
 950  	$(DIR_GGML)/src/ggml-backend-reg.o \
 951  	$(DIR_GGML)/src/ggml-opt.o \
 952  	$(DIR_GGML)/src/ggml-quants.o \
 953  	$(DIR_GGML)/src/ggml-threading.o \
 954  	$(DIR_GGML)/src/ggml-cpu/ggml-cpu.o \
 955  	$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o \
 956  	$(DIR_GGML)/src/ggml-cpu/ggml-cpu-aarch64.o \
 957  	$(DIR_GGML)/src/ggml-cpu/ggml-cpu-quants.o \
 958  	$(OBJ_GGML_EXT)
 959  
 960  OBJ_LLAMA = \
 961  	$(DIR_LLAMA)/llama.o \
 962  	$(DIR_LLAMA)/llama-vocab.o \
 963  	$(DIR_LLAMA)/llama-grammar.o \
 964  	$(DIR_LLAMA)/llama-sampling.o \
 965  	$(DIR_LLAMA)/unicode.o \
 966  	$(DIR_LLAMA)/unicode-data.o
 967  
 968  OBJ_COMMON = \
 969  	$(DIR_COMMON)/common.o \
 970  	$(DIR_COMMON)/arg.o \
 971  	$(DIR_COMMON)/log.o \
 972  	$(DIR_COMMON)/console.o \
 973  	$(DIR_COMMON)/ngram-cache.o \
 974  	$(DIR_COMMON)/sampling.o \
 975  	$(DIR_COMMON)/speculative.o \
 976  	$(DIR_COMMON)/build-info.o \
 977  	$(DIR_COMMON)/json-schema-to-grammar.o
 978  
 979  OBJ_ALL = $(OBJ_GGML) $(OBJ_LLAMA) $(OBJ_COMMON)
 980  
 981  LIB_GGML   = $(LIB_PRE)ggml$(DSO_EXT)
 982  LIB_GGML_S = $(LIB_PRE)ggml.a
 983  
 984  LIB_LLAMA   = $(LIB_PRE)llama$(DSO_EXT)
 985  LIB_LLAMA_S = $(LIB_PRE)llama.a
 986  
 987  LIB_COMMON   = $(LIB_PRE)common$(DSO_EXT)
 988  LIB_COMMON_S = $(LIB_PRE)common.a
 989  
 990  LIB_ALL   = $(LIB_GGML)   $(LIB_LLAMA)   $(LIB_COMMON)
 991  LIB_ALL_S = $(LIB_GGML_S) $(LIB_LLAMA_S) $(LIB_COMMON_S)
 992  
# Run scripts/get-flags.mk with GF_CC set to the C compiler.
# NOTE(review): presumably that script fills in GF_CFLAGS / GF_CXXFLAGS, which are
# consumed below - confirm in scripts/get-flags.mk.
GF_CC := $(CC)
include scripts/get-flags.mk

# combine build flags with cmdline overrides
# `override` makes these assignments win even when the variable was set on the make
# command line; the incoming value is kept and appended after the MK_* defaults.
# The order of the lines matters: CFLAGS and CXXFLAGS read the already combined CPPFLAGS,
# and BASE_CXXFLAGS captures CXXFLAGS before the host/GF flags are added.
override CPPFLAGS  := $(MK_CPPFLAGS) $(CPPFLAGS)
override CFLAGS    := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
BASE_CXXFLAGS      := $(MK_CXXFLAGS) $(CXXFLAGS)
override CXXFLAGS  := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
override LDFLAGS   := $(MK_LDFLAGS) $(LDFLAGS)

# identify CUDA host compiler
# get-flags.mk is included a second time with GF_CC pointing at nvcc; the resulting
# GF_CXXFLAGS are combined with BASE_CXXFLAGS into CUDA_CXXFLAGS.
ifdef GGML_CUDA
GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
include scripts/get-flags.mk
CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
endif

# Optional libcurl support: adds the LLAMA_USE_CURL define and links against libcurl.
ifdef LLAMA_CURL
override CXXFLAGS := $(CXXFLAGS) -DLLAMA_USE_CURL
override LDFLAGS  := $(LDFLAGS) -lcurl
endif
1015  
#
# Print build information
#
# These $(info) lines run while the makefile is being parsed, so the summary is
# printed on every invocation regardless of the requested target.

$(info I llama.cpp build info: )
$(info I UNAME_S:   $(UNAME_S))
$(info I UNAME_P:   $(UNAME_P))
$(info I UNAME_M:   $(UNAME_M))
$(info I CFLAGS:    $(CFLAGS))
$(info I CXXFLAGS:  $(CXXFLAGS))
$(info I NVCCFLAGS: $(NVCCFLAGS))
$(info I LDFLAGS:   $(LDFLAGS))
$(info I CC:        $(shell $(CC)   --version | head -n 1))
$(info I CXX:       $(shell $(CXX)  --version | head -n 1))
ifdef GGML_CUDA
$(info I NVCC:      $(shell $(NVCC) --version | tail -n 1))
# Extract "<major>.<minor>" from the "release X.Y" text printed by nvcc (uses grep -P).
CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
# awk prints 1 when the version compares below 11.7
ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)

# Abort unless one of the explicit architecture variables is set
ifndef CUDA_DOCKER_ARCH
ifndef CUDA_POWER_ARCH
$(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
endif # CUDA_POWER_ARCH
endif # CUDA_DOCKER_ARCH

endif # CUDA_VERSION < 11.7 (checked with awk)
endif # GGML_CUDA
$(info )
1044  
# Printed when DEPRECATE_WARNING is set: one bullet per deprecated LLAMA_ option.
ifdef DEPRECATE_WARNING
$(info !!! DEPRECATION WARNING !!!)
$(info The following LLAMA_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead)
$(foreach opt, \
	LLAMA_CUDA LLAMA_METAL LLAMA_METAL_EMBED_LIBRARY LLAMA_OPENMP LLAMA_RPC \
	LLAMA_SYCL LLAMA_SYCL_F16 LLAMA_OPENBLAS LLAMA_OPENBLAS64 LLAMA_BLIS \
	LLAMA_NO_LLAMAFILE LLAMA_NO_ACCELERATE LLAMA_NO_OPENMP LLAMA_NO_METAL LLAMA_NO_CCACHE, \
	$(info   - $(opt)))
$(info )
endif
1065  
# Printed when REMOVE_WARNING is set: lists LLAMA_ options that no longer exist.
ifdef REMOVE_WARNING
# The whole notice, including its trailing blank line, is kept in one variable
# and emitted with a single $(info).
define REMOVE_WARNING_TEXT
!!! REMOVAL WARNING !!!
The following LLAMA_ options have been removed and are no longer supported
- LLAMA_DISABLE_LOGS   (https://github.com/ggerganov/llama.cpp/pull/9418)
- LLAMA_SERVER_VERBOSE (https://github.com/ggerganov/llama.cpp/pull/9418)

endef
$(info $(REMOVE_WARNING_TEXT))
endif
1073  
#
# Build libraries
#

# Libraries
# NOTE(review): these assignments replace the $(LIB_PRE)/$(DSO_EXT)-based definitions
# of the same six variables made earlier in this file. Because all of them are
# recursively expanded (`=`), every later use - including LIB_ALL / LIB_ALL_S - sees
# the fixed lib*.so / lib*.a names below. Confirm that this is intended.
LIB_GGML   = libggml.so
LIB_GGML_S = libggml.a

LIB_LLAMA   = libllama.so
LIB_LLAMA_S = libllama.a

LIB_COMMON   = libcommon.so
LIB_COMMON_S = libcommon.a

# Targets
# The shared and static libraries are built together with the programs.
BUILD_TARGETS += $(LIB_GGML) $(LIB_GGML_S) $(LIB_LLAMA) $(LIB_LLAMA_S) $(LIB_COMMON) $(LIB_COMMON_S)
1090  
# Dependency files
# One .d file per library object; they are written by the -MMD compile rules and
# pulled back in with `-include $(DEP_FILES)`.
DEP_FILES = $(OBJ_GGML:.o=.d) $(OBJ_LLAMA:.o=.d) $(OBJ_COMMON:.o=.d)

# Default target
# `all` names a command, not a file: declare it phony so that a stray file called
# `all` in the source tree can never make the build look up to date.
.PHONY: all
all: $(BUILD_TARGETS)
1096  
# ggml-cpu.c and ggml-cpu.cpp would both map to the same object and dependency file,
# so the C++ file gets its own object name and an explicit header list here.
# The header list can be regenerated with:
#       g++ -M -I ./ggml/include/ -I ./ggml/src ggml/src/ggml-cpu/ggml-cpu.cpp | grep ggml
$(DIR_GGML)/src/ggml-cpu/ggml-cpu-cpp.o: ggml/src/ggml-cpu/ggml-cpu.cpp \
	ggml/include/ggml.h ggml/include/ggml-alloc.h \
	ggml/include/ggml-backend.h ggml/include/ggml-cpu.h \
	ggml/src/ggml-backend-impl.h ggml/src/ggml-impl.h
	$(CXX) $(CXXFLAGS) -c $< -o $@
1108  
# Rules for building object files
#
# -MMD makes the compiler write a .d file next to each object listing the headers
# the source includes. -MP additionally emits an empty phony rule for every such
# header, so that deleting or renaming a header does not abort the next build with
# "No rule to make target ..." coming from a stale .d file.

# C sources of ggml
$(DIR_GGML)/%.o: $(DIR_GGML)/%.c
	$(CC) $(CFLAGS) -MMD -MP -c $< -o $@

# C++ sources of ggml
$(DIR_GGML)/%.o: $(DIR_GGML)/%.cpp
	$(CXX) $(CXXFLAGS) -MMD -MP -c $< -o $@

# C++ sources of the llama library
$(DIR_LLAMA)/%.o: $(DIR_LLAMA)/%.cpp
	$(CXX) $(CXXFLAGS) -MMD -MP -c $< -o $@

# C++ sources of the common helper library
$(DIR_COMMON)/%.o: $(DIR_COMMON)/%.cpp
	$(CXX) $(CXXFLAGS) -MMD -MP -c $< -o $@
1121  
# Rules for building libraries
#
# Shared libraries are linked with the C++ driver from all prerequisites ($^).
# Static archives are created with $(AR) (make's standard archiver variable, `ar`
# by default) so that a cross or toolchain-specific archiver can be selected with
# `make AR=...`; the archive that is written is always the rule's own target ($@).

$(LIB_GGML): $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

$(LIB_GGML_S): $(OBJ_GGML)
	$(AR) rcs $@ $^

# The shared llama library links against the shared ggml library
$(LIB_LLAMA): $(OBJ_LLAMA) $(LIB_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

$(LIB_LLAMA_S): $(OBJ_LLAMA)
	$(AR) rcs $@ $^

# The shared common library links against both shared libraries above
$(LIB_COMMON): $(OBJ_COMMON) $(LIB_LLAMA) $(LIB_GGML)
	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)

$(LIB_COMMON_S): $(OBJ_COMMON)
	$(AR) rcs $@ $^
1140  
# Include dependency files
# The leading `-` keeps make quiet when a .d file does not exist yet (e.g. on a clean tree).
-include $(DEP_FILES)
1143  
# Clean rule
# Removes the built programs, tests and libraries, the binaries listed in
# LEGACY_TARGETS_CLEAN (old names that may be left over from earlier builds), and
# every object (.o) and dependency (.d) file below the source directories.
# Declared phony so that a file named `clean` cannot disable the rule.
.PHONY: clean
clean:
	rm -vrf $(BUILD_TARGETS) $(TEST_TARGETS) $(LEGACY_TARGETS_CLEAN)
	rm -rvf *.a *.dll *.so *.dot
	find ggml src common tests examples pocs -type f -name "*.o" -delete
	find ggml src common tests examples pocs -type f -name "*.d" -delete
1150  
1151  #
1152  # Examples
1153  #
1154  
# The program rules compile their first prerequisite ($<, the source file) into an
# object file in a separate step, which lets ccache cache the compilation. The link
# step then uses $^ (all prerequisites) with the source filtered out and the object
# file added in its place.

# Map a source path ending in .c, .cpp or .cu to the corresponding .o path:
GET_OBJ_FILE = $(patsubst %.cu,%.o,$(patsubst %.cpp,%.o,$(patsubst %.c,%.o,$(1))))
1161  
# llama-cli: compile examples/main/main.cpp, link it with $(OBJ_ALL), then print a usage hint.
llama-cli: examples/main/main.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
	@echo
	@echo '====  Run ./llama-cli -h for help.  ===='
	@echo
1169  
# llama-infill: compile the source to an object, then link it with $(OBJ_ALL).
llama-infill: examples/infill/infill.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1174  
# llama-run: compile the source to an object, then link it with $(OBJ_ALL).
llama-run: examples/run/run.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1179  
# llama-simple: compile the source to an object, then link it with $(OBJ_ALL).
llama-simple: examples/simple/simple.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1184  
# llama-simple-chat: compile the source to an object, then link it with $(OBJ_ALL).
llama-simple-chat: examples/simple-chat/simple-chat.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1189  
# llama-tokenize: compile the source to an object, then link it with $(OBJ_ALL).
llama-tokenize: examples/tokenize/tokenize.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1194  
# llama-batched: compile the source to an object, then link it with $(OBJ_ALL).
llama-batched: examples/batched/batched.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1199  
# llama-batched-bench: compile the source to an object, then link it with $(OBJ_ALL).
llama-batched-bench: examples/batched-bench/batched-bench.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1204  
# llama-quantize: compile the source to an object, then link it with $(OBJ_ALL).
llama-quantize: examples/quantize/quantize.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1209  
# llama-quantize-stats: compile the source to an object, then link it with $(OBJ_ALL).
llama-quantize-stats: examples/quantize-stats/quantize-stats.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1214  
# llama-perplexity: compile the source to an object, then link it with $(OBJ_ALL).
llama-perplexity: examples/perplexity/perplexity.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1219  
# llama-imatrix: compile the source to an object, then link it with $(OBJ_ALL).
llama-imatrix: examples/imatrix/imatrix.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1224  
# llama-embedding: compile the source to an object, then link it with $(OBJ_ALL).
llama-embedding: examples/embedding/embedding.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1229  
# llama-gritlm: compile the source to an object, then link it with $(OBJ_ALL).
llama-gritlm: examples/gritlm/gritlm.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1234  
# llama-save-load-state: compile the source to an object, then link it with $(OBJ_ALL).
llama-save-load-state: examples/save-load-state/save-load-state.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1239  
# llama-gguf: needs only the ggml objects ($(OBJ_GGML)), not the llama/common ones.
llama-gguf: examples/gguf/gguf.cpp $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1244  
# Hash implementations used by llama-gguf-hash. Each object is compiled from the
# .c file of the same name, with the deps directory on the include path.
examples/gguf-hash/deps/sha1/sha1.o \
examples/gguf-hash/deps/xxhash/xxhash.o \
examples/gguf-hash/deps/sha256/sha256.o: %.o: %.c
	$(CC) $(CFLAGS) -Iexamples/gguf-hash/deps -c $< -o $@
1256  
# llama-gguf-hash: like the other programs, plus the three hash objects and the
# deps include path for the compile step.
llama-gguf-hash: examples/gguf-hash/gguf-hash.cpp \
	examples/gguf-hash/deps/sha1/sha1.o \
	examples/gguf-hash/deps/xxhash/xxhash.o \
	examples/gguf-hash/deps/sha256/sha256.o \
	$(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -Iexamples/gguf-hash/deps -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1261  
# llama-gguf-split: compile the source to an object, then link it with $(OBJ_ALL).
llama-gguf-split: examples/gguf-split/gguf-split.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1266  
# llama-eval-callback: compile the source to an object, then link it with $(OBJ_ALL).
llama-eval-callback: examples/eval-callback/eval-callback.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1271  
# llama-cvector-generator: compile the source to an object, then link it with $(OBJ_ALL).
llama-cvector-generator: examples/cvector-generator/cvector-generator.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1276  
# llama-convert-llama2c-to-ggml: compile the source to an object, then link it with $(OBJ_ALL).
llama-convert-llama2c-to-ggml: examples/convert-llama2c-to-ggml/convert-llama2c-to-ggml.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1281  
# llama-bench: compile the source to an object, then link it with $(OBJ_ALL).
llama-bench: examples/llama-bench/llama-bench.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1286  
# llama-export-lora: compile the source to an object, then link it with $(OBJ_ALL).
llama-export-lora: examples/export-lora/export-lora.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1291  
# llama-retrieval: compile the source to an object, then link it with $(OBJ_ALL).
llama-retrieval: examples/retrieval/retrieval.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1296  
# llama-speculative: compile the source to an object, then link it with $(OBJ_ALL).
llama-speculative: examples/speculative/speculative.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1301  
# llama-parallel: compile the source to an object, then link it with $(OBJ_ALL).
llama-parallel: examples/parallel/parallel.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1306  
# llama-lookahead: compile the source to an object, then link it with $(OBJ_ALL).
llama-lookahead: examples/lookahead/lookahead.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1311  
# llama-lookup: compile the source to an object, then link it with $(OBJ_ALL).
llama-lookup: examples/lookup/lookup.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1316  
# llama-lookup-create: compile the source to an object, then link it with $(OBJ_ALL).
llama-lookup-create: examples/lookup/lookup-create.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1321  
# llama-lookup-merge: compile the source to an object, then link it with $(OBJ_ALL).
llama-lookup-merge: examples/lookup/lookup-merge.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1326  
# llama-lookup-stats: compile the source to an object, then link it with $(OBJ_ALL).
llama-lookup-stats: examples/lookup/lookup-stats.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1331  
# llama-passkey: compile the source to an object, then link it with $(OBJ_ALL).
llama-passkey: examples/passkey/passkey.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1336  
# llama-gbnf-validator: compile the source to an object, then link it with $(OBJ_ALL).
llama-gbnf-validator: examples/gbnf-validator/gbnf-validator.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1341  
# rpc-server exists only in GGML_RPC builds. Unlike the other programs it is
# compiled and linked in a single step from the source plus the ggml objects.
ifdef GGML_RPC
rpc-server: examples/rpc/rpc-server.cpp $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS)
endif # GGML_RPC
1347  
# llama-server: the .hpp files generated from examples/server/public/ and the bundled
# headers are prerequisites so that the server is rebuilt when any of them changes;
# both .h and .hpp prerequisites are filtered out of the link line.
llama-server: examples/server/server.cpp \
	examples/server/utils.hpp examples/server/httplib.h \
	examples/server/index.html.hpp examples/server/completion.js.hpp examples/server/loading.html.hpp \
	examples/server/deps_daisyui.min.css.hpp examples/server/deps_markdown-it.js.hpp \
	examples/server/deps_tailwindcss.js.hpp examples/server/deps_vue.esm-browser.js.hpp \
	common/json.hpp common/stb_image.h \
	$(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h %.hpp,$^) -Iexamples/server $(basename $<).o -o $@ $(LDFLAGS) $(LWINSOCK2)
1364  
# Generate a C header from a file in examples/server/public/; this is a portable
# stand-in for `cd examples/server/public && xxd -i $(notdir $<) ../$(notdir $<).hpp`.
# The header defines `unsigned char <sym>[]` with the file's bytes and
# `unsigned int <sym>_len` with its size, where <sym> is the file name with every
# `-` and `.` replaced by `_`.
examples/server/%.hpp: examples/server/public/% Makefile
	@( sym=$(subst .,_,$(subst -,_,$(notdir $<))) && \
		echo "unsigned char $${sym}[] = {" && \
		od -v -t x1 -An < $< | sed -E 's/([0-9a-fA-F]+)/0x\1, /g' && \
		echo "};" && \
		echo "unsigned int $${sym}_len = $$(wc -c < $<);" \
	) > $@
1373  
# llama-gen-docs: compile the source to an object, then link it with $(OBJ_ALL).
llama-gen-docs: examples/gen-docs/gen-docs.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1378  
# libllava.a: the recipe compiles only the first prerequisite ($<, llava.cpp) with -c
# and writes the compiler output directly to $@.
# NOTE(review): the result is therefore a single object file carrying an archive
# name rather than an `ar` archive, and clip.cpp / $(OBJ_ALL) are prerequisites
# that do not end up in the output - confirm this is intended.
libllava.a: examples/llava/llava.cpp \
	examples/llava/llava.h \
	examples/llava/clip.cpp \
	examples/llava/clip.h \
	common/stb_image.h \
	common/base64.hpp \
	$(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -static -fPIC -c $< -o $@ -Wno-cast-qual
1387  
# llama-llava-cli: built in a single compile-and-link step from all .cpp sources
# and $(OBJ_ALL); only the headers are dropped from the command line.
llama-llava-cli: examples/llava/llava-cli.cpp \
	examples/llava/llava.cpp examples/llava/llava.h \
	examples/llava/clip.cpp examples/llava/clip.h \
	$(OBJ_ALL)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
1395  
# llama-minicpmv-cli: built in a single compile-and-link step from all .cpp sources
# and $(OBJ_ALL); only the headers are dropped from the command line.
llama-minicpmv-cli: examples/llava/minicpmv-cli.cpp \
	examples/llava/llava.cpp examples/llava/llava.h \
	examples/llava/clip.cpp examples/llava/clip.h \
	$(OBJ_ALL)
	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -Wno-cast-qual
1403  
# swift (macOS only): run the `build` target of the makefile in examples/batched.swift.
# - `swift` is a command name, not a file, so it is declared phony.
# - The sub-make is started through $(MAKE) -C so that it is the same make program
#   as the parent and cooperates with `-j` (jobserver) and `-n`; a failure to enter
#   the directory also stops the recipe instead of running `make build` in the
#   wrong place.
ifeq ($(UNAME_S),Darwin)
.PHONY: swift
swift: examples/batched.swift
	$(MAKE) -C examples/batched.swift build
endif
1408  
# Regenerate build-info.cpp via scripts/build-info.sh whenever the git index or the
# script changes. The output goes to a temporary file first and replaces the real
# file only when the content differs, so an unchanged result keeps its timestamp.
common/build-info.cpp: $(wildcard .git/index) scripts/build-info.sh
	@sh scripts/build-info.sh "$(CC)" > $@.tmp
	@if cmp -s $@.tmp $@; then \
		rm $@.tmp; \
	else \
		mv $@.tmp $@; \
	fi

# Compile the generated source; header prerequisites, if any, are kept off the command line.
common/build-info.o: common/build-info.cpp
	$(CXX) $(CXXFLAGS) -o $@ -c $(filter-out %.h,$^)
1419  
1420  #
1421  # Tests
1422  #
1423  
# `tests` builds every test binary. It is declared phony because it is a command
# name that coincides with the tests/ source directory.
.PHONY: tests
tests: $(TEST_TARGETS)
1425  
# test-arg-parser: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-arg-parser: tests/test-arg-parser.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1430  
# test-llama-grammar: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-llama-grammar: tests/test-llama-grammar.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1435  
# test-log: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-log: tests/test-log.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1440  
# test-grammar-parser: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-grammar-parser: tests/test-grammar-parser.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1445  
# test-grammar-integration: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-grammar-integration: tests/test-grammar-integration.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1450  
# test-double-float: standalone test; it has no library objects among its prerequisites.
tests/test-double-float: tests/test-double-float.cpp
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1454  
# test-json-schema-to-grammar: the compile step additionally puts examples/server on the include path.
tests/test-json-schema-to-grammar: tests/test-json-schema-to-grammar.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -Iexamples/server -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1459  
# test-opt: links against the ggml objects only.
tests/test-opt: tests/test-opt.cpp $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1464  
# test-quantize-fns: links against the ggml objects only.
tests/test-quantize-fns: tests/test-quantize-fns.cpp $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1469  
# test-quantize-perf: links against the ggml objects only.
tests/test-quantize-perf: tests/test-quantize-perf.cpp $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1474  
# test-sampling: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-sampling: tests/test-sampling.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1479  
# test-tokenizer-0: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-tokenizer-0: tests/test-tokenizer-0.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1484  
# test-tokenizer-1-bpe: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-tokenizer-1-bpe: tests/test-tokenizer-1-bpe.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1489  
# test-tokenizer-1-spm: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-tokenizer-1-spm: tests/test-tokenizer-1-spm.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1494  
# test-rope: links against the ggml objects; ggml/src/ggml.o is also named explicitly
# ($^ lists each prerequisite only once, so it appears a single time on the link line).
tests/test-rope: tests/test-rope.cpp ggml/src/ggml.o $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1499  
# test-c.o: compile-only check that tests/test-c.c (with include/llama.h as a
# prerequisite) builds with the C compiler; nothing is linked.
tests/test-c.o: tests/test-c.c include/llama.h
	$(CC) $(CFLAGS) -o $@ -c $(filter-out %.h,$^)
1502  
# test-backend-ops: links against the ggml objects only.
tests/test-backend-ops: tests/test-backend-ops.cpp $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1507  
# test-model-load-cancel: the test source is compiled to an object first;
# tests/get-model.cpp is passed as a source file on the link command line.
tests/test-model-load-cancel: tests/test-model-load-cancel.cpp tests/get-model.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1512  
# test-autorelease: the test source is compiled to an object first;
# tests/get-model.cpp is passed as a source file on the link command line.
tests/test-autorelease: tests/test-autorelease.cpp tests/get-model.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1517  
# test-chat-template: compile the test source to an object, then link it with $(OBJ_ALL).
tests/test-chat-template: tests/test-chat-template.cpp $(OBJ_ALL)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $< %.h,$^) $(basename $<).o -o $@ $(LDFLAGS)
1522  
1523  #
1524  # PoCs
1525  #
1526  
# llama-vdot: proof-of-concept program that links against the ggml objects;
# only the source itself is filtered out of the link line.
llama-vdot: pocs/vdot/vdot.cpp ggml/src/ggml.o $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(basename $<).o -o $@ $(LDFLAGS)
1531  
# llama-q8dot: proof-of-concept program that links against the ggml objects;
# only the source itself is filtered out of the link line.
llama-q8dot: pocs/vdot/q8dot.cpp ggml/src/ggml.o $(OBJ_GGML)
	$(CXX) $(CXXFLAGS) -c $< -o $(basename $<).o
	$(CXX) $(CXXFLAGS) $(filter-out $<,$^) $(basename $<).o -o $@ $(LDFLAGS)
1536  
#
# Deprecated binaries: kept until users have had time to move to the new file names,
# after which they can be removed.
#
# The legacy names are phony, so make evaluates their rules on every run even when
# a file with that name already exists.
.PHONY: main server quantize perplexity embedding
1542  
# Object file shared by all deprecated binary names below
examples/deprecation-warning/deprecation-warning.o: examples/deprecation-warning/deprecation-warning.cpp
	$(CXX) $(CXXFLAGS) -o $@ -c $<
1546  
# NOTE: the deprecation-warning `main` and `server` binaries are currently always
#  built to help users migrate. Eventually these targets should stop being built by default.
main: examples/deprecation-warning/deprecation-warning.o
	$(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS)
	@echo "NOTICE: The 'main' binary is deprecated. Please use 'llama-cli' instead."
1552  
# Legacy `server` name: links the deprecation-warning program and prints a notice.
server: examples/deprecation-warning/deprecation-warning.o
	$(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS)
	@echo "NOTICE: The 'server' binary is deprecated. Please use 'llama-server' instead."
1556  
# Legacy `quantize` name: only when a file called `quantize` already exists is it
# replaced by the deprecation-warning program; otherwise the rule has no recipe.
quantize: examples/deprecation-warning/deprecation-warning.o
ifneq ($(wildcard quantize),)
	$(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS)
	@echo "#########"
	@echo "WARNING: The 'quantize' binary is deprecated. Please use 'llama-quantize' instead."
	@echo "  Remove the 'quantize' binary to remove this warning."
	@echo "#########"
endif
1565  
# Legacy `perplexity` name: only when a file called `perplexity` already exists is it
# replaced by the deprecation-warning program; otherwise the rule has no recipe.
perplexity: examples/deprecation-warning/deprecation-warning.o
ifneq ($(wildcard perplexity),)
	$(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS)
	@echo "#########"
	@echo "WARNING: The 'perplexity' binary is deprecated. Please use 'llama-perplexity' instead."
	@echo "  Remove the 'perplexity' binary to remove this warning."
	@echo "#########"
endif
1574  
# Legacy `embedding` name: only when a file called `embedding` already exists is it
# replaced by the deprecation-warning program; otherwise the rule has no recipe.
embedding: examples/deprecation-warning/deprecation-warning.o
ifneq ($(wildcard embedding),)
	$(CXX) $(CXXFLAGS) -o $@ $< $(LDFLAGS)
	@echo "#########"
	@echo "WARNING: The 'embedding' binary is deprecated. Please use 'llama-embedding' instead."
	@echo "  Remove the 'embedding' binary to remove this warning."
	@echo "#########"
endif