/ CMakeLists.txt
CMakeLists.txt
1 cmake_minimum_required(VERSION 3.14) # for add_link_options and implicit target directories. 2 project("llama.cpp" C CXX) 3 include(CheckIncludeFileCXX) 4 5 set(CMAKE_EXPORT_COMPILE_COMMANDS ON) 6 7 if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE) 8 set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE) 9 set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo") 10 endif() 11 12 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) 13 14 if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR) 15 set(LLAMA_STANDALONE ON) 16 17 # configure project version 18 # TODO 19 else() 20 set(LLAMA_STANDALONE OFF) 21 endif() 22 23 if (EMSCRIPTEN) 24 set(BUILD_SHARED_LIBS_DEFAULT OFF) 25 26 option(LLAMA_WASM_SINGLE_FILE "llama: embed WASM inside the generated llama.js" ON) 27 else() 28 if (MINGW) 29 set(BUILD_SHARED_LIBS_DEFAULT OFF) 30 else() 31 set(BUILD_SHARED_LIBS_DEFAULT ON) 32 endif() 33 endif() 34 35 36 # 37 # Option list 38 # 39 40 if (APPLE) 41 set(LLAMA_METAL_DEFAULT ON) 42 set(LLAMA_BLAS_DEFAULT ON) 43 set(LLAMA_BLAS_VENDOR_DEFAULT "Apple") 44 else() 45 set(LLAMA_METAL_DEFAULT OFF) 46 set(LLAMA_BLAS_DEFAULT OFF) 47 set(LLAMA_BLAS_VENDOR_DEFAULT "Generic") 48 endif() 49 50 set(LLAMA_LLAMAFILE_DEFAULT ON) 51 52 # general 53 option(BUILD_SHARED_LIBS "build shared libraries" OFF) 54 option(LLAMA_STATIC "llama: static link libraries" OFF) 55 option(LLAMA_NATIVE "llama: enable -march=native flag" ON) 56 option(LLAMA_LTO "llama: enable link time optimization" OFF) 57 option(LLAMA_CCACHE "llama: use ccache if available" ON) 58 59 # debug 60 option(LLAMA_ALL_WARNINGS "llama: enable all compiler warnings" ON) 61 option(LLAMA_ALL_WARNINGS_3RD_PARTY "llama: enable all compiler warnings in 3rd party libs" OFF) 62 option(LLAMA_GPROF "llama: enable gprof" OFF) 63 64 # build 65 option(LLAMA_FATAL_WARNINGS "llama: enable -Werror flag" OFF) 66 67 # sanitizers 68 option(LLAMA_SANITIZE_THREAD "llama: enable thread sanitizer" OFF) 69 option(LLAMA_SANITIZE_ADDRESS "llama: enable address sanitizer" OFF) 70 option(LLAMA_SANITIZE_UNDEFINED "llama: enable undefined sanitizer" OFF) 71 72 # instruction set specific 73 if (LLAMA_NATIVE) 74 set(INS_ENB OFF) 75 else() 76 set(INS_ENB ON) 77 endif() 78 79 option(LLAMA_SVE "llama: enable SVE" OFF) 80 option(LLAMA_AVX "llama: enable AVX" ${INS_ENB}) 81 option(LLAMA_AVX2 "llama: enable AVX2" ${INS_ENB}) 82 option(LLAMA_AVX512 "llama: enable AVX512" OFF) 83 option(LLAMA_AVX512_VBMI "llama: enable AVX512-VBMI" OFF) 84 option(LLAMA_AVX512_VNNI "llama: enable AVX512-VNNI" OFF) 85 option(LLAMA_AVX512_BF16 "llama: enable AVX512-BF16" OFF) 86 option(LLAMA_FMA "llama: enable FMA" ${INS_ENB}) 87 # in MSVC F16C is implied with AVX2/AVX512 88 if (NOT MSVC) 89 option(LLAMA_F16C "llama: enable F16C" ${INS_ENB}) 90 endif() 91 92 if (WIN32) 93 set(LLAMA_WIN_VER "0x602" CACHE STRING "llama: Windows Version") 94 endif() 95 96 # 3rd party libs 97 option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON) 98 option(LLAMA_BLAS "llama: use BLAS" ${LLAMA_BLAS_DEFAULT}) 99 set(LLAMA_BLAS_VENDOR ${LLAMA_BLAS_VENDOR_DEFAULT} CACHE STRING 100 "llama: BLAS library vendor") 101 option(LLAMA_LLAMAFILE "llama: use llamafile SGEMM" ${LLAMA_LLAMAFILE_DEFAULT}) 102 option(LLAMA_CUDA "llama: use CUDA" OFF) 103 option(LLAMA_CUBLAS "llama: use CUDA (deprecated, use LLAMA_CUDA)" OFF) 104 option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF) 105 option(LLAMA_CUDA_FORCE_MMQ "llama: use mmq kernels instead of cuBLAS" OFF) 106 set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels") 107 set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels") 108 option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF) 109 set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K") 110 set(LLAMA_CUDA_PEER_MAX_BATCH_SIZE "128" CACHE STRING 111 "llama: max. batch size for using peer access") 112 option(LLAMA_CUDA_NO_PEER_COPY "llama: do not use peer to peer copies" OFF) 113 option(LLAMA_CUDA_NO_VMM "llama: do not try to use CUDA VMM" OFF) 114 option(LLAMA_CUDA_FA_ALL_QUANTS "llama: compile all quants for FlashAttention" OFF) 115 116 option(LLAMA_CURL "llama: use libcurl to download model from an URL" OFF) 117 option(LLAMA_HIPBLAS "llama: use hipBLAS" OFF) 118 option(LLAMA_HIP_UMA "llama: use HIP unified memory architecture" OFF) 119 option(LLAMA_VULKAN "llama: use Vulkan" OFF) 120 option(LLAMA_VULKAN_CHECK_RESULTS "llama: run Vulkan op checks" OFF) 121 option(LLAMA_VULKAN_DEBUG "llama: enable Vulkan debug output" OFF) 122 option(LLAMA_VULKAN_MEMORY_DEBUG "llama: enable Vulkan memory debug output" OFF) 123 option(LLAMA_VULKAN_VALIDATE "llama: enable Vulkan validation" OFF) 124 option(LLAMA_VULKAN_RUN_TESTS "llama: run Vulkan tests" OFF) 125 option(LLAMA_METAL "llama: use Metal" ${LLAMA_METAL_DEFAULT}) 126 option(LLAMA_METAL_NDEBUG "llama: disable Metal debugging" OFF) 127 option(LLAMA_METAL_SHADER_DEBUG "llama: compile Metal with -fno-fast-math" OFF) 128 option(LLAMA_METAL_EMBED_LIBRARY "llama: embed Metal library" OFF) 129 set(LLAMA_METAL_MACOSX_VERSION_MIN "" CACHE STRING 130 "llama: metal minimum macOS version") 131 set(LLAMA_METAL_STD "" CACHE STRING "llama: metal standard version (-std flag)") 132 option(LLAMA_KOMPUTE "llama: use Kompute" OFF) 133 option(LLAMA_RPC "llama: use RPC" OFF) 134 option(LLAMA_OPENMP "llama: use OpenMP" ON) 135 option(LLAMA_SYCL "llama: use SYCL" OFF) 136 option(LLAMA_SYCL_F16 "llama: use 16 bit floats for sycl calculations" OFF) 137 set(LLAMA_SYCL_TARGET "INTEL" CACHE STRING "llama: sycl target device") 138 option(LLAMA_CPU_HBM "llama: use memkind for CPU HBM" OFF) 139 set(LLAMA_SCHED_MAX_COPIES "4" CACHE STRING "llama: max input copies for pipeline parallelism") 140 141 option(LLAMA_BUILD_TESTS "llama: build tests" ${LLAMA_STANDALONE}) 142 option(LLAMA_BUILD_EXAMPLES "llama: build examples" ${LLAMA_STANDALONE}) 143 option(LLAMA_BUILD_SERVER "llama: build server example" ON) 144 option(LLAMA_LASX "llama: enable lasx" ON) 145 option(LLAMA_LSX "llama: enable lsx" ON) 146 147 # add perf arguments 148 option(LLAMA_PERF "llama: enable perf" OFF) 149 150 # Required for relocatable CMake package 151 include(${CMAKE_CURRENT_SOURCE_DIR}/scripts/build-info.cmake) 152 153 # 154 # Compile flags 155 # 156 157 if (LLAMA_SYCL) 158 set(CMAKE_CXX_STANDARD 17) 159 else() 160 set(CMAKE_CXX_STANDARD 11) 161 endif() 162 163 set(CMAKE_CXX_STANDARD_REQUIRED true) 164 set(CMAKE_C_STANDARD 11) 165 set(CMAKE_C_STANDARD_REQUIRED true) 166 set(THREADS_PREFER_PTHREAD_FLAG ON) 167 168 find_package(Threads REQUIRED) 169 include(CheckCXXCompilerFlag) 170 171 add_compile_definitions(GGML_SCHED_MAX_COPIES=${LLAMA_SCHED_MAX_COPIES}) 172 173 # enable libstdc++ assertions for debug builds 174 if (CMAKE_SYSTEM_NAME MATCHES "Linux") 175 add_compile_definitions($<$<CONFIG:Debug>:_GLIBCXX_ASSERTIONS>) 176 endif() 177 178 if (NOT MSVC) 179 if (LLAMA_SANITIZE_THREAD) 180 add_compile_options(-fsanitize=thread) 181 link_libraries (-fsanitize=thread) 182 endif() 183 184 if (LLAMA_SANITIZE_ADDRESS) 185 add_compile_options(-fsanitize=address -fno-omit-frame-pointer) 186 link_libraries (-fsanitize=address) 187 endif() 188 189 if (LLAMA_SANITIZE_UNDEFINED) 190 add_compile_options(-fsanitize=undefined) 191 link_libraries (-fsanitize=undefined) 192 endif() 193 endif() 194 195 if (APPLE AND LLAMA_ACCELERATE) 196 find_library(ACCELERATE_FRAMEWORK Accelerate) 197 if (ACCELERATE_FRAMEWORK) 198 message(STATUS "Accelerate framework found") 199 200 add_compile_definitions(GGML_USE_ACCELERATE) 201 add_compile_definitions(ACCELERATE_NEW_LAPACK) 202 add_compile_definitions(ACCELERATE_LAPACK_ILP64) 203 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${ACCELERATE_FRAMEWORK}) 204 else() 205 message(WARNING "Accelerate framework not found") 206 endif() 207 endif() 208 209 if (LLAMA_METAL) 210 find_library(FOUNDATION_LIBRARY Foundation REQUIRED) 211 find_library(METAL_FRAMEWORK Metal REQUIRED) 212 find_library(METALKIT_FRAMEWORK MetalKit REQUIRED) 213 214 message(STATUS "Metal framework found") 215 set(GGML_HEADERS_METAL ggml-metal.h) 216 set(GGML_SOURCES_METAL ggml-metal.m) 217 218 add_compile_definitions(GGML_USE_METAL) 219 if (LLAMA_METAL_NDEBUG) 220 add_compile_definitions(GGML_METAL_NDEBUG) 221 endif() 222 223 # copy ggml-common.h and ggml-metal.metal to bin directory 224 configure_file(ggml-common.h ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h COPYONLY) 225 configure_file(ggml-metal.metal ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal COPYONLY) 226 227 if (LLAMA_METAL_EMBED_LIBRARY) 228 enable_language(ASM) 229 add_compile_definitions(GGML_METAL_EMBED_LIBRARY) 230 231 set(METALLIB_COMMON "${CMAKE_CURRENT_SOURCE_DIR}/ggml-common.h") 232 set(METALLIB_SOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") 233 234 file(MAKE_DIRECTORY "${CMAKE_BINARY_DIR}/autogenerated") 235 236 # merge ggml-common.h and ggml-metal.metal into a single file 237 set(METALLIB_EMBED_ASM "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.s") 238 set(METALLIB_SOURCE_EMBED "${CMAKE_BINARY_DIR}/autogenerated/ggml-metal-embed.metal") 239 240 add_custom_command( 241 OUTPUT ${METALLIB_EMBED_ASM} 242 COMMAND echo "Embedding Metal library" 243 COMMAND sed -e '/\#include \"ggml-common.h\"/r ${METALLIB_COMMON}' -e '/\#include \"ggml-common.h\"/d' < ${METALLIB_SOURCE} > ${METALLIB_SOURCE_EMBED} 244 COMMAND echo ".section __DATA,__ggml_metallib" > ${METALLIB_EMBED_ASM} 245 COMMAND echo ".globl _ggml_metallib_start" >> ${METALLIB_EMBED_ASM} 246 COMMAND echo "_ggml_metallib_start:" >> ${METALLIB_EMBED_ASM} 247 COMMAND echo ".incbin \\\"${METALLIB_SOURCE_EMBED}\\\"" >> ${METALLIB_EMBED_ASM} 248 COMMAND echo ".globl _ggml_metallib_end" >> ${METALLIB_EMBED_ASM} 249 COMMAND echo "_ggml_metallib_end:" >> ${METALLIB_EMBED_ASM} 250 DEPENDS ggml-metal.metal ggml-common.h 251 COMMENT "Generate assembly for embedded Metal library" 252 ) 253 254 set(GGML_SOURCES_METAL ${GGML_SOURCES_METAL} ${METALLIB_EMBED_ASM}) 255 else() 256 if (LLAMA_METAL_SHADER_DEBUG) 257 # custom command to do the following: 258 # xcrun -sdk macosx metal -fno-fast-math -c ggml-metal.metal -o ggml-metal.air 259 # xcrun -sdk macosx metallib ggml-metal.air -o default.metallib 260 # 261 # note: this is the only way I found to disable fast-math in Metal. it's ugly, but at least it works 262 # disabling fast math is needed in order to pass tests/test-backend-ops 263 # note: adding -fno-inline fixes the tests when using MTL_SHADER_VALIDATION=1 264 # note: unfortunately, we have to call it default.metallib instead of ggml.metallib 265 # ref: https://github.com/ggerganov/whisper.cpp/issues/1720 266 set(XC_FLAGS -fno-fast-math -fno-inline -g) 267 else() 268 set(XC_FLAGS -O3) 269 endif() 270 271 # Append macOS metal versioning flags 272 if (LLAMA_METAL_MACOSX_VERSION_MIN) 273 message(STATUS "Adding -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN} flag to metal compilation") 274 list(APPEND XC_FLAGS -mmacosx-version-min=${LLAMA_METAL_MACOSX_VERSION_MIN}) 275 endif() 276 if (LLAMA_METAL_STD) 277 message(STATUS "Adding -std=${LLAMA_METAL_STD} flag to metal compilation") 278 list(APPEND XC_FLAGS -std=${LLAMA_METAL_STD}) 279 endif() 280 281 add_custom_command( 282 OUTPUT ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib 283 COMMAND xcrun -sdk macosx metal ${XC_FLAGS} -c ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air 284 COMMAND xcrun -sdk macosx metallib ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air -o ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib 285 COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.air 286 COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-common.h 287 COMMAND rm -f ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/ggml-metal.metal 288 DEPENDS ggml-metal.metal ggml-common.h 289 COMMENT "Compiling Metal kernels" 290 ) 291 292 add_custom_target( 293 ggml-metal ALL 294 DEPENDS ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib 295 ) 296 endif() # LLAMA_METAL_EMBED_LIBRARY 297 298 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} 299 ${FOUNDATION_LIBRARY} 300 ${METAL_FRAMEWORK} 301 ${METALKIT_FRAMEWORK} 302 ) 303 endif() 304 305 if (LLAMA_OPENMP) 306 find_package(OpenMP) 307 if (OpenMP_FOUND) 308 message(STATUS "OpenMP found") 309 add_compile_definitions(GGML_USE_OPENMP) 310 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} OpenMP::OpenMP_C OpenMP::OpenMP_CXX) 311 else() 312 message(WARNING "OpenMP not found") 313 endif() 314 endif() 315 316 if (LLAMA_BLAS) 317 if (LLAMA_STATIC) 318 set(BLA_STATIC ON) 319 endif() 320 #if (CMAKE_VERSION VERSION_GREATER_EQUAL 3.22) 321 # set(BLA_SIZEOF_INTEGER 8) 322 #endif() 323 324 set(BLA_VENDOR ${LLAMA_BLAS_VENDOR}) 325 find_package(BLAS) 326 327 if (BLAS_FOUND) 328 message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}") 329 330 if (("${BLAS_INCLUDE_DIRS}" STREQUAL "") AND NOT (${LLAMA_BLAS_VENDOR} MATCHES "Apple")) 331 # BLAS_INCLUDE_DIRS is missing in FindBLAS.cmake. 332 # see https://gitlab.kitware.com/cmake/cmake/-/issues/20268 333 find_package(PkgConfig REQUIRED) 334 if (${LLAMA_BLAS_VENDOR} MATCHES "Generic") 335 pkg_check_modules(DepBLAS REQUIRED blas) 336 elseif (${LLAMA_BLAS_VENDOR} MATCHES "OpenBLAS") 337 # As of openblas v0.3.22, the 64-bit is named openblas64.pc 338 pkg_check_modules(DepBLAS openblas64) 339 if (NOT DepBLAS_FOUND) 340 pkg_check_modules(DepBLAS REQUIRED openblas) 341 endif() 342 elseif (${LLAMA_BLAS_VENDOR} MATCHES "FLAME") 343 pkg_check_modules(DepBLAS REQUIRED blis) 344 elseif (${LLAMA_BLAS_VENDOR} MATCHES "ATLAS") 345 pkg_check_modules(DepBLAS REQUIRED blas-atlas) 346 elseif (${LLAMA_BLAS_VENDOR} MATCHES "FlexiBLAS") 347 pkg_check_modules(DepBLAS REQUIRED flexiblas_api) 348 elseif (${LLAMA_BLAS_VENDOR} MATCHES "Intel") 349 # all Intel* libraries share the same include path 350 pkg_check_modules(DepBLAS REQUIRED mkl-sdl) 351 elseif (${LLAMA_BLAS_VENDOR} MATCHES "NVHPC") 352 # this doesn't provide pkg-config 353 # suggest to assign BLAS_INCLUDE_DIRS on your own 354 if ("${NVHPC_VERSION}" STREQUAL "") 355 message(WARNING "Better to set NVHPC_VERSION") 356 else() 357 set(DepBLAS_FOUND ON) 358 set(DepBLAS_INCLUDE_DIRS "/opt/nvidia/hpc_sdk/${CMAKE_SYSTEM_NAME}_${CMAKE_SYSTEM_PROCESSOR}/${NVHPC_VERSION}/math_libs/include") 359 endif() 360 endif() 361 if (DepBLAS_FOUND) 362 set(BLAS_INCLUDE_DIRS ${DepBLAS_INCLUDE_DIRS}) 363 else() 364 message(WARNING "BLAS_INCLUDE_DIRS neither been provided nor been automatically" 365 " detected by pkgconfig, trying to find cblas.h from possible paths...") 366 find_path(BLAS_INCLUDE_DIRS 367 NAMES cblas.h 368 HINTS 369 /usr/include 370 /usr/local/include 371 /usr/include/openblas 372 /opt/homebrew/opt/openblas/include 373 /usr/local/opt/openblas/include 374 /usr/include/x86_64-linux-gnu/openblas/include 375 ) 376 endif() 377 endif() 378 379 message(STATUS "BLAS found, Includes: ${BLAS_INCLUDE_DIRS}") 380 381 add_compile_options(${BLAS_LINKER_FLAGS}) 382 383 add_compile_definitions(GGML_USE_BLAS) 384 385 if (${BLAS_INCLUDE_DIRS} MATCHES "mkl" AND (${LLAMA_BLAS_VENDOR} MATCHES "Generic" OR ${LLAMA_BLAS_VENDOR} MATCHES "Intel")) 386 add_compile_definitions(GGML_BLAS_USE_MKL) 387 endif() 388 389 set(GGML_HEADERS_BLAS ggml-blas.h) 390 set(GGML_SOURCES_BLAS ggml-blas.cpp) 391 392 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES}) 393 set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${BLAS_INCLUDE_DIRS}) 394 else() 395 message(WARNING "BLAS not found, please refer to " 396 "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" 397 " to set correct LLAMA_BLAS_VENDOR") 398 endif() 399 endif() 400 401 if (LLAMA_LLAMAFILE) 402 add_compile_definitions(GGML_USE_LLAMAFILE) 403 404 set(GGML_HEADERS_LLAMAFILE sgemm.h) 405 set(GGML_SOURCES_LLAMAFILE sgemm.cpp) 406 endif() 407 408 if (LLAMA_CUBLAS) 409 message(WARNING "LLAMA_CUBLAS is deprecated and will be removed in the future.\nUse LLAMA_CUDA instead") 410 set(LLAMA_CUDA ON) 411 endif() 412 413 if (LLAMA_CUDA) 414 cmake_minimum_required(VERSION 3.18) # for CMAKE_CUDA_ARCHITECTURES 415 416 find_package(CUDAToolkit) 417 if (CUDAToolkit_FOUND) 418 message(STATUS "CUDA found") 419 420 if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES) 421 # 52 == lowest CUDA 12 standard 422 # 60 == f16 CUDA intrinsics 423 # 61 == integer CUDA intrinsics 424 # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster 425 if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) 426 set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics 427 else() 428 set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics 429 #set(CMAKE_CUDA_ARCHITECTURES "OFF") # use this to compile much faster, but only F16 models work 430 endif() 431 endif() 432 message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}") 433 434 enable_language(CUDA) 435 436 set(GGML_HEADERS_CUDA ggml-cuda.h) 437 438 file(GLOB GGML_SOURCES_CUDA "ggml-cuda/*.cu") 439 list(APPEND GGML_SOURCES_CUDA "ggml-cuda.cu") 440 file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu") 441 list(APPEND GGML_SOURCES_CUDA ${SRCS}) 442 file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu") 443 list(APPEND GGML_SOURCES_CUDA ${SRCS}) 444 445 add_compile_definitions(GGML_USE_CUDA) 446 add_compile_definitions(GGML_CUDA_USE_GRAPHS) 447 if (LLAMA_CUDA_FORCE_DMMV) 448 add_compile_definitions(GGML_CUDA_FORCE_DMMV) 449 endif() 450 if (LLAMA_CUDA_FORCE_MMQ) 451 add_compile_definitions(GGML_CUDA_FORCE_MMQ) 452 endif() 453 if (LLAMA_CUDA_NO_VMM) 454 add_compile_definitions(GGML_CUDA_NO_VMM) 455 endif() 456 add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) 457 add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) 458 if (DEFINED LLAMA_CUDA_DMMV_Y) 459 add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility 460 endif() 461 if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16) 462 add_compile_definitions(GGML_CUDA_F16) 463 endif() 464 add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) 465 add_compile_definitions(GGML_CUDA_PEER_MAX_BATCH_SIZE=${LLAMA_CUDA_PEER_MAX_BATCH_SIZE}) 466 if (LLAMA_CUDA_NO_PEER_COPY) 467 add_compile_definitions(GGML_CUDA_NO_PEER_COPY) 468 endif() 469 if (LLAMA_CUDA_FA_ALL_QUANTS) 470 file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu") 471 list(APPEND GGML_SOURCES_CUDA ${SRCS}) 472 add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) 473 else() 474 file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") 475 list(APPEND GGML_SOURCES_CUDA ${SRCS}) 476 file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu") 477 list(APPEND GGML_SOURCES_CUDA ${SRCS}) 478 file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu") 479 list(APPEND GGML_SOURCES_CUDA ${SRCS}) 480 endif() 481 482 if (LLAMA_STATIC) 483 if (WIN32) 484 # As of 12.3.1 CUDA Toolkit for Windows does not offer a static cublas library 485 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas CUDA::cublasLt) 486 else () 487 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart_static CUDA::cublas_static CUDA::cublasLt_static) 488 endif() 489 else() 490 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cudart CUDA::cublas CUDA::cublasLt) 491 endif() 492 493 if (LLAMA_CUDA_NO_VMM) 494 # No VMM requested, no need to link directly with the cuda driver lib (libcuda.so) 495 else() 496 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} CUDA::cuda_driver) # required by cuDeviceGetAttribute(), cuMemGetAllocationGranularity(...), ... 497 endif() 498 else() 499 message(WARNING "CUDA not found") 500 endif() 501 endif() 502 503 if (LLAMA_RPC) 504 add_compile_definitions(GGML_USE_RPC) 505 506 if (WIN32) 507 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ws2_32) 508 endif() 509 510 set(GGML_HEADERS_RPC ggml-rpc.h) 511 set(GGML_SOURCES_RPC ggml-rpc.cpp) 512 endif() 513 514 if (LLAMA_VULKAN) 515 find_package(Vulkan) 516 if (Vulkan_FOUND) 517 message(STATUS "Vulkan found") 518 519 set(GGML_HEADERS_VULKAN ggml-vulkan.h) 520 set(GGML_SOURCES_VULKAN ggml-vulkan.cpp) 521 522 add_compile_definitions(GGML_USE_VULKAN) 523 524 # Workaround to the "can't dereference invalidated vector iterator" bug in clang-cl debug build 525 # Posssibly relevant: https://stackoverflow.com/questions/74748276/visual-studio-no-displays-the-correct-length-of-stdvector 526 if (MSVC AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang") 527 add_compile_definitions(_ITERATOR_DEBUG_LEVEL=0) 528 endif() 529 530 if (LLAMA_VULKAN_CHECK_RESULTS) 531 add_compile_definitions(GGML_VULKAN_CHECK_RESULTS) 532 endif() 533 534 if (LLAMA_VULKAN_DEBUG) 535 add_compile_definitions(GGML_VULKAN_DEBUG) 536 endif() 537 538 if (LLAMA_VULKAN_MEMORY_DEBUG) 539 add_compile_definitions(GGML_VULKAN_MEMORY_DEBUG) 540 endif() 541 542 if (LLAMA_VULKAN_VALIDATE) 543 add_compile_definitions(GGML_VULKAN_VALIDATE) 544 endif() 545 546 if (LLAMA_VULKAN_RUN_TESTS) 547 add_compile_definitions(GGML_VULKAN_RUN_TESTS) 548 endif() 549 550 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} Vulkan::Vulkan) 551 else() 552 message(WARNING "Vulkan not found") 553 endif() 554 endif() 555 556 if (LLAMA_HIPBLAS) 557 if (NOT EXISTS $ENV{ROCM_PATH}) 558 if (NOT EXISTS /opt/rocm) 559 set(ROCM_PATH /usr) 560 else() 561 set(ROCM_PATH /opt/rocm) 562 endif() 563 else() 564 set(ROCM_PATH $ENV{ROCM_PATH}) 565 endif() 566 list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}) 567 list(APPEND CMAKE_PREFIX_PATH "${ROCM_PATH}/lib64/cmake") 568 569 # CMake on Windows doesn't support the HIP language yet 570 if(WIN32) 571 set(CXX_IS_HIPCC TRUE) 572 else() 573 string(REGEX MATCH "hipcc(\.bat)?$" CXX_IS_HIPCC "${CMAKE_CXX_COMPILER}") 574 endif() 575 576 if(CXX_IS_HIPCC) 577 if(LINUX) 578 if (NOT ${CMAKE_CXX_COMPILER_ID} MATCHES "Clang") 579 message(WARNING "Only LLVM is supported for HIP, hint: CXX=/opt/rocm/llvm/bin/clang++") 580 endif() 581 582 message(WARNING "Setting hipcc as the C++ compiler is legacy behavior." 583 " Prefer setting the HIP compiler directly. See README for details.") 584 endif() 585 else() 586 # Forward AMDGPU_TARGETS to CMAKE_HIP_ARCHITECTURES. 587 if(AMDGPU_TARGETS AND NOT CMAKE_HIP_ARCHITECTURES) 588 set(CMAKE_HIP_ARCHITECTURES ${AMDGPU_TARGETS}) 589 endif() 590 cmake_minimum_required(VERSION 3.21) 591 enable_language(HIP) 592 endif() 593 find_package(hip REQUIRED) 594 find_package(hipblas REQUIRED) 595 find_package(rocblas REQUIRED) 596 597 message(STATUS "HIP and hipBLAS found") 598 599 set(GGML_HEADERS_ROCM ggml-cuda.h) 600 601 file(GLOB GGML_SOURCES_ROCM "ggml-cuda/*.cu") 602 list(APPEND GGML_SOURCES_ROCM "ggml-cuda.cu") 603 file(GLOB SRCS "ggml-cuda/template-instances/fattn-wmma*.cu") 604 list(APPEND GGML_SOURCES_ROCM ${SRCS}) 605 file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu") 606 list(APPEND GGML_SOURCES_ROCM ${SRCS}) 607 608 add_compile_definitions(GGML_USE_HIPBLAS GGML_USE_CUDA) 609 610 if (LLAMA_HIP_UMA) 611 add_compile_definitions(GGML_HIP_UMA) 612 endif() 613 614 if (LLAMA_CUDA_FORCE_DMMV) 615 add_compile_definitions(GGML_CUDA_FORCE_DMMV) 616 endif() 617 618 if (LLAMA_CUDA_FORCE_MMQ) 619 add_compile_definitions(GGML_CUDA_FORCE_MMQ) 620 endif() 621 622 if (LLAMA_CUDA_NO_PEER_COPY) 623 add_compile_definitions(GGML_CUDA_NO_PEER_COPY) 624 endif() 625 626 if (LLAMA_CUDA_FA_ALL_QUANTS) 627 file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu") 628 list(APPEND GGML_SOURCES_ROCM ${SRCS}) 629 add_compile_definitions(GGML_CUDA_FA_ALL_QUANTS) 630 else() 631 file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu") 632 list(APPEND GGML_SOURCES_ROCM ${SRCS}) 633 file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu") 634 list(APPEND GGML_SOURCES_ROCM ${SRCS}) 635 file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*f16-f16.cu") 636 list(APPEND GGML_SOURCES_ROCM ${SRCS}) 637 endif() 638 639 add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X}) 640 add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y}) 641 add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER}) 642 643 if (CXX_IS_HIPCC) 644 set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE CXX) 645 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} hip::device) 646 else() 647 set_source_files_properties(${GGML_SOURCES_ROCM} PROPERTIES LANGUAGE HIP) 648 endif() 649 650 if (LLAMA_STATIC) 651 message(FATAL_ERROR "Static linking not supported for HIP/ROCm") 652 endif() 653 654 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} PUBLIC hip::host roc::rocblas roc::hipblas) 655 endif() 656 657 if (LLAMA_SYCL) 658 if (NOT LLAMA_SYCL_TARGET MATCHES "^(INTEL|NVIDIA)$") 659 message(FATAL_ERROR "Invalid backend chosen, supported options are INTEL or NVIDIA") 660 endif() 661 662 if ( NOT DEFINED ENV{ONEAPI_ROOT}) 663 message(FATAL_ERROR "Not detect ENV {ONEAPI_ROOT}, please install oneAPI & source it, like: source /opt/intel/oneapi/setvars.sh") 664 endif() 665 #todo: AOT 666 667 find_package(IntelSYCL REQUIRED) 668 669 message(STATUS "SYCL found") 670 671 add_compile_definitions(GGML_USE_SYCL) 672 673 if (LLAMA_SYCL_F16) 674 add_compile_definitions(GGML_SYCL_F16) 675 endif() 676 677 if (LLAMA_CUDA_FORCE_MMQ) 678 add_compile_definitions(GGML_SYCL_FORCE_MMQ) 679 endif() 680 681 add_compile_options(-I./) #include DPCT 682 add_compile_options(-I/${SYCL_INCLUDE_DIR}) 683 684 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-narrowing") 685 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -O3") 686 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl -L${MKLROOT}/lib") 687 if (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") 688 set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsycl-targets=nvptx64-nvidia-cuda") 689 endif() 690 691 set(GGML_HEADERS_SYCL ggml-sycl.h) 692 file(GLOB GGML_SOURCES_SYCL "ggml-sycl/*.cpp") 693 list(APPEND GGML_SOURCES_SYCL "ggml-sycl.cpp") 694 695 if (WIN32) 696 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl sycl7 OpenCL mkl_sycl_blas_dll.lib mkl_intel_ilp64_dll.lib mkl_sequential_dll.lib mkl_core_dll.lib) 697 else() 698 if (LLAMA_SYCL_TARGET STREQUAL "INTEL") 699 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl OpenCL mkl_core pthread m dl mkl_sycl_blas mkl_intel_ilp64 mkl_tbb_thread) 700 elseif (LLAMA_SYCL_TARGET STREQUAL "NVIDIA") 701 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} -fsycl pthread m dl onemkl) 702 endif() 703 endif() 704 endif() 705 706 if (LLAMA_KOMPUTE) 707 add_compile_definitions(VULKAN_HPP_DISPATCH_LOADER_DYNAMIC=1) 708 find_package(Vulkan COMPONENTS glslc REQUIRED) 709 find_program(glslc_executable NAMES glslc HINTS Vulkan::glslc) 710 if (NOT glslc_executable) 711 message(FATAL_ERROR "glslc not found") 712 endif() 713 714 function(compile_shader) 715 set(options) 716 set(oneValueArgs) 717 set(multiValueArgs SOURCES) 718 cmake_parse_arguments(compile_shader "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN}) 719 foreach(source ${compile_shader_SOURCES}) 720 get_filename_component(filename ${source} NAME) 721 set(spv_file ${filename}.spv) 722 add_custom_command( 723 OUTPUT ${spv_file} 724 DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/${source} 725 ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/common.comp 726 ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_getrows.comp 727 ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n_pre.comp 728 ${CMAKE_CURRENT_SOURCE_DIR}/kompute-shaders/op_mul_mv_q_n.comp 729 COMMAND ${glslc_executable} --target-env=vulkan1.2 -o ${spv_file} ${CMAKE_CURRENT_SOURCE_DIR}/${source} 730 COMMENT "Compiling ${source} to ${spv_file}" 731 ) 732 733 get_filename_component(RAW_FILE_NAME ${spv_file} NAME) 734 set(FILE_NAME "shader${RAW_FILE_NAME}") 735 string(REPLACE ".comp.spv" ".h" HEADER_FILE ${FILE_NAME}) 736 string(TOUPPER ${HEADER_FILE} HEADER_FILE_DEFINE) 737 string(REPLACE "." "_" HEADER_FILE_DEFINE "${HEADER_FILE_DEFINE}") 738 set(OUTPUT_HEADER_FILE "${HEADER_FILE}") 739 message(STATUS "${HEADER_FILE} generating ${HEADER_FILE_DEFINE}") 740 if(CMAKE_GENERATOR MATCHES "Visual Studio") 741 add_custom_command( 742 OUTPUT ${OUTPUT_HEADER_FILE} 743 COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE} 744 COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} 745 COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} 746 COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE} 747 COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE} 748 COMMAND ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE} 749 COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE} 750 COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} 751 DEPENDS ${spv_file} xxd 752 COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/$<CONFIG>/xxd" 753 ) 754 else() 755 add_custom_command( 756 OUTPUT ${OUTPUT_HEADER_FILE} 757 COMMAND ${CMAKE_COMMAND} -E echo "/*THIS FILE HAS BEEN AUTOMATICALLY GENERATED - DO NOT EDIT*/" > ${OUTPUT_HEADER_FILE} 758 COMMAND ${CMAKE_COMMAND} -E echo \"\#ifndef ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} 759 COMMAND ${CMAKE_COMMAND} -E echo \"\#define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} 760 COMMAND ${CMAKE_COMMAND} -E echo "namespace kp {" >> ${OUTPUT_HEADER_FILE} 761 COMMAND ${CMAKE_COMMAND} -E echo "namespace shader_data {" >> ${OUTPUT_HEADER_FILE} 762 COMMAND ${CMAKE_BINARY_DIR}/bin/xxd -i ${RAW_FILE_NAME} >> ${OUTPUT_HEADER_FILE} 763 COMMAND ${CMAKE_COMMAND} -E echo "}}" >> ${OUTPUT_HEADER_FILE} 764 COMMAND ${CMAKE_COMMAND} -E echo \"\#endif // define ${HEADER_FILE_DEFINE}\" >> ${OUTPUT_HEADER_FILE} 765 DEPENDS ${spv_file} xxd 766 COMMENT "Converting to hpp: ${FILE_NAME} ${CMAKE_BINARY_DIR}/bin/xxd" 767 ) 768 endif() 769 endforeach() 770 endfunction() 771 772 if (EXISTS "${CMAKE_CURRENT_SOURCE_DIR}/kompute/CMakeLists.txt") 773 message(STATUS "Kompute found") 774 set(KOMPUTE_OPT_LOG_LEVEL Error CACHE STRING "Kompute log level") 775 add_subdirectory(kompute) 776 777 # Compile our shaders 778 compile_shader(SOURCES 779 kompute-shaders/op_scale.comp 780 kompute-shaders/op_scale_8.comp 781 kompute-shaders/op_add.comp 782 kompute-shaders/op_addrow.comp 783 kompute-shaders/op_mul.comp 784 kompute-shaders/op_silu.comp 785 kompute-shaders/op_relu.comp 786 kompute-shaders/op_gelu.comp 787 kompute-shaders/op_softmax.comp 788 kompute-shaders/op_norm.comp 789 kompute-shaders/op_rmsnorm.comp 790 kompute-shaders/op_diagmask.comp 791 kompute-shaders/op_mul_mat_mat_f32.comp 792 kompute-shaders/op_mul_mat_f16.comp 793 kompute-shaders/op_mul_mat_q8_0.comp 794 kompute-shaders/op_mul_mat_q4_0.comp 795 kompute-shaders/op_mul_mat_q4_1.comp 796 kompute-shaders/op_mul_mat_q6_k.comp 797 kompute-shaders/op_getrows_f32.comp 798 kompute-shaders/op_getrows_f16.comp 799 kompute-shaders/op_getrows_q4_0.comp 800 kompute-shaders/op_getrows_q4_1.comp 801 kompute-shaders/op_getrows_q6_k.comp 802 kompute-shaders/op_rope_f16.comp 803 kompute-shaders/op_rope_f32.comp 804 kompute-shaders/op_cpy_f16_f16.comp 805 kompute-shaders/op_cpy_f16_f32.comp 806 kompute-shaders/op_cpy_f32_f16.comp 807 kompute-shaders/op_cpy_f32_f32.comp 808 ) 809 810 # Create a custom target for our generated shaders 811 add_custom_target(generated_shaders DEPENDS 812 shaderop_scale.h 813 shaderop_scale_8.h 814 shaderop_add.h 815 shaderop_addrow.h 816 shaderop_mul.h 817 shaderop_silu.h 818 shaderop_relu.h 819 shaderop_gelu.h 820 shaderop_softmax.h 821 shaderop_norm.h 822 shaderop_rmsnorm.h 823 shaderop_diagmask.h 824 shaderop_mul_mat_mat_f32.h 825 shaderop_mul_mat_f16.h 826 shaderop_mul_mat_q8_0.h 827 shaderop_mul_mat_q4_0.h 828 shaderop_mul_mat_q4_1.h 829 shaderop_mul_mat_q6_k.h 830 shaderop_getrows_f32.h 831 shaderop_getrows_f16.h 832 shaderop_getrows_q4_0.h 833 shaderop_getrows_q4_1.h 834 shaderop_getrows_q6_k.h 835 shaderop_rope_f16.h 836 shaderop_rope_f32.h 837 shaderop_cpy_f16_f16.h 838 shaderop_cpy_f16_f32.h 839 shaderop_cpy_f32_f16.h 840 shaderop_cpy_f32_f32.h 841 ) 842 843 # Create a custom command that depends on the generated_shaders 844 add_custom_command( 845 OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp 846 COMMAND ${CMAKE_COMMAND} -E touch ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp 847 DEPENDS generated_shaders 848 COMMENT "Ensuring shaders are generated before compiling ggml-kompute.cpp" 849 ) 850 851 # Add the stamp to the main sources to ensure dependency tracking 852 set(GGML_SOURCES_KOMPUTE ggml-kompute.cpp ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) 853 set(GGML_HEADERS_KOMPUTE ggml-kompute.h ${CMAKE_CURRENT_BINARY_DIR}/ggml-kompute.stamp) 854 855 add_compile_definitions(GGML_USE_KOMPUTE) 856 857 set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} kompute) 858 set(LLAMA_EXTRA_INCLUDES ${LLAMA_EXTRA_INCLUDES} ${CMAKE_BINARY_DIR}) 859 else() 860 message(WARNING "Kompute not found") 861 endif() 862 endif() 863 864 if (LLAMA_CPU_HBM) 865 find_library(memkind memkind REQUIRED) 866 867 add_compile_definitions(GGML_USE_CPU_HBM) 868 869 target_link_libraries(ggml PUBLIC memkind) 870 endif() 871 872 if (LLAMA_PERF) 873 add_compile_definitions(GGML_PERF) 874 endif() 875 876 function(get_flags CCID CCVER) 877 set(C_FLAGS "") 878 set(CXX_FLAGS "") 879 880 if (CCID MATCHES "Clang") 881 set(C_FLAGS -Wunreachable-code-break -Wunreachable-code-return) 882 set(CXX_FLAGS -Wunreachable-code-break -Wunreachable-code-return -Wmissing-prototypes -Wextra-semi) 883 884 if ( 885 (CCID STREQUAL "Clang" AND CCVER VERSION_GREATER_EQUAL 3.8.0) OR 886 (CCID STREQUAL "AppleClang" AND CCVER VERSION_GREATER_EQUAL 7.3.0) 887 ) 888 list(APPEND C_FLAGS -Wdouble-promotion) 889 endif() 890 elseif (CCID STREQUAL "GNU") 891 set(C_FLAGS -Wdouble-promotion) 892 set(CXX_FLAGS -Wno-array-bounds) 893 894 if (CCVER VERSION_GREATER_EQUAL 7.1.0) 895 list(APPEND CXX_FLAGS -Wno-format-truncation) 896 endif() 897 if (CCVER VERSION_GREATER_EQUAL 8.1.0) 898 list(APPEND CXX_FLAGS -Wextra-semi) 899 endif() 900 endif() 901 902 set(GF_C_FLAGS ${C_FLAGS} PARENT_SCOPE) 903 set(GF_CXX_FLAGS ${CXX_FLAGS} PARENT_SCOPE) 904 endfunction() 905 906 if (LLAMA_FATAL_WARNINGS) 907 if (CMAKE_CXX_COMPILER_ID MATCHES "GNU" OR CMAKE_CXX_COMPILER_ID MATCHES "Clang") 908 list(APPEND C_FLAGS -Werror) 909 list(APPEND CXX_FLAGS -Werror) 910 elseif (CMAKE_CXX_COMPILER_ID STREQUAL "MSVC") 911 add_compile_options(/WX) 912 endif() 913 endif() 914 915 if (LLAMA_ALL_WARNINGS) 916 if (NOT MSVC) 917 list(APPEND WARNING_FLAGS -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function) 918 list(APPEND C_FLAGS -Wshadow -Wstrict-prototypes -Wpointer-arith -Wmissing-prototypes 919 -Werror=implicit-int -Werror=implicit-function-declaration) 920 list(APPEND CXX_FLAGS -Wmissing-declarations -Wmissing-noreturn) 921 922 list(APPEND C_FLAGS ${WARNING_FLAGS}) 923 list(APPEND CXX_FLAGS ${WARNING_FLAGS}) 924 925 get_flags(${CMAKE_CXX_COMPILER_ID} ${CMAKE_CXX_COMPILER_VERSION}) 926 927 add_compile_options("$<$<COMPILE_LANGUAGE:C>:${C_FLAGS};${GF_C_FLAGS}>" 928 "$<$<COMPILE_LANGUAGE:CXX>:${CXX_FLAGS};${GF_CXX_FLAGS}>") 929 else() 930 # todo : msvc 931 set(C_FLAGS "") 932 set(CXX_FLAGS "") 933 endif() 934 endif() 935 936 set(CUDA_CXX_FLAGS "") 937 938 if (LLAMA_CUDA) 939 set(CUDA_FLAGS -use_fast_math) 940 941 if (LLAMA_FATAL_WARNINGS) 942 list(APPEND CUDA_FLAGS -Werror all-warnings) 943 endif() 944 945 if (LLAMA_ALL_WARNINGS AND NOT MSVC) 946 set(NVCC_CMD ${CMAKE_CUDA_COMPILER} .c) 947 if (NOT CMAKE_CUDA_HOST_COMPILER STREQUAL "") 948 list(APPEND NVCC_CMD -ccbin ${CMAKE_CUDA_HOST_COMPILER}) 949 endif() 950 951 execute_process( 952 COMMAND ${NVCC_CMD} -Xcompiler --version 953 OUTPUT_VARIABLE CUDA_CCFULLVER 954 ERROR_QUIET 955 ) 956 957 if (NOT CUDA_CCFULLVER MATCHES clang) 958 set(CUDA_CCID "GNU") 959 execute_process( 960 COMMAND ${NVCC_CMD} -Xcompiler "-dumpfullversion -dumpversion" 961 OUTPUT_VARIABLE CUDA_CCVER 962 ERROR_QUIET 963 ) 964 else() 965 if (CUDA_CCFULLVER MATCHES Apple) 966 set(CUDA_CCID "AppleClang") 967 else() 968 set(CUDA_CCID "Clang") 969 endif() 970 string(REGEX REPLACE "^.* version ([0-9.]*).*$" "\\1" CUDA_CCVER ${CUDA_CCFULLVER}) 971 endif() 972 973 message("-- CUDA host compiler is ${CUDA_CCID} ${CUDA_CCVER}") 974 975 get_flags(${CUDA_CCID} ${CUDA_CCVER}) 976 list(APPEND CUDA_CXX_FLAGS ${CXX_FLAGS} ${GF_CXX_FLAGS}) # This is passed to -Xcompiler later 977 endif() 978 979 if (NOT MSVC) 980 list(APPEND CUDA_CXX_FLAGS -Wno-pedantic) 981 endif() 982 endif() 983 984 if (WIN32) 985 add_compile_definitions(_CRT_SECURE_NO_WARNINGS) 986 987 if (BUILD_SHARED_LIBS) 988 set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) 989 endif() 990 endif() 991 992 if (LLAMA_LTO) 993 include(CheckIPOSupported) 994 check_ipo_supported(RESULT result OUTPUT output) 995 if (result) 996 set(CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE) 997 else() 998 message(WARNING "IPO is not supported: ${output}") 999 endif() 1000 endif() 1001 1002 if (LLAMA_CCACHE) 1003 find_program(LLAMA_CCACHE_FOUND ccache) 1004 if (LLAMA_CCACHE_FOUND) 1005 set_property(GLOBAL PROPERTY RULE_LAUNCH_COMPILE ccache) 1006 set(ENV{CCACHE_SLOPPINESS} time_macros) 1007 message(STATUS "ccache found, compilation results will be cached. Disable with LLAMA_CCACHE=OFF.") 1008 else() 1009 message(STATUS "Warning: ccache not found - consider installing it for faster compilation or disable this warning with LLAMA_CCACHE=OFF") 1010 endif () 1011 endif() 1012 1013 # this version of Apple ld64 is buggy 1014 execute_process( 1015 COMMAND ${CMAKE_C_COMPILER} ${CMAKE_EXE_LINKER_FLAGS} -Wl,-v 1016 ERROR_VARIABLE output 1017 OUTPUT_QUIET 1018 ) 1019 1020 if (output MATCHES "dyld-1015\.7") 1021 add_compile_definitions(HAVE_BUGGY_APPLE_LINKER) 1022 endif() 1023 1024 # Architecture specific 1025 # TODO: probably these flags need to be tweaked on some architectures 1026 # feel free to update the Makefile for your architecture and send a pull request or issue 1027 message(STATUS "CMAKE_SYSTEM_PROCESSOR: ${CMAKE_SYSTEM_PROCESSOR}") 1028 if (MSVC) 1029 string(TOLOWER "${CMAKE_GENERATOR_PLATFORM}" CMAKE_GENERATOR_PLATFORM_LWR) 1030 message(STATUS "CMAKE_GENERATOR_PLATFORM: ${CMAKE_GENERATOR_PLATFORM}") 1031 else () 1032 set(CMAKE_GENERATOR_PLATFORM_LWR "") 1033 endif () 1034 1035 if (NOT MSVC) 1036 if (LLAMA_STATIC) 1037 add_link_options(-static) 1038 if (MINGW) 1039 add_link_options(-static-libgcc -static-libstdc++) 1040 endif() 1041 endif() 1042 if (LLAMA_GPROF) 1043 add_compile_options(-pg) 1044 endif() 1045 endif() 1046 1047 set(ARCH_FLAGS "") 1048 1049 if (CMAKE_OSX_ARCHITECTURES STREQUAL "arm64" OR CMAKE_GENERATOR_PLATFORM_LWR STREQUAL "arm64" OR 1050 (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND 1051 CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm.*|ARM64)$")) 1052 message(STATUS "ARM detected") 1053 if (MSVC) 1054 add_compile_definitions(__aarch64__) # MSVC defines _M_ARM64 instead 1055 add_compile_definitions(__ARM_NEON) 1056 add_compile_definitions(__ARM_FEATURE_FMA) 1057 1058 set(CMAKE_REQUIRED_FLAGS_PREV ${CMAKE_REQUIRED_FLAGS}) 1059 string(JOIN " " CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS} "/arch:armv8.2") 1060 check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vdotq_s32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_DOTPROD) 1061 if (GGML_COMPILER_SUPPORT_DOTPROD) 1062 add_compile_definitions(__ARM_FEATURE_DOTPROD) 1063 endif () 1064 check_cxx_source_compiles("#include <arm_neon.h>\nint main() { int8x16_t _a, _b; int32x4_t _s = vmlaq_f32(_s, _a, _b); return 0; }" GGML_COMPILER_SUPPORT_MATMUL_INT8) 1065 if (GGML_COMPILER_SUPPORT_MATMUL_INT8) 1066 add_compile_definitions(__ARM_FEATURE_MATMUL_INT8) 1067 endif () 1068 1069 check_cxx_source_compiles("#include <arm_neon.h>\nint main() { float16_t _a; float16x8_t _s = vdupq_n_f16(_a); return 0; }" GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) 1070 if (GGML_COMPILER_SUPPORT_FP16_VECTOR_ARITHMETIC) 1071 add_compile_definitions(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) 1072 endif () 1073 set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_PREV}) 1074 else() 1075 check_cxx_compiler_flag(-mfp16-format=ieee COMPILER_SUPPORTS_FP16_FORMAT_I3E) 1076 if (NOT "${COMPILER_SUPPORTS_FP16_FORMAT_I3E}" STREQUAL "") 1077 list(APPEND ARCH_FLAGS -mfp16-format=ieee) 1078 endif() 1079 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv6") 1080 # Raspberry Pi 1, Zero 1081 list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access) 1082 endif() 1083 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv7") 1084 if ("${CMAKE_SYSTEM_NAME}" STREQUAL "Android") 1085 # Android armeabi-v7a 1086 list(APPEND ARCH_FLAGS -mfpu=neon-vfpv4 -mno-unaligned-access -funsafe-math-optimizations) 1087 else() 1088 # Raspberry Pi 2 1089 list(APPEND ARCH_FLAGS -mfpu=neon-fp-armv8 -mno-unaligned-access -funsafe-math-optimizations) 1090 endif() 1091 endif() 1092 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "armv8") 1093 # Android arm64-v8a 1094 # Raspberry Pi 3, 4, Zero 2 (32-bit) 1095 list(APPEND ARCH_FLAGS -mno-unaligned-access) 1096 endif() 1097 if (LLAMA_SVE) 1098 list(APPEND ARCH_FLAGS -march=armv8.6-a+sve) 1099 endif() 1100 endif() 1101 elseif (CMAKE_OSX_ARCHITECTURES STREQUAL "x86_64" OR CMAKE_GENERATOR_PLATFORM_LWR MATCHES "^(x86_64|i686|amd64|x64|win32)$" OR 1102 (NOT CMAKE_OSX_ARCHITECTURES AND NOT CMAKE_GENERATOR_PLATFORM_LWR AND 1103 CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|i686|AMD64)$")) 1104 message(STATUS "x86 detected") 1105 if (MSVC) 1106 # instruction set detection for MSVC only 1107 if (LLAMA_NATIVE) 1108 include(cmake/FindSIMD.cmake) 1109 endif () 1110 if (LLAMA_AVX512) 1111 list(APPEND ARCH_FLAGS /arch:AVX512) 1112 # MSVC has no compile-time flags enabling specific 1113 # AVX512 extensions, neither it defines the 1114 # macros corresponding to the extensions. 1115 # Do it manually. 1116 if (LLAMA_AVX512_VBMI) 1117 add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>) 1118 add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>) 1119 endif() 1120 if (LLAMA_AVX512_VNNI) 1121 add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>) 1122 add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>) 1123 endif() 1124 if (LLAMA_AVX512_BF16) 1125 add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512BF16__>) 1126 add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512BF16__>) 1127 endif() 1128 elseif (LLAMA_AVX2) 1129 list(APPEND ARCH_FLAGS /arch:AVX2) 1130 elseif (LLAMA_AVX) 1131 list(APPEND ARCH_FLAGS /arch:AVX) 1132 endif() 1133 else() 1134 if (LLAMA_NATIVE) 1135 list(APPEND ARCH_FLAGS -march=native) 1136 endif() 1137 if (LLAMA_F16C) 1138 list(APPEND ARCH_FLAGS -mf16c) 1139 endif() 1140 if (LLAMA_FMA) 1141 list(APPEND ARCH_FLAGS -mfma) 1142 endif() 1143 if (LLAMA_AVX) 1144 list(APPEND ARCH_FLAGS -mavx) 1145 endif() 1146 if (LLAMA_AVX2) 1147 list(APPEND ARCH_FLAGS -mavx2) 1148 endif() 1149 if (LLAMA_AVX512) 1150 list(APPEND ARCH_FLAGS -mavx512f) 1151 list(APPEND ARCH_FLAGS -mavx512bw) 1152 endif() 1153 if (LLAMA_AVX512_VBMI) 1154 list(APPEND ARCH_FLAGS -mavx512vbmi) 1155 endif() 1156 if (LLAMA_AVX512_VNNI) 1157 list(APPEND ARCH_FLAGS -mavx512vnni) 1158 endif() 1159 if (LLAMA_AVX512_BF16) 1160 list(APPEND ARCH_FLAGS -mavx512bf16) 1161 endif() 1162 endif() 1163 elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64") 1164 message(STATUS "PowerPC detected") 1165 if (${CMAKE_SYSTEM_PROCESSOR} MATCHES "ppc64le") 1166 list(APPEND ARCH_FLAGS -mcpu=powerpc64le) 1167 else() 1168 list(APPEND ARCH_FLAGS -mcpu=native -mtune=native) 1169 #TODO: Add targets for Power8/Power9 (Altivec/VSX) and Power10(MMA) and query for big endian systems (ppc64/le/be) 1170 endif() 1171 elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "loongarch64") 1172 message(STATUS "loongarch64 detected") 1173 1174 list(APPEND ARCH_FLAGS -march=loongarch64) 1175 if (LLAMA_LASX) 1176 list(APPEND ARCH_FLAGS -mlasx) 1177 endif() 1178 if (LLAMA_LSX) 1179 list(APPEND ARCH_FLAGS -mlsx) 1180 endif() 1181 1182 else() 1183 message(STATUS "Unknown architecture") 1184 endif() 1185 1186 add_compile_options("$<$<COMPILE_LANGUAGE:CXX>:${ARCH_FLAGS}>") 1187 add_compile_options("$<$<COMPILE_LANGUAGE:C>:${ARCH_FLAGS}>") 1188 1189 if (LLAMA_CUDA) 1190 list(APPEND CUDA_CXX_FLAGS ${ARCH_FLAGS}) 1191 list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument 1192 if (NOT CUDA_CXX_FLAGS_JOINED STREQUAL "") 1193 list(APPEND CUDA_FLAGS -Xcompiler ${CUDA_CXX_FLAGS_JOINED}) 1194 endif() 1195 add_compile_options("$<$<COMPILE_LANGUAGE:CUDA>:${CUDA_FLAGS}>") 1196 endif() 1197 1198 if (MINGW) 1199 # Target Windows 8 for PrefetchVirtualMemory 1200 add_compile_definitions(_WIN32_WINNT=${LLAMA_WIN_VER}) 1201 endif() 1202 1203 # 1204 # POSIX conformance 1205 # 1206 1207 # clock_gettime came in POSIX.1b (1993) 1208 # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional 1209 # posix_memalign came in POSIX.1-2001 / SUSv3 1210 # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985) 1211 add_compile_definitions(_XOPEN_SOURCE=600) 1212 1213 # Somehow in OpenBSD whenever POSIX conformance is specified 1214 # some string functions rely on locale_t availability, 1215 # which was introduced in POSIX.1-2008, forcing us to go higher 1216 if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") 1217 remove_definitions(-D_XOPEN_SOURCE=600) 1218 add_compile_definitions(_XOPEN_SOURCE=700) 1219 endif() 1220 1221 # Data types, macros and functions related to controlling CPU affinity and 1222 # some memory allocation are available on Linux through GNU extensions in libc 1223 if (CMAKE_SYSTEM_NAME MATCHES "Linux") 1224 add_compile_definitions(_GNU_SOURCE) 1225 endif() 1226 1227 # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1, 1228 # and on macOS its availability depends on enabling Darwin extensions 1229 # similarly on DragonFly, enabling BSD extensions is necessary 1230 if ( 1231 CMAKE_SYSTEM_NAME MATCHES "Darwin" OR 1232 CMAKE_SYSTEM_NAME MATCHES "iOS" OR 1233 CMAKE_SYSTEM_NAME MATCHES "tvOS" OR 1234 CMAKE_SYSTEM_NAME MATCHES "DragonFly" 1235 ) 1236 add_compile_definitions(_DARWIN_C_SOURCE) 1237 endif() 1238 1239 # alloca is a non-standard interface that is not visible on BSDs when 1240 # POSIX conformance is specified, but not all of them provide a clean way 1241 # to enable it in such cases 1242 if (CMAKE_SYSTEM_NAME MATCHES "FreeBSD") 1243 add_compile_definitions(__BSD_VISIBLE) 1244 endif() 1245 if (CMAKE_SYSTEM_NAME MATCHES "NetBSD") 1246 add_compile_definitions(_NETBSD_SOURCE) 1247 endif() 1248 if (CMAKE_SYSTEM_NAME MATCHES "OpenBSD") 1249 add_compile_definitions(_BSD_SOURCE) 1250 endif() 1251 1252 # 1253 # libraries 1254 # 1255 1256 # ggml 1257 1258 add_library(ggml OBJECT 1259 ggml.c 1260 ggml.h 1261 ggml-alloc.c 1262 ggml-alloc.h 1263 ggml-backend.c 1264 ggml-backend.h 1265 ggml-quants.c 1266 ggml-quants.h 1267 ${GGML_SOURCES_CUDA} ${GGML_HEADERS_CUDA} 1268 ${GGML_SOURCES_METAL} ${GGML_HEADERS_METAL} 1269 ${GGML_SOURCES_RPC} ${GGML_HEADERS_RPC} 1270 ${GGML_SOURCES_EXTRA} ${GGML_HEADERS_EXTRA} 1271 ${GGML_SOURCES_SYCL} ${GGML_HEADERS_SYCL} 1272 ${GGML_SOURCES_KOMPUTE} ${GGML_HEADERS_KOMPUTE} 1273 ${GGML_SOURCES_VULKAN} ${GGML_HEADERS_VULKAN} 1274 ${GGML_SOURCES_ROCM} ${GGML_HEADERS_ROCM} 1275 ${GGML_SOURCES_BLAS} ${GGML_HEADERS_BLAS} 1276 ${GGML_SOURCES_LLAMAFILE} ${GGML_HEADERS_LLAMAFILE} 1277 ) 1278 1279 target_include_directories(ggml PUBLIC . ${LLAMA_EXTRA_INCLUDES}) 1280 target_compile_features (ggml PUBLIC c_std_11) # don't bump 1281 1282 target_link_libraries(ggml PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) 1283 1284 add_library(ggml_static STATIC $<TARGET_OBJECTS:ggml>) 1285 1286 if (BUILD_SHARED_LIBS) 1287 set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON) 1288 add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>) 1289 target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS}) 1290 install(TARGETS ggml_shared LIBRARY) 1291 endif() 1292 1293 # llama 1294 1295 add_library(llama 1296 llama.cpp 1297 llama.h 1298 unicode.h 1299 unicode.cpp 1300 unicode-data.cpp 1301 ) 1302 1303 target_include_directories(llama PUBLIC .) 1304 target_compile_features (llama PUBLIC cxx_std_11) # don't bump 1305 1306 target_link_libraries(llama PRIVATE 1307 ggml 1308 ${LLAMA_EXTRA_LIBS} 1309 ) 1310 1311 if (BUILD_SHARED_LIBS) 1312 set_target_properties(llama PROPERTIES POSITION_INDEPENDENT_CODE ON) 1313 target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD) 1314 if (LLAMA_METAL) 1315 set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal") 1316 endif() 1317 endif() 1318 1319 1320 # 1321 # install 1322 # 1323 1324 include(GNUInstallDirs) 1325 include(CMakePackageConfigHelpers) 1326 1327 set(LLAMA_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} 1328 CACHE PATH "Location of header files") 1329 set(LLAMA_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} 1330 CACHE PATH "Location of library files") 1331 set(LLAMA_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} 1332 CACHE PATH "Location of binary files") 1333 set(LLAMA_BUILD_NUMBER ${BUILD_NUMBER}) 1334 set(LLAMA_BUILD_COMMIT ${BUILD_COMMIT}) 1335 set(LLAMA_INSTALL_VERSION 0.0.${BUILD_NUMBER}) 1336 get_directory_property(LLAMA_TRANSIENT_DEFINES COMPILE_DEFINITIONS) 1337 1338 configure_package_config_file( 1339 ${CMAKE_CURRENT_SOURCE_DIR}/scripts/LlamaConfig.cmake.in 1340 ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake 1341 INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama 1342 PATH_VARS LLAMA_INCLUDE_INSTALL_DIR 1343 LLAMA_LIB_INSTALL_DIR 1344 LLAMA_BIN_INSTALL_DIR ) 1345 1346 write_basic_package_version_file( 1347 ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake 1348 VERSION ${LLAMA_INSTALL_VERSION} 1349 COMPATIBILITY SameMajorVersion) 1350 1351 install(FILES ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfig.cmake 1352 ${CMAKE_CURRENT_BINARY_DIR}/LlamaConfigVersion.cmake 1353 DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/Llama) 1354 1355 set(GGML_PUBLIC_HEADERS "ggml.h" "ggml-alloc.h" "ggml-backend.h" 1356 "${GGML_HEADERS_CUDA}" 1357 "${GGML_HEADERS_METAL}" 1358 "${GGML_HEADERS_EXTRA}") 1359 1360 set_target_properties(ggml PROPERTIES PUBLIC_HEADER "${GGML_PUBLIC_HEADERS}") 1361 install(TARGETS ggml PUBLIC_HEADER) 1362 1363 set_target_properties(llama PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/llama.h) 1364 install(TARGETS llama LIBRARY PUBLIC_HEADER) 1365 1366 install( 1367 FILES convert-hf-to-gguf.py 1368 PERMISSIONS 1369 OWNER_READ 1370 OWNER_WRITE 1371 OWNER_EXECUTE 1372 GROUP_READ 1373 GROUP_EXECUTE 1374 WORLD_READ 1375 WORLD_EXECUTE 1376 DESTINATION ${CMAKE_INSTALL_BINDIR}) 1377 if (LLAMA_METAL) 1378 install( 1379 FILES ggml-metal.metal 1380 PERMISSIONS 1381 OWNER_READ 1382 OWNER_WRITE 1383 GROUP_READ 1384 WORLD_READ 1385 DESTINATION ${CMAKE_INSTALL_BINDIR}) 1386 if (NOT LLAMA_METAL_EMBED_LIBRARY) 1387 install( 1388 FILES ${CMAKE_RUNTIME_OUTPUT_DIRECTORY}/default.metallib 1389 DESTINATION ${CMAKE_INSTALL_BINDIR} 1390 ) 1391 endif() 1392 endif() 1393 1394 configure_file(cmake/llama.pc.in 1395 "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" 1396 @ONLY) 1397 1398 install(FILES "${CMAKE_CURRENT_BINARY_DIR}/llama.pc" 1399 DESTINATION lib/pkgconfig) 1400 1401 # 1402 # programs, examples and tests 1403 # 1404 1405 add_subdirectory(common) 1406 1407 if (LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION) 1408 include(CTest) 1409 add_subdirectory(tests) 1410 endif () 1411 1412 if (LLAMA_BUILD_EXAMPLES) 1413 add_subdirectory(examples) 1414 add_subdirectory(pocs) 1415 endif()