# The flake interface to llama.cpp's Nix expressions. The flake is used as a
# more discoverable entry-point, as well as a way to pin the dependencies and
# expose default outputs, including the outputs built by the CI.

# For more serious applications involving some kind of customization you may
# want to consider consuming the overlay, or instantiating `llamaPackages`
# directly:
#
# ```nix
# pkgs.callPackage ${llama-cpp-root}/.devops/nix/scope.nix { }
# ```

# Cf. https://jade.fyi/blog/flakes-arent-real/ for a more detailed exposition
# of the relation between Nix and Nix Flakes.
{
  description = "Port of Facebook's LLaMA model in C/C++";

  inputs = {
    nixpkgs.url = "github:NixOS/nixpkgs/nixos-unstable";
    flake-parts.url = "github:hercules-ci/flake-parts";
  };

  # There's an optional binary cache available. The details are below, but they're commented out.
  #
  # Why? The terrible experience of being prompted to accept them on every single Nix command run.
  # Plus, there are warnings shown about not being a trusted user on a default Nix install
  # if you *do* say yes to the prompts.
  #
  # This experience makes having `nixConfig` in a flake a persistent UX problem.
  #
  # To make use of the binary cache, please add the relevant settings to your `nix.conf`.
  # It's located at `/etc/nix/nix.conf` on non-NixOS systems. On NixOS, adjust the `nix.settings`
  # option in your NixOS configuration to add `extra-substituters` and `extra-trusted-public-keys`,
  # as shown below.
  #
  # ```
  # nixConfig = {
  #   extra-substituters = [
  #     # Populated by the CI in ggerganov/llama.cpp
  #     "https://llama-cpp.cachix.org"
  #
  #     # A development cache for nixpkgs imported with `config.cudaSupport = true`.
  #     # Populated by https://hercules-ci.com/github/SomeoneSerge/nixpkgs-cuda-ci.
  #     # This lets one skip building e.g. the CUDA-enabled openmpi.
  #     # TODO: Replace once nix-community obtains an official one.
  #     "https://cuda-maintainers.cachix.org"
  #   ];
  #
  #   # Verify these are the same keys as published on
  #   # - https://app.cachix.org/cache/llama-cpp
  #   # - https://app.cachix.org/cache/cuda-maintainers
  #   extra-trusted-public-keys = [
  #     "llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc="
  #     "cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E="
  #   ];
  # };
  # ```
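  #
  # Concretely, that translates into the following (a sketch using the same
  # substituter URLs and public keys as listed above). In `/etc/nix/nix.conf`:
  #
  # ```
  # extra-substituters = https://llama-cpp.cachix.org https://cuda-maintainers.cachix.org
  # extra-trusted-public-keys = llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc= cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E=
  # ```
  #
  # ...or, equivalently, in a NixOS configuration:
  #
  # ```nix
  # nix.settings = {
  #   extra-substituters = [
  #     "https://llama-cpp.cachix.org"
  #     "https://cuda-maintainers.cachix.org"
  #   ];
  #   extra-trusted-public-keys = [
  #     "llama-cpp.cachix.org-1:H75X+w83wUKTIPSO1KWy9ADUrzThyGs8P5tmAbkWhQc="
  #     "cuda-maintainers.cachix.org-1:0dq3bujKpuEPMCX6U4WylrUDZ9JyUG0VpVZa7CNfq5E="
  #   ];
  # };
  # ```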

  # For inspection, use `nix flake show github:ggerganov/llama.cpp` or the nix repl:
  #
  # ```bash
  # ❯ nix repl
  # nix-repl> :lf github:ggerganov/llama.cpp
  # Added 13 variables.
  # nix-repl> outputs.apps.x86_64-linux.quantize
  # { program = "/nix/store/00000000000000000000000000000000-llama.cpp/bin/llama-quantize"; type = "app"; }
  # ```
  outputs =
    { self, flake-parts, ... }@inputs:
    let
      # We could include the git revisions in the package names but those would
      # needlessly trigger rebuilds:
      # llamaVersion = self.dirtyShortRev or self.shortRev;

      # Nix already uses cryptographic hashes for versioning, so we'll just fix
      # the fake semver for now:
      llamaVersion = "0.0.0";
    in
    flake-parts.lib.mkFlake { inherit inputs; }
      {
        imports = [
          ./.devops/nix/nixpkgs-instances.nix
          ./.devops/nix/apps.nix
          ./.devops/nix/devshells.nix
          ./.devops/nix/jetson-support.nix
        ];

        # An overlay can be used to have more granular control over llama-cpp's
        # dependencies and configuration than that offered by the `.override`
        # mechanism. Cf. https://nixos.org/manual/nixpkgs/stable/#chap-overlays.
        #
        # E.g. in a flake:
        # ```nix
        # { nixpkgs, llama-cpp, ... }:
        # let pkgs = import nixpkgs {
        #     overlays = [ (llama-cpp.overlays.default) ];
        #     system = "aarch64-linux";
        #     config.allowUnfree = true;
        #     config.cudaSupport = true;
        #     config.cudaCapabilities = [ "7.2" ];
        #     config.cudaEnableForwardCompat = false;
        #   }; in {
        #     packages.aarch64-linux.llamaJetsonXavier = pkgs.llamaPackages.llama-cpp;
        #   }
        # ```
        #
        # Cf. https://nixos.org/manual/nix/unstable/command-ref/new-cli/nix3-flake.html?highlight=flake#flake-format
        flake.overlays.default = (
          final: prev: {
            llamaPackages = final.callPackage ./.devops/nix/scope.nix { inherit llamaVersion; };
            inherit (final.llamaPackages) llama-cpp;
          }
        );

        systems = [
          "aarch64-darwin"
          "aarch64-linux"
          "x86_64-darwin" # x86_64-darwin isn't tested (and likely isn't relevant)
          "x86_64-linux"
        ];

        perSystem =
          {
            config,
            lib,
            system,
            pkgs,
            pkgsCuda,
            pkgsRocm,
            ...
          }:
          {
            # For standardised reproducible formatting with `nix fmt`
            formatter = pkgs.nixfmt-rfc-style;

            # Unlike `.#packages`, legacyPackages may contain values of
            # arbitrary types (including nested attrsets) and may even throw
            # exceptions. This attribute isn't recursed into by `nix flake
            # show` either.
            #
            # You can add arbitrary scripts to `.devops/nix/scope.nix` and
            # access them as `nix build .#llamaPackages.${scriptName}` using
            # the same path you would with an overlay.
            legacyPackages = {
              llamaPackages = pkgs.callPackage ./.devops/nix/scope.nix { inherit llamaVersion; };
              llamaPackagesWindows = pkgs.pkgsCross.mingwW64.callPackage ./.devops/nix/scope.nix {
                inherit llamaVersion;
              };
              llamaPackagesCuda = pkgsCuda.callPackage ./.devops/nix/scope.nix { inherit llamaVersion; };
              llamaPackagesRocm = pkgsRocm.callPackage ./.devops/nix/scope.nix { inherit llamaVersion; };
            };

            # We don't use the overlay here so as to avoid making too many instances of nixpkgs,
            # cf. https://zimbatm.com/notes/1000-instances-of-nixpkgs
            packages =
              {
                default = config.legacyPackages.llamaPackages.llama-cpp;
                vulkan = config.packages.default.override { useVulkan = true; };
                windows = config.legacyPackages.llamaPackagesWindows.llama-cpp;
              }
              // lib.optionalAttrs pkgs.stdenv.isLinux {
                cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp;

                mpi-cpu = config.packages.default.override { useMpi = true; };
                mpi-cuda = config.legacyPackages.llamaPackagesCuda.llama-cpp.override { useMpi = true; };
              }
              // lib.optionalAttrs (system == "x86_64-linux") {
                rocm = config.legacyPackages.llamaPackagesRocm.llama-cpp;
              };

            # Packages exposed in `.#checks` will be built by the CI and by
            # `nix flake check`.
            #
            # We could test all outputs e.g. as `checks = config.packages`.
            #
            # TODO: Build more once https://github.com/ggerganov/llama.cpp/issues/6346 has been addressed
            checks = {
              inherit (config.packages) default vulkan;
            };
          };
      };
}
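
# Example invocations of the outputs defined above (a sketch; assumes the
# working directory is a checkout of this repository):
#
# ```bash
# nix build .#default                  # CPU-only llama-cpp
# nix build .#vulkan                   # the Vulkan variant
# nix build .#llamaPackages.llama-cpp  # anything from the scope, via legacyPackages
# ```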