/ th500-boot-fix.patch
th500-boot-fix.patch
th500-boot-fix: TH500 / DGX Spark boot support for the NixOS configuration.

modules/dgx-spark.nix: reformat the module; replace the single tty1 console
parameter with earlycon + serial + tty0 consoles, add PCIe/init_on_alloc
parameters, and make platform, GPU, network and NVMe modules available in
the initrd.
usb-configuration-base.nix: reformat the module; add Tegra platform and
8250 serial drivers to the kernel module list.

NOTE(review): this copy was rebuilt from a flattened listing, so leading
whitespace on context and removed lines is reconstructed. If a hunk fails
to match, apply with `git apply --ignore-whitespace`.
NOTE(review): the placeholder hardware.firmware entry from the earlier
revision of this patch was dropped (it only created an empty .placeholder
file and shipped no firmware), so the post-image hash on the index line of
modules/dgx-spark.nix is informational only.

diff --git a/modules/dgx-spark.nix b/modules/dgx-spark.nix
index 09e999b..17103b1 100644
--- a/modules/dgx-spark.nix
+++ b/modules/dgx-spark.nix
@@ -1,4 +1,9 @@
-{ config, lib, pkgs, ... }:
+{
+  config,
+  lib,
+  pkgs,
+  ...
+}:
 
 with lib;
 
@@ -10,76 +15,79 @@ let
   # Import generated NVIDIA DGX configuration
   dgxKernelConfig = import ../kernel-configs/nvidia-dgx-spark-6.17.1.nix { inherit lib; };
 
-  nvidiaKernel = pkgs.linuxPackagesFor (baseKernel.override {
-    argsOverride = rec {
-      # Use the NVIDIA kernel source
-      src = pkgs.fetchFromGitHub {
-        owner = "NVIDIA";
-        repo = "NV-Kernels";
-        # From https://github.com/NVIDIA/NV-Kernels/commits/24.04_linux-nvidia-6.17-next/
-        rev = "47ca203bcc5f4e1580c06fe1074d71497462ac8b";
-        hash = "sha256-lPp7RFvZcPhV5v6FOxCVIB53vpNujvvP0NAW6iRaiF8=";
+  nvidiaKernel = pkgs.linuxPackagesFor (
+    baseKernel.override {
+      argsOverride = rec {
+        # Use the NVIDIA kernel source
+        src = pkgs.fetchFromGitHub {
+          owner = "NVIDIA";
+          repo = "NV-Kernels";
+          # From https://github.com/NVIDIA/NV-Kernels/commits/24.04_linux-nvidia-6.17-next/
+          rev = "47ca203bcc5f4e1580c06fe1074d71497462ac8b";
+          hash = "sha256-lPp7RFvZcPhV5v6FOxCVIB53vpNujvvP0NAW6iRaiF8=";
+        };
+
+        # Apply Rust gendwarfksyms fix patch
+        kernelPatches = [
+          {
+            name = "rust-gendwarfksyms-fix";
+            patch = ../patches/rust-gendwarfksyms-fix.patch;
+          }
+        ];
+
+        version = "${nvidiaKernelVersion}-nvidia";
+        modDirVersion = nvidiaKernelVersion;
+        enableCommonConfig = true; # Enable NixOS defaults for dependency resolution
+        ignoreConfigErrors = true; # Ignore unused config options
+
+        # Use comprehensive NVIDIA DGX configuration with NixOS-specific overrides
+        structuredExtraConfig =
+          (lib.filterAttrs (
+            name: value:
+            # Remove options that conflict with NixOS requirements or don't exist in this kernel
+            !lib.elem name [
+              "BLK_DEV_DM" # Device mapper - let NixOS handle this
+              "BLK_DEV_DM_BUILTIN" # Device mapper builtin - let NixOS handle this
+              "PAHOLE_VERSION" # Tool version - let NixOS handle this
+              "RUSTC_LLVM_VERSION" # Compiler version - let NixOS handle this
+              "RUSTC_VERSION" # Compiler version - let NixOS handle this
+              "GCC_VERSION" # Compiler version - let NixOS handle this
+              "LD_VERSION" # Linker version - let NixOS handle this
+              "VERSION_SIGNATURE" # Version signature - let NixOS handle this
+              "LOCALVERSION" # Local version - let NixOS handle this
+              "LOCALVERSION_AUTO" # Local version auto - let NixOS handle this
+              "INITRAMFS_SOURCE" # Initramfs source - let NixOS handle this
+              "SYSTEM_TRUSTED_KEYS" # System trusted keys - debian-specific paths
+              "SYSTEM_REVOCATION_KEYS" # System revocation keys - debian-specific paths
+              "MODULE_SIG_KEY" # Module signing key - let NixOS handle this
+              "SYSTEM_BLACKLIST_HASH_LIST" # System blacklist hash list - empty string causes build failure
+              "EXTRA_FIRMWARE" # Extra firmware - empty string causes build failure
+              "IPE_BOOT_POLICY" # IPE boot policy - empty string causes build failure
+              "USB_STORAGE" # USB storage - ensure built-in for USB boot
+              "USB_UAS" # USB Attached SCSI - ensure built-in for modern USB devices
+              "OVERLAY_FS" # Overlay filesystem - ensure built-in for live boot
+              "UEVENT_HELPER" # Legacy uevent helper - let NixOS use modern udev
+            ]
+          ) dgxKernelConfig)
+          // (with lib.kernel; {
+            # Critical NixOS security options that may need to override DGX defaults
+            SECURITY_APPARMOR_BOOTPARAM_VALUE = freeform "1";
+            SECURITY_APPARMOR_RESTRICT_USERNS = lib.mkForce yes; # NixOS enables AppArmor by default
+
+            # USB storage support for USB boot
+            USB_STORAGE = yes; # Build into kernel for USB boot
+            USB_UAS = yes; # USB Attached SCSI for modern USB devices
+            OVERLAY_FS = yes; # Overlay filesystem for live boot
+
+            # Device management - use modern udev instead of legacy helper
+            UEVENT_HELPER = no; # Disable legacy uevent helper for proper udev operation
+
+            # Platform-specific overrides
+            UBUNTU_HOST = no; # Not Ubuntu!
+          });
       };
-
-      # Apply Rust gendwarfksyms fix patch
-      kernelPatches = [
-        {
-          name = "rust-gendwarfksyms-fix";
-          patch = ../patches/rust-gendwarfksyms-fix.patch;
-        }
-      ];
-
-      version = "${nvidiaKernelVersion}-nvidia";
-      modDirVersion = nvidiaKernelVersion;
-      enableCommonConfig = true; # Enable NixOS defaults for dependency resolution
-      ignoreConfigErrors = true; # Ignore unused config options
-
-      # Use comprehensive NVIDIA DGX configuration with NixOS-specific overrides
-      structuredExtraConfig = (lib.filterAttrs
-        (name: value:
-          # Remove options that conflict with NixOS requirements or don't exist in this kernel
-          !lib.elem name [
-            "BLK_DEV_DM" # Device mapper - let NixOS handle this
-            "BLK_DEV_DM_BUILTIN" # Device mapper builtin - let NixOS handle this
-            "PAHOLE_VERSION" # Tool version - let NixOS handle this
-            "RUSTC_LLVM_VERSION" # Compiler version - let NixOS handle this
-            "RUSTC_VERSION" # Compiler version - let NixOS handle this
-            "GCC_VERSION" # Compiler version - let NixOS handle this
-            "LD_VERSION" # Linker version - let NixOS handle this
-            "VERSION_SIGNATURE" # Version signature - let NixOS handle this
-            "LOCALVERSION" # Local version - let NixOS handle this
-            "LOCALVERSION_AUTO" # Local version auto - let NixOS handle this
-            "INITRAMFS_SOURCE" # Initramfs source - let NixOS handle this
-            "SYSTEM_TRUSTED_KEYS" # System trusted keys - debian-specific paths
-            "SYSTEM_REVOCATION_KEYS" # System revocation keys - debian-specific paths
-            "MODULE_SIG_KEY" # Module signing key - let NixOS handle this
-            "SYSTEM_BLACKLIST_HASH_LIST" # System blacklist hash list - empty string causes build failure
-            "EXTRA_FIRMWARE" # Extra firmware - empty string causes build failure
-            "IPE_BOOT_POLICY" # IPE boot policy - empty string causes build failure
-            "USB_STORAGE" # USB storage - ensure built-in for USB boot
-            "USB_UAS" # USB Attached SCSI - ensure built-in for modern USB devices
-            "OVERLAY_FS" # Overlay filesystem - ensure built-in for live boot
-            "UEVENT_HELPER" # Legacy uevent helper - let NixOS use modern udev
-          ]
-        )
-        dgxKernelConfig) // (with lib.kernel; {
-        # Critical NixOS security options that may need to override DGX defaults
-        SECURITY_APPARMOR_BOOTPARAM_VALUE = freeform "1";
-        SECURITY_APPARMOR_RESTRICT_USERNS = lib.mkForce yes; # NixOS enables AppArmor by default
-
-        # USB storage support for USB boot
-        USB_STORAGE = yes; # Build into kernel for USB boot
-        USB_UAS = yes; # USB Attached SCSI for modern USB devices
-        OVERLAY_FS = yes; # Overlay filesystem for live boot
-
-        # Device management - use modern udev instead of legacy helper
-        UEVENT_HELPER = no; # Disable legacy uevent helper for proper udev operation
-
-        # Platform-specific overrides
-        UBUNTU_HOST = no; # Not Ubuntu!
-      });
-    };
-  });
+    }
+  );
 in
 {
   options.hardware.dgx-spark = {
@@ -94,13 +102,40 @@ in
 
   config = mkIf cfg.enable {
     # Use the NVIDIA kernel if enabled, otherwise use explicit 6.17 kernel
-    boot.kernelPackages =
-      if cfg.useNvidiaKernel
-      then nvidiaKernel
-      else pkgs.linuxPackages_6_17;
+    boot.kernelPackages = if cfg.useNvidiaKernel then nvidiaKernel else pkgs.linuxPackages_6_17;
 
     boot.kernelParams = [
-      "console=tty1" # VGA console
+      # TH500 early console - REQUIRED for any output before full driver init
+      "earlycon=uart,mmio32,0x16A00000"
+      # Serial console at 921600 baud (TH500 default)
+      "console=ttyS0,921600"
+      # VGA console (last console= becomes /dev/console)
+      "console=tty0"
+      # Conservative PCIe settings for GB10 GPU and ConnectX-7
+      "pci=pcie_bus_safe"
+      # Disable memory zeroing for GPU workload performance
+      "init_on_alloc=0"
+    ];
+
+    # Modules made available in the initrd for the TH500 platform. These are
+    # loaded on demand; use boot.initrd.kernelModules to force-load one.
+    boot.initrd.availableKernelModules = [
+      # TH500/Tegra241 platform essentials
+      "tegra-bpmp"
+      "tegra-bpmp-thermal"
+      "tegra186-gpc-dma"
+      "tegra210-adma"
+      "tegra194-cpufreq"
+      "arm-smmu-v3"
+      # NVIDIA GPU. NOTE(review): out-of-tree modules - the initrd build needs
+      # the driver package in boot.extraModulePackages; confirm it is set.
+      "nvidia"
+      "nvidia-modeset"
+      "nvidia-drm"
+      # ConnectX-7 networking
+      "mlx5_core"
+      # NVMe storage
+      "nvme"
     ];
 
     boot.blacklistedKernelModules = [
diff --git a/usb-configuration-base.nix b/usb-configuration-base.nix
index 3c00441..5cde1ae 100644
--- a/usb-configuration-base.nix
+++ b/usb-configuration-base.nix
@@ -1,4 +1,9 @@
-{ config, pkgs, lib, ... }:
+{
+  config,
+  pkgs,
+  lib,
+  ...
+}:
 
 {
   # Enable systemd in the initial ramdisk
@@ -22,10 +27,28 @@
 
     # Modern storage support
     "nvme" # NVMe SSD support
+
+    # TH500/Tegra platform support (CRITICAL for DGX Spark)
+    "tegra-bpmp" # Boot and Power Management Processor
+    "tegra-bpmp-thermal" # BPMP thermal driver
+    "tegra194-cpufreq" # CPU frequency scaling (also used by TH500)
+    "gpio-tegra186" # GPIO controller
+    "i2c-tegra" # I2C controller
+    "pinctrl-tegra" # Pin control
+    "arm-smmu-v3" # ARM IOMMU (for CMDQV)
+
+    # Serial console (for earlycon handoff)
+    "8250" # 8250 serial driver
+    "8250_dw" # DesignWare 8250
   ];
 
   # USB storage support
-  boot.supportedFilesystems = [ "vfat" "ext4" "ntfs" "iso9660" ];
+  boot.supportedFilesystems = [
+    "vfat"
+    "ext4"
+    "ntfs"
+    "iso9660"
+  ];
 
   # Boot loader configuration for USB
   boot.loader.grub = {
@@ -64,7 +87,10 @@
   # User configuration
   users.users.nixos = {
     isNormalUser = true;
-    extraGroups = [ "wheel" "networkmanager" ];
+    extraGroups = [
+      "wheel"
+      "networkmanager"
+    ];
     initialPassword = "nixos";
   };
 
@@ -115,7 +141,10 @@
   };
 
   # Enable Flakes and the new command-line tool
-  nix.settings.experimental-features = [ "nix-command" "flakes" ];
+  nix.settings.experimental-features = [
+    "nix-command"
+    "flakes"
+  ];
 
   # Automatically optimize the Nix store
   nix.settings.auto-optimise-store = true;