/ th500-boot-fix.patch
th500-boot-fix.patch
  1  diff --git a/modules/dgx-spark.nix b/modules/dgx-spark.nix
  2  index 09e999b..17103b1 100644
  3  --- a/modules/dgx-spark.nix
  4  +++ b/modules/dgx-spark.nix
  5  @@ -1,4 +1,9 @@
  6  -{ config, lib, pkgs, ... }:
  7  +{
  8  +  config,
  9  +  lib,
 10  +  pkgs,
 11  +  ...
 12  +}:
 13   
 14   with lib;
 15   
 16  @@ -10,76 +15,79 @@ let
 17     # Import generated NVIDIA DGX configuration
 18     dgxKernelConfig = import ../kernel-configs/nvidia-dgx-spark-6.17.1.nix { inherit lib; };
 19   
 20  -  nvidiaKernel = pkgs.linuxPackagesFor (baseKernel.override {
 21  -    argsOverride = rec {
 22  -      # Use the NVIDIA kernel source
 23  -      src = pkgs.fetchFromGitHub {
 24  -        owner = "NVIDIA";
 25  -        repo = "NV-Kernels";
 26  -        # From https://github.com/NVIDIA/NV-Kernels/commits/24.04_linux-nvidia-6.17-next/
 27  -        rev = "47ca203bcc5f4e1580c06fe1074d71497462ac8b";
 28  -        hash = "sha256-lPp7RFvZcPhV5v6FOxCVIB53vpNujvvP0NAW6iRaiF8=";
 29  +  nvidiaKernel = pkgs.linuxPackagesFor (
 30  +    baseKernel.override {
 31  +      argsOverride = rec {
 32  +        # Use the NVIDIA kernel source
 33  +        src = pkgs.fetchFromGitHub {
 34  +          owner = "NVIDIA";
 35  +          repo = "NV-Kernels";
 36  +          # From https://github.com/NVIDIA/NV-Kernels/commits/24.04_linux-nvidia-6.17-next/
 37  +          rev = "47ca203bcc5f4e1580c06fe1074d71497462ac8b";
 38  +          hash = "sha256-lPp7RFvZcPhV5v6FOxCVIB53vpNujvvP0NAW6iRaiF8=";
 39  +        };
 40  +
 41  +        # Apply Rust gendwarfksyms fix patch
 42  +        kernelPatches = [
 43  +          {
 44  +            name = "rust-gendwarfksyms-fix";
 45  +            patch = ../patches/rust-gendwarfksyms-fix.patch;
 46  +          }
 47  +        ];
 48  +
 49  +        version = "${nvidiaKernelVersion}-nvidia";
 50  +        modDirVersion = nvidiaKernelVersion;
 51  +        enableCommonConfig = true; # Enable NixOS defaults for dependency resolution
 52  +        ignoreConfigErrors = true; # Ignore unused config options
 53  +
 54  +        # Use comprehensive NVIDIA DGX configuration with NixOS-specific overrides
 55  +        structuredExtraConfig =
 56  +          (lib.filterAttrs (
 57  +            name: value:
 58  +            # Remove options that conflict with NixOS requirements or don't exist in this kernel
 59  +            !lib.elem name [
 60  +              "BLK_DEV_DM" # Device mapper - let NixOS handle this
 61  +              "BLK_DEV_DM_BUILTIN" # Device mapper builtin - let NixOS handle this
 62  +              "PAHOLE_VERSION" # Tool version - let NixOS handle this
 63  +              "RUSTC_LLVM_VERSION" # Compiler version - let NixOS handle this
 64  +              "RUSTC_VERSION" # Compiler version - let NixOS handle this
 65  +              "GCC_VERSION" # Compiler version - let NixOS handle this
 66  +              "LD_VERSION" # Linker version - let NixOS handle this
 67  +              "VERSION_SIGNATURE" # Version signature - let NixOS handle this
 68  +              "LOCALVERSION" # Local version - let NixOS handle this
 69  +              "LOCALVERSION_AUTO" # Local version auto - let NixOS handle this
 70  +              "INITRAMFS_SOURCE" # Initramfs source - let NixOS handle this
 71  +              "SYSTEM_TRUSTED_KEYS" # System trusted keys - debian-specific paths
 72  +              "SYSTEM_REVOCATION_KEYS" # System revocation keys - debian-specific paths
 73  +              "MODULE_SIG_KEY" # Module signing key - let NixOS handle this
 74  +              "SYSTEM_BLACKLIST_HASH_LIST" # System blacklist hash list - empty string causes build failure
 75  +              "EXTRA_FIRMWARE" # Extra firmware - empty string causes build failure
 76  +              "IPE_BOOT_POLICY" # IPE boot policy - empty string causes build failure
 77  +              "USB_STORAGE" # USB storage - ensure built-in for USB boot
 78  +              "USB_UAS" # USB Attached SCSI - ensure built-in for modern USB devices
 79  +              "OVERLAY_FS" # Overlay filesystem - ensure built-in for live boot
 80  +              "UEVENT_HELPER" # Legacy uevent helper - let NixOS use modern udev
 81  +            ]
 82  +          ) dgxKernelConfig)
 83  +          // (with lib.kernel; {
 84  +            # Critical NixOS security options that may need to override DGX defaults
 85  +            SECURITY_APPARMOR_BOOTPARAM_VALUE = freeform "1";
 86  +            SECURITY_APPARMOR_RESTRICT_USERNS = lib.mkForce yes; # NixOS enables AppArmor by default
 87  +
 88  +            # USB storage support for USB boot
 89  +            USB_STORAGE = yes; # Build into kernel for USB boot
 90  +            USB_UAS = yes; # USB Attached SCSI for modern USB devices
 91  +            OVERLAY_FS = yes; # Overlay filesystem for live boot
 92  +
 93  +            # Device management - use modern udev instead of legacy helper
 94  +            UEVENT_HELPER = no; # Disable legacy uevent helper for proper udev operation
 95  +
 96  +            # Platform-specific overrides
 97  +            UBUNTU_HOST = no; # Not Ubuntu!
 98  +          });
 99         };
100  -
101  -      # Apply Rust gendwarfksyms fix patch
102  -      kernelPatches = [
103  -        {
104  -          name = "rust-gendwarfksyms-fix";
105  -          patch = ../patches/rust-gendwarfksyms-fix.patch;
106  -        }
107  -      ];
108  -
109  -      version = "${nvidiaKernelVersion}-nvidia";
110  -      modDirVersion = nvidiaKernelVersion;
111  -      enableCommonConfig = true; # Enable NixOS defaults for dependency resolution
112  -      ignoreConfigErrors = true; # Ignore unused config options
113  -
114  -      # Use comprehensive NVIDIA DGX configuration with NixOS-specific overrides
115  -      structuredExtraConfig = (lib.filterAttrs
116  -        (name: value:
117  -          # Remove options that conflict with NixOS requirements or don't exist in this kernel
118  -          !lib.elem name [
119  -            "BLK_DEV_DM" # Device mapper - let NixOS handle this
120  -            "BLK_DEV_DM_BUILTIN" # Device mapper builtin - let NixOS handle this
121  -            "PAHOLE_VERSION" # Tool version - let NixOS handle this
122  -            "RUSTC_LLVM_VERSION" # Compiler version - let NixOS handle this
123  -            "RUSTC_VERSION" # Compiler version - let NixOS handle this
124  -            "GCC_VERSION" # Compiler version - let NixOS handle this
125  -            "LD_VERSION" # Linker version - let NixOS handle this
126  -            "VERSION_SIGNATURE" # Version signature - let NixOS handle this
127  -            "LOCALVERSION" # Local version - let NixOS handle this
128  -            "LOCALVERSION_AUTO" # Local version auto - let NixOS handle this
129  -            "INITRAMFS_SOURCE" # Initramfs source - let NixOS handle this
130  -            "SYSTEM_TRUSTED_KEYS" # System trusted keys - debian-specific paths
131  -            "SYSTEM_REVOCATION_KEYS" # System revocation keys - debian-specific paths
132  -            "MODULE_SIG_KEY" # Module signing key - let NixOS handle this
133  -            "SYSTEM_BLACKLIST_HASH_LIST" # System blacklist hash list - empty string causes build failure
134  -            "EXTRA_FIRMWARE" # Extra firmware - empty string causes build failure
135  -            "IPE_BOOT_POLICY" # IPE boot policy - empty string causes build failure
136  -            "USB_STORAGE" # USB storage - ensure built-in for USB boot
137  -            "USB_UAS" # USB Attached SCSI - ensure built-in for modern USB devices
138  -            "OVERLAY_FS" # Overlay filesystem - ensure built-in for live boot
139  -            "UEVENT_HELPER" # Legacy uevent helper - let NixOS use modern udev
140  -          ]
141  -        )
142  -        dgxKernelConfig) // (with lib.kernel; {
143  -        # Critical NixOS security options that may need to override DGX defaults
144  -        SECURITY_APPARMOR_BOOTPARAM_VALUE = freeform "1";
145  -        SECURITY_APPARMOR_RESTRICT_USERNS = lib.mkForce yes; # NixOS enables AppArmor by default
146  -
147  -        # USB storage support for USB boot
148  -        USB_STORAGE = yes; # Build into kernel for USB boot
149  -        USB_UAS = yes; # USB Attached SCSI for modern USB devices
150  -        OVERLAY_FS = yes; # Overlay filesystem for live boot
151  -
152  -        # Device management - use modern udev instead of legacy helper
153  -        UEVENT_HELPER = no; # Disable legacy uevent helper for proper udev operation
154  -
155  -        # Platform-specific overrides
156  -        UBUNTU_HOST = no; # Not Ubuntu!
157  -      });
158  -    };
159  -  });
160  +    }
161  +  );
162   in
163   {
164     options.hardware.dgx-spark = {
165  @@ -94,13 +102,48 @@ in
166   
167     config = mkIf cfg.enable {
168       # Use the NVIDIA kernel if enabled, otherwise use explicit 6.17 kernel
169  -    boot.kernelPackages =
170  -      if cfg.useNvidiaKernel
171  -      then nvidiaKernel
172  -      else pkgs.linuxPackages_6_17;
173  +    boot.kernelPackages = if cfg.useNvidiaKernel then nvidiaKernel else pkgs.linuxPackages_6_17;
174   
175       boot.kernelParams = [
176  -      "console=tty1" # VGA console
177  +      # TH500 early console - REQUIRED for any output before full driver init
178  +      "earlycon=uart,mmio32,0x16A00000"
179  +      # Serial console at 921600 baud (TH500 default)
180  +      "console=ttyS0,921600"
181  +      # VGA console (last console= becomes /dev/console)
182  +      "console=tty0"
183  +      # Conservative PCIe settings for GB10 GPU and ConnectX-7
184  +      "pci=pcie_bus_safe"
 185  +      # Disable zero-on-allocation hardening (init_on_alloc) for GPU workload performance
186  +      "init_on_alloc=0"
187  +    ];
188  +
 189  +    # Modules made available in the initrd (loaded on demand, not forced) for the TH500 platform
190  +    boot.initrd.availableKernelModules = [
191  +      # TH500/Tegra241 platform essentials
192  +      "tegra-bpmp"
193  +      "tegra-bpmp-thermal"
194  +      "tegra186-gpc-dma"
195  +      "tegra210-adma"
196  +      "tegra194-cpufreq"
197  +      "arm-smmu-v3"
198  +      # NVIDIA GPU (for early modeset)
199  +      "nvidia"
200  +      "nvidia-modeset"
201  +      "nvidia-drm"
202  +      # ConnectX-7 networking
203  +      "mlx5_core"
204  +      # NVMe storage
205  +      "nvme"
206  +    ];
207  +
 208  +    # Add a placeholder nvidia/ firmware directory (contains no actual GSP firmware blobs)
209  +    hardware.firmware = [
210  +      (pkgs.runCommand "nvidia-firmware-initrd" { } ''
211  +        mkdir -p $out/lib/firmware/nvidia
212  +        # GSP firmware will be loaded from /lib/firmware at runtime
213  +        # This ensures the path exists
214  +        touch $out/lib/firmware/nvidia/.placeholder
215  +      '')
216       ];
217   
218       boot.blacklistedKernelModules = [
219  diff --git a/usb-configuration-base.nix b/usb-configuration-base.nix
220  index 3c00441..5cde1ae 100644
221  --- a/usb-configuration-base.nix
222  +++ b/usb-configuration-base.nix
223  @@ -1,4 +1,9 @@
224  -{ config, pkgs, lib, ... }:
225  +{
226  +  config,
227  +  pkgs,
228  +  lib,
229  +  ...
230  +}:
231   
232   {
233     # Enable systemd in the initial ramdisk
234  @@ -22,10 +27,28 @@
235   
236       # Modern storage support
237       "nvme" # NVMe SSD support
238  +
239  +    # TH500/Tegra platform support (CRITICAL for DGX Spark)
240  +    "tegra-bpmp" # Boot and Power Management Processor
241  +    "tegra-bpmp-thermal" # BPMP thermal driver
242  +    "tegra194-cpufreq" # CPU frequency scaling (also used by TH500)
243  +    "gpio-tegra186" # GPIO controller
244  +    "i2c-tegra" # I2C controller
245  +    "pinctrl-tegra" # Pin control
246  +    "arm-smmu-v3" # ARM IOMMU (for CMDQV)
247  +
248  +    # Serial console (for earlycon handoff)
249  +    "8250" # 8250 serial driver
250  +    "8250_dw" # DesignWare 8250
251     ];
252   
253     # USB storage support
254  -  boot.supportedFilesystems = [ "vfat" "ext4" "ntfs" "iso9660" ];
255  +  boot.supportedFilesystems = [
256  +    "vfat"
257  +    "ext4"
258  +    "ntfs"
259  +    "iso9660"
260  +  ];
261   
262     # Boot loader configuration for USB
263     boot.loader.grub = {
264  @@ -64,7 +87,10 @@
265     # User configuration
266     users.users.nixos = {
267       isNormalUser = true;
268  -    extraGroups = [ "wheel" "networkmanager" ];
269  +    extraGroups = [
270  +      "wheel"
271  +      "networkmanager"
272  +    ];
273       initialPassword = "nixos";
274     };
275   
276  @@ -115,7 +141,10 @@
277     };
278   
279     # Enable Flakes and the new command-line tool
280  -  nix.settings.experimental-features = [ "nix-command" "flakes" ];
281  +  nix.settings.experimental-features = [
282  +    "nix-command"
283  +    "flakes"
284  +  ];
285   
286     # Automatically optimize the Nix store
287     nix.settings.auto-optimise-store = true;