cloud-hypervisor-gpu.hs
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}

{- |
Run an OCI container image in Cloud Hypervisor with GPU passthrough.

Usage: ch-gpu [OPTIONS] [IMAGE] [COMMAND...]

Options:
  --cpus N      Number of vCPUs, positive integer (default: from config)
  --mem N       Memory in MiB, positive integer
                (default: from config, but never less than 16384)
  --gpu ADDR    GPU PCI address (default: auto-detect)

IMAGE defaults to nvcr.io/nvidia/pytorch:25.11-py3 and COMMAND defaults to
nvidia-smi. Unknown @--flags@ appearing before IMAGE are ignored.

Environment:
  CONFIG_FILE - Path to Dhall config (required, set by Nix wrapper)

Example:
  ch-gpu nvcr.io/nvidia/pytorch:24.01-py3
  ch-gpu --gpu 0000:01:00.0 ubuntu:24.04 nvidia-smi

WARNING: This binds the GPU to vfio-pci. The GPU will be unavailable
to the host until the VM exits.
-}
module Main where

import Aleph.Script hiding (FilePath)
import Aleph.Script.Config (StorePath (..), storePathToFilePath)
import qualified Aleph.Script.Oci as Oci
import qualified Aleph.Script.Vfio as Vfio
import qualified Aleph.Script.Vm as Vm
import Aleph.Script.Vm.Config (CloudHypervisorConfig (..), loadCloudHypervisorConfig)

import Control.Monad (forM_)
import qualified Data.List as L
import Data.Maybe (fromMaybe)
import Numeric.Natural (Natural)
import System.Environment (getArgs, lookupEnv)
import System.Exit (exitFailure)
import Text.Read (readMaybe)

-- | Parsed command line arguments.
data CliArgs = CliArgs
    { argCpus :: Maybe Int
    -- ^ Value of @--cpus@, if given
    , argMem :: Maybe Int
    -- ^ Value of @--mem@ (MiB), if given
    , argGpu :: Maybe Text
    -- ^ Value of @--gpu@, if given
    , argImage :: String
    -- ^ Image reference (first non-flag argument, or the built-in default)
    , argCmd :: [String]
    -- ^ Everything after the image; empty when no command was given
    }

{- | Parse the raw argument vector.

Options are consumed left to right until the first argument that does not
start with @--@; that argument becomes the image and everything after it is
the command. Unknown @--@ flags before the image are skipped.

Returns 'Left' with a human-readable message when @--cpus@ or @--mem@ is not
a positive integer, or when @--cpus@, @--mem@ or @--gpu@ is the last argument
and therefore has no value. Previously such input was silently ignored (and a
malformed number would also swallow the following argument), so the VM would
start with unintended resources.
-}
parseArgs :: [String] -> Either String CliArgs
parseArgs = go (CliArgs Nothing Nothing Nothing "nvcr.io/nvidia/pytorch:25.11-py3" [])
  where
    go acc [] = Right acc
    go acc ("--cpus" : n : rest) = do
        cpus <- positive "--cpus" n
        go acc{argCpus = Just cpus} rest
    go acc ("--mem" : n : rest) = do
        mem <- positive "--mem" n
        go acc{argMem = Just mem} rest
    go acc ("--gpu" : addr : rest) = go acc{argGpu = Just (pack addr)} rest
    go acc (arg : rest)
        -- A known flag can only reach this equation as the final argument,
        -- i.e. without its value.
        | arg `Prelude.elem` ["--cpus", "--mem", "--gpu"] =
            Left (arg <> " requires a value")
        | Prelude.take 2 arg /= "--" =
            -- First non-flag is image, rest is command
            Right acc{argImage = arg, argCmd = rest}
        | otherwise = go acc rest -- skip unknown flags

    -- Read a strictly positive Int, naming the offending flag on failure.
    positive :: String -> String -> Either String Int
    positive flag raw = case readMaybe raw of
        Just v | v > 0 -> Right v
        _ -> Left (flag <> " expects a positive integer, got: " <> raw)

-- | Entry point: load the config named by @CONFIG_FILE@ and hand over to
-- 'runWithConfig'. Exits with failure when the variable is unset.
main :: IO ()
main = do
    -- Load config from Dhall file (set by Nix wrapper)
    configPath <- lookupEnv "CONFIG_FILE"
    case configPath of
        Nothing -> do
            putStrLn "Error: CONFIG_FILE environment variable not set"
            putStrLn "This binary must be run via the Nix-wrapped version."
            exitFailure
        Just path -> do
            cfg <- loadCloudHypervisorConfig path
            runWithConfig cfg

{- | Parse the CLI, pick a GPU, bind it to vfio-pci, run the VM and unbind
the GPU again afterwards.

Invalid command line arguments are reported and the process exits with
failure before the GPU is touched.
-}
runWithConfig :: CloudHypervisorConfig -> IO ()
runWithConfig cfg@CloudHypervisorConfig{..} = do
    parsed <- parseArgs <$> getArgs
    case parsed of
        Left err -> do
            putStrLn $ "Error: " <> err
            exitFailure
        Right args -> do
            -- Merge CLI args with config defaults (16GB default for GPU workloads)
            let cpus = fromMaybe (fromIntegral chDefaultCpus) (argCpus args)
                mem = fromMaybe (max 16384 (fromIntegral chDefaultMemMib)) (argMem args)
                image = argImage args
                cmd = if Prelude.null (argCmd args) then ["nvidia-smi"] else argCmd args

            script $ do
                -- Find GPU: explicit --gpu wins, otherwise take the first one listed
                gpuAddr <- case argGpu args of
                    Just addr -> pure addr
                    Nothing -> do
                        echoErr ":: Auto-detecting NVIDIA GPU"
                        gpus <- Vfio.listNvidiaGpus
                        case gpus of
                            [] -> die "No NVIDIA GPUs found. Set --gpu 0000:XX:XX.X"
                            (g : _) -> do
                                echoErr $ ":: Found " <> Vfio.pciAddr g
                                pure (Vfio.pciAddr g)

                echoErr $ ":: Cloud Hypervisor + GPU (" <> pack (show cpus) <> " CPUs, " <> pack (show mem) <> " MiB)"
                echoErr $ ":: GPU: " <> gpuAddr

                -- Bind GPU to vfio-pci
                echoErr ":: Binding GPU to vfio-pci"
                _boundDevices <- Vfio.bindToVfio gpuAddr

                -- Cleanup: unbind on exit
                let cleanup = do
                        echoErr ":: Unbinding GPU from vfio-pci"
                        Vfio.unbindFromVfio gpuAddr

                -- Run VM with cleanup
                finally (runVm cfg image gpuAddr cpus mem cmd) cleanup

{- | Build a root filesystem from an OCI image and boot it in Cloud
Hypervisor with the given GPU attached.

Steps, all inside a temporary working directory: export the image with
@crane@ into @rootfs/@, inject busybox and the init script, write the user
command to @rootfs/build-cmd@, build an ext4 disk image from the rootfs and
start the VM.

The image reference and the rootfs path are shell-quoted before being
interpolated into the bash pipeline, so references or paths containing
quotes, spaces or other shell metacharacters cannot break or alter the
command.
-}
runVm :: CloudHypervisorConfig -> String -> Vfio.PciAddr -> Int -> Int -> [String] -> Sh ()
runVm CloudHypervisorConfig{..} image gpuAddr cpus mem cmd = do
    withTmpDir $ \workDir -> do
        let rootfsDir = workDir </> "rootfs"
            disk = workDir </> "disk.raw"
            kernelPath = storePathToFilePath chKernel
            busyboxPath = storePathToFilePath chBusybox
            -- Use GPU init script if available, otherwise regular init
            initPath = case chGpuInitScript of
                Just p -> storePathToFilePath p
                Nothing -> storePathToFilePath chInitScript

        -- Pull image
        echoErr $ ":: Pulling " <> pack image
        mkdirP rootfsDir
        setEnv "SSL_CERT_FILE" "/etc/ssl/certs/ca-bundle.crt"
        -- pipefail: make a failing `crane export` fail the whole step even if
        -- tar itself exits 0 (harmless if bash_ already enables it).
        bash_ $
            "set -o pipefail; crane export --platform linux/amd64 "
                <> pack (shellQuote image)
                <> " - | tar -xf - -C "
                <> pack (shellQuote rootfsDir)

        -- Inject busybox and init
        echoErr ":: Injecting init"
        Vm.injectBusybox busyboxPath rootfsDir
        cp initPath (rootfsDir </> "init")
        run_ "chmod" ["+x", pack (rootfsDir </> "init")]

        -- Write build command
        -- NOTE(review): the command words are joined with spaces and are
        -- re-parsed by bash inside the guest, so argument boundaries and
        -- quoting from the host command line are not preserved. Left as is
        -- because callers may rely on passing shell syntax through.
        let buildCmdPath = rootfsDir </> "build-cmd"
            cmdScript =
                "#!/bin/bash\nset -euo pipefail\ncd /workspace 2>/dev/null || cd /root\n"
                    <> Prelude.unwords cmd
                    <> "\n"
        liftIO $ writeFile buildCmdPath cmdScript
        run_ "chmod" ["+x", pack buildCmdPath]

        -- Build disk image (16GB for GPU workloads)
        -- NOTE(review): presumably the size argument is in KiB - confirm
        -- against Vm.buildExt4Sized.
        echoErr ":: Building rootfs"
        Vm.buildExt4Sized (16 * 1024 * 1024) rootfsDir disk

        -- Boot with GPU
        echoErr ":: Booting Cloud Hypervisor with GPU passthrough"
        let vmCfg =
                Vm.defaultCloudHypervisorConfig
                    { Vm.chKernel = kernelPath
                    , Vm.chDisk = disk
                    , Vm.chCpus = cpus
                    , Vm.chMemMib = mem
                    }
        Vm.runCloudHypervisorGpu vmCfg gpuAddr
  where
    -- Wrap a string in single quotes for POSIX shells; an embedded single
    -- quote is emitted as '\'' (close quote, escaped quote, reopen quote).
    shellQuote :: String -> String
    shellQuote raw = "'" <> Prelude.concatMap escapeChar raw <> "'"

    escapeChar :: Char -> String
    escapeChar '\'' = "'\\''"
    escapeChar c = [c]