From daef0f5b4ef731eaa3ef0d1033dda899b7373276 Mon Sep 17 00:00:00 2001
From: Rafael Garcia
Date: Sat, 13 Dec 2025 21:43:39 +0000
Subject: [PATCH 01/17] feat(devices): add lib/devices package types, errors,
 and paths

Add foundational types for GPU/PCI device passthrough:
- Device, AvailableDevice, CreateDeviceRequest structs
- Error types (ErrNotFound, ErrInUse, ErrAlreadyExists, etc.)
- Device path helpers in lib/paths
---
 lib/devices/errors.go | 40 ++++++++++++++++++++++++++++
 lib/devices/types.go  | 56 +++++++++++++++++++++++++++++++++++++++
 lib/paths/paths.go    | 18 ++++++++++++-
 3 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 lib/devices/errors.go
 create mode 100644 lib/devices/types.go

diff --git a/lib/devices/errors.go b/lib/devices/errors.go
new file mode 100644
index 00000000..afacaf2e
--- /dev/null
+++ b/lib/devices/errors.go
@@ -0,0 +1,40 @@
+package devices
+
+import "errors"
+
+var (
+	// ErrNotFound is returned when a device is not found
+	ErrNotFound = errors.New("device not found")
+
+	// ErrInUse is returned when a device is currently attached to an instance
+	ErrInUse = errors.New("device is in use")
+
+	// ErrNotBound is returned when a VFIO operation requires the device to be bound
+	ErrNotBound = errors.New("device is not bound to VFIO")
+
+	// ErrAlreadyBound is returned when trying to bind a device that's already bound to VFIO
+	ErrAlreadyBound = errors.New("device is already bound to VFIO")
+
+	// ErrAlreadyExists is returned when trying to register a device that already exists
+	ErrAlreadyExists = errors.New("device already exists")
+
+	// ErrInvalidName is returned when the device name doesn't match the required pattern
+	ErrInvalidName = errors.New("device name must match pattern ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$")
+
+	// ErrNameExists is returned when a device with the same name already exists
+	ErrNameExists = errors.New("device name already exists")
+
+	// ErrInvalidPCIAddress is returned when the PCI address format is invalid
+	ErrInvalidPCIAddress = errors.New("invalid PCI address format")
+
+	// ErrDeviceNotFound is returned when the PCI device doesn't exist on the host
+	ErrDeviceNotFound = errors.New("PCI device not found on host")
+
+	// ErrVFIONotAvailable is returned when VFIO modules are not loaded
+	ErrVFIONotAvailable = errors.New("VFIO is not available (modules not loaded)")
+
+	// ErrIOMMUGroupConflict is returned when not all devices in an IOMMU group can be passed through
+	ErrIOMMUGroupConflict = errors.New("IOMMU group contains other devices that must also be passed through")
+)
diff --git a/lib/devices/types.go b/lib/devices/types.go
new file mode 100644
index 00000000..ca7b68ed
--- /dev/null
+++ b/lib/devices/types.go
@@ -0,0 +1,56 @@
+package devices
+
+import (
+	"regexp"
+	"time"
+)
+
+// DeviceType represents the type of PCI device
+type DeviceType string
+
+const (
+	DeviceTypeGPU     DeviceType = "gpu"
+	DeviceTypeGeneric DeviceType = "pci"
+)
+
+// Device represents a registered PCI device for passthrough
+type Device struct {
+	Id          string     `json:"id"`            // cuid2 identifier
+	Name        string     `json:"name"`          // user-provided globally unique name
+	Type        DeviceType `json:"type"`          // gpu or pci
+	PCIAddress  string     `json:"pci_address"`   // e.g., "0000:a2:00.0"
+	VendorID    string     `json:"vendor_id"`     // e.g., "10de"
+	DeviceID    string     `json:"device_id"`     // e.g., "27b8"
+	IOMMUGroup  int        `json:"iommu_group"`   // IOMMU group number
+	BoundToVFIO bool       `json:"bound_to_vfio"` // whether device is bound to vfio-pci
+	AttachedTo  *string    `json:"attached_to"`   // instance ID if attached, nil otherwise
+	CreatedAt   time.Time  `json:"created_at"`
+}
+
+// CreateDeviceRequest is the request to register a new device
+type CreateDeviceRequest struct {
+	Name       string `json:"name,omitempty"` // optional: globally unique name (auto-generated if not provided)
+	PCIAddress string `json:"pci_address"`    // required: PCI address (e.g., "0000:a2:00.0")
+}
+
+// AvailableDevice represents a PCI device discovered on the host
+type AvailableDevice struct {
+	PCIAddress    string  `json:"pci_address"`
+	VendorID      string  `json:"vendor_id"`
+	DeviceID      string  `json:"device_id"`
+	VendorName    string  `json:"vendor_name"`
+	DeviceName    string  `json:"device_name"`
+	IOMMUGroup    int     `json:"iommu_group"`
+	CurrentDriver *string `json:"current_driver"` // nil if no driver bound
+}
+
+// DeviceNamePattern is the regex pattern for valid device names.
+// Names must start with an alphanumeric character, followed by at least one
+// more alphanumeric, underscore, dot, or dash (minimum length two).
+var DeviceNamePattern = regexp.MustCompile(`^[a-zA-Z0-9][a-zA-Z0-9_.-]+$`)
+
+// ValidateDeviceName validates that a device name matches the required pattern
+func ValidateDeviceName(name string) bool {
+	return DeviceNamePattern.MatchString(name)
+}
diff --git a/lib/paths/paths.go b/lib/paths/paths.go
index 65e66a97..ce06aeb0 100644
--- a/lib/paths/paths.go
+++ b/lib/paths/paths.go
@@ -1,5 +1,4 @@
 // Package paths provides centralized path construction for hypeman data directory.
-
 package paths
 
 import "path/filepath"
@@ -196,6 +195,23 @@ func (p *Paths) GuestsDir() string {
 	return filepath.Join(p.dataDir, "guests")
 }
 
+// Device path methods
+
+// DevicesDir returns the root devices directory.
+func (p *Paths) DevicesDir() string {
+	return filepath.Join(p.dataDir, "devices")
+}
+
+// DeviceDir returns the directory for a device.
+func (p *Paths) DeviceDir(id string) string {
+	return filepath.Join(p.DevicesDir(), id)
+}
+
+// DeviceMetadata returns the path to device metadata.json.
+func (p *Paths) DeviceMetadata(id string) string {
+	return filepath.Join(p.DeviceDir(id), "metadata.json")
+}
+
 // Volume path methods
 
 // VolumesDir returns the root volumes directory.
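[Series note] A quick sketch of how the pieces in this patch compose (hypothetical
standalone caller, not part of the series; it uses only identifiers added above):

    package main

    import (
        "errors"
        "fmt"

        "github.com/onkernel/hypeman/lib/devices"
    )

    func main() {
        // Device names must match ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$ (see ErrInvalidName).
        fmt.Println(devices.ValidateDeviceName("l4-gpu.0")) // true
        fmt.Println(devices.ValidateDeviceName("-bad"))     // false

        // Callers are expected to wrap and compare sentinel errors with errors.Is.
        err := fmt.Errorf("lookup: %w", devices.ErrNotFound)
        fmt.Println(errors.Is(err, devices.ErrNotFound)) // true
    }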
From 7202c7591e2000119002b41b09ec8d13aaac4856 Mon Sep 17 00:00:00 2001
From: Rafael Garcia
Date: Sat, 13 Dec 2025 21:43:56 +0000
Subject: [PATCH 02/17] feat(devices): add PCI device discovery and VFIO
 binding

Add low-level device operations:
- discovery.go: Scan PCI bus, detect IOMMU groups, identify GPU devices
- vfio.go: Bind/unbind devices to vfio-pci driver for VM passthrough
---
 lib/devices/discovery.go | 279 +++++++++++++++++++++++++++++
 lib/devices/vfio.go      | 310 +++++++++++++++++++++++++++++++++
 2 files changed, 589 insertions(+)
 create mode 100644 lib/devices/discovery.go
 create mode 100644 lib/devices/vfio.go

diff --git a/lib/devices/discovery.go b/lib/devices/discovery.go
new file mode 100644
index 00000000..b04213c0
--- /dev/null
+++ b/lib/devices/discovery.go
@@ -0,0 +1,279 @@
+package devices
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+	"regexp"
+	"strconv"
+	"strings"
+)
+
+const (
+	sysfsDevicesPath = "/sys/bus/pci/devices"
+	sysfsIOMMUPath   = "/sys/kernel/iommu_groups"
+)
+
+// pciAddressPattern matches PCI addresses like "0000:a2:00.0"
+var pciAddressPattern = regexp.MustCompile(`^[0-9a-fA-F]{4}:[0-9a-fA-F]{2}:[0-9a-fA-F]{2}\.[0-9a-fA-F]$`)
+
+// ValidatePCIAddress validates that a string is a valid PCI address format
+func ValidatePCIAddress(addr string) bool {
+	return pciAddressPattern.MatchString(addr)
+}
+
+// DiscoverAvailableDevices scans sysfs for PCI devices that can be used for passthrough.
+// It filters for likely candidates: display-class devices (VGA and 3D controllers)
+// plus any NVIDIA device; see isPassthroughCandidate.
+func DiscoverAvailableDevices() ([]AvailableDevice, error) {
+	entries, err := os.ReadDir(sysfsDevicesPath)
+	if err != nil {
+		return nil, fmt.Errorf("read sysfs devices: %w", err)
+	}
+
+	var devices []AvailableDevice
+	for _, entry := range entries {
+		addr := entry.Name()
+		if !ValidatePCIAddress(addr) {
+			continue
+		}
+
+		device, err := readDeviceInfo(addr)
+		if err != nil {
+			// Skip devices we can't read
+			continue
+		}
+
+		// Filter for passthrough-capable devices (GPUs, 3D controllers, etc.)
+		if isPassthroughCandidate(device) {
+			devices = append(devices, *device)
+		}
+	}
+
+	return devices, nil
+}
+
+// GetDeviceInfo reads information about a specific PCI device
+func GetDeviceInfo(pciAddress string) (*AvailableDevice, error) {
+	if !ValidatePCIAddress(pciAddress) {
+		return nil, ErrInvalidPCIAddress
+	}
+
+	devicePath := filepath.Join(sysfsDevicesPath, pciAddress)
+	if _, err := os.Stat(devicePath); os.IsNotExist(err) {
+		return nil, ErrDeviceNotFound
+	}
+
+	return readDeviceInfo(pciAddress)
+}
+
+// readDeviceInfo reads device information from sysfs
+func readDeviceInfo(pciAddress string) (*AvailableDevice, error) {
+	devicePath := filepath.Join(sysfsDevicesPath, pciAddress)
+
+	vendorID, err := readSysfsFile(filepath.Join(devicePath, "vendor"))
+	if err != nil {
+		return nil, fmt.Errorf("read vendor: %w", err)
+	}
+	vendorID = strings.TrimPrefix(vendorID, "0x")
+
+	deviceID, err := readSysfsFile(filepath.Join(devicePath, "device"))
+	if err != nil {
+		return nil, fmt.Errorf("read device: %w", err)
+	}
+	deviceID = strings.TrimPrefix(deviceID, "0x")
+
+	iommuGroup, err := readIOMMUGroup(pciAddress)
+	if err != nil {
+		return nil, fmt.Errorf("read iommu group: %w", err)
+	}
+
+	driver := readCurrentDriver(pciAddress)
+
+	// Get device class to determine type
+	classCode, _ := readSysfsFile(filepath.Join(devicePath, "class"))
+
+	return &AvailableDevice{
+		PCIAddress:    pciAddress,
+		VendorID:      vendorID,
+		DeviceID:      deviceID,
+		VendorName:    getVendorName(vendorID),
+		DeviceName:    getDeviceName(vendorID, deviceID, classCode),
+		IOMMUGroup:    iommuGroup,
+		CurrentDriver: driver,
+	}, nil
+}
+
+// readSysfsFile reads and trims a sysfs file
+func readSysfsFile(path string) (string, error) {
+	data, err := os.ReadFile(path)
+	if err != nil {
+		return "", err
+	}
+	return strings.TrimSpace(string(data)), nil
+}
+
+// readIOMMUGroup reads the IOMMU group number for a device
+func readIOMMUGroup(pciAddress string) (int, error) {
+	iommuLink := filepath.Join(sysfsDevicesPath, pciAddress, "iommu_group")
+	target, err := os.Readlink(iommuLink)
+	if err != nil {
+		return -1, fmt.Errorf("read iommu_group link: %w", err)
+	}
+
+	// Target is like "../../../../kernel/iommu_groups/82"
+	groupStr := filepath.Base(target)
+	group, err := strconv.Atoi(groupStr)
+	if err != nil {
+		return -1, fmt.Errorf("parse iommu group: %w", err)
+	}
+
+	return group, nil
+}
+
+// readCurrentDriver reads the current driver bound to the device
+func readCurrentDriver(pciAddress string) *string {
+	driverLink := filepath.Join(sysfsDevicesPath, pciAddress, "driver")
+	target, err := os.Readlink(driverLink)
+	if err != nil {
+		// No driver bound
+		return nil
+	}
+
+	driver := filepath.Base(target)
+	return &driver
+}
+
+// GetIOMMUGroupDevices returns all PCI devices in the same IOMMU group
+func GetIOMMUGroupDevices(iommuGroup int) ([]string, error) {
+	groupPath := filepath.Join(sysfsIOMMUPath, strconv.Itoa(iommuGroup), "devices")
+	entries, err := os.ReadDir(groupPath)
+	if err != nil {
+		return nil, fmt.Errorf("read iommu group devices: %w", err)
+	}
+
+	var devices []string
+	for _, entry := range entries {
+		devices = append(devices, entry.Name())
+	}
+	return devices, nil
+}
+
+// isPassthroughCandidate determines if a device is a good candidate for passthrough
+func isPassthroughCandidate(device *AvailableDevice) bool {
+	// Check class code for GPUs and 3D controllers
+	// Class 0x03 = Display controller
+	// Subclass 0x00 = VGA controller
+	// Subclass 0x02 = 3D controller (like NVIDIA compute GPUs)
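+	// The sysfs "class" file holds a 24-bit code such as 0x030200: two hex
+	// digits each for class, subclass, and prog-if, so the first four hex
+	// digits after the "0x" prefix identify class+subclass.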
+	devicePath := filepath.Join(sysfsDevicesPath, device.PCIAddress)
+	classCode, err := readSysfsFile(filepath.Join(devicePath, "class"))
+	if err != nil {
+		return false
+	}
+
+	classCode = strings.TrimPrefix(classCode, "0x")
+	if len(classCode) >= 4 {
+		classPrefix := classCode[:4]
+		// 0300 = VGA controller, 0302 = 3D controller
+		if classPrefix == "0300" || classPrefix == "0302" {
+			return true
+		}
+	}
+
+	// Also include NVIDIA devices by vendor ID
+	if device.VendorID == "10de" {
+		return true
+	}
+
+	return false
+}
+
+// getVendorName returns a human-readable vendor name
+func getVendorName(vendorID string) string {
+	vendors := map[string]string{
+		"10de": "NVIDIA Corporation",
+		"1002": "AMD/ATI",
+		"8086": "Intel Corporation",
+	}
+	if name, ok := vendors[vendorID]; ok {
+		return name
+	}
+	return "Unknown Vendor"
+}
+
+// getDeviceName returns a human-readable device name based on class and IDs
+func getDeviceName(vendorID, deviceID, classCode string) string {
+	// For NVIDIA, provide some common device names.
+	// Sources:
+	// - NVIDIA Driver README, Appendix A "Supported NVIDIA GPU Products":
+	//   https://download.nvidia.com/XFree86/Linux-x86_64/570.133.07/README/supportedchips.html
+	// - PCI ID Database: https://pci-ids.ucw.cz/read/PC/10de
+	if vendorID == "10de" {
+		nvidiaDevices := map[string]string{
+			// H100 series
+			"2321": "H100 NVL",
+			"2330": "H100 SXM5 80GB",
+			"2331": "H100 PCIe",
+			"2339": "H100",
+			// H200 series
+			"2335": "H200",
+			// L4
+			"27b8": "L4",
+			// L40 series
+			"26b5": "L40",
+			"26b9": "L40S",
+			// A100 series
+			"20b0": "A100 SXM4 40GB",
+			"20b2": "A100 SXM4 80GB",
+			"20b5": "A100 PCIe 80GB",
+			"20f1": "A100 PCIe 40GB",
+			// A30/A40
+			"20b7": "A30",
+			"2235": "A40",
+			// Ada Lovelace (GeForce / RTX 4000 Ada)
+			"2684": "RTX 4090",
+			"27b0": "RTX 4000 SFF Ada",
+			// V100 series
+			"1db4": "V100 PCIe 16GB",
+			"1db5": "V100 SXM2 32GB",
+			"1db6": "V100 PCIe 32GB",
+		}
+		if name, ok := nvidiaDevices[deviceID]; ok {
+			return name
+		}
+	}
+
+	// Fall back to class-based description
+	classCode = strings.TrimPrefix(classCode, "0x")
+	if len(classCode) >= 4 {
+		switch classCode[:4] {
+		case "0300":
+			return "VGA Controller"
+		case "0302":
+			return "3D Controller"
+		case "0403":
+			return "Audio Device"
+		}
+	}
+
+	return "PCI Device"
+}
+
+// DetermineDeviceType determines the DeviceType based on device properties
+func DetermineDeviceType(device *AvailableDevice) DeviceType {
+	devicePath := filepath.Join(sysfsDevicesPath, device.PCIAddress)
+	classCode, err := readSysfsFile(filepath.Join(devicePath, "class"))
+	if err != nil {
+		return DeviceTypeGeneric
+	}
+
+	classCode = strings.TrimPrefix(classCode, "0x")
+	if len(classCode) >= 4 {
+		classPrefix := classCode[:4]
+		// 0300 = VGA controller, 0302 = 3D controller
+		if classPrefix == "0300" || classPrefix == "0302" {
+			return DeviceTypeGPU
+		}
+	}
+
+	return DeviceTypeGeneric
+}
diff --git a/lib/devices/vfio.go b/lib/devices/vfio.go
new file mode 100644
index 00000000..38606f5b
--- /dev/null
+++ b/lib/devices/vfio.go
@@ -0,0 +1,310 @@
+package devices
+
+import (
+	"fmt"
+	"log/slog"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"strings"
+	"time"
+)
+
+const (
+	vfioDriverPath = "/sys/bus/pci/drivers/vfio-pci"
+	pciDriversPath = "/sys/bus/pci/drivers"
+	vfioDevicePath = "/dev/vfio"
+)
+
+// VFIOBinder handles binding and unbinding devices to/from VFIO
+type VFIOBinder struct{}
+
+// NewVFIOBinder creates a new VFIOBinder
+func NewVFIOBinder() *VFIOBinder {
+	return &VFIOBinder{}
+}
+
+// IsVFIOAvailable checks if VFIO is available on the system
+func (v *VFIOBinder) IsVFIOAvailable() bool {
+	_, err := os.Stat(vfioDriverPath)
+	return err == nil
+}
+
+// IsDeviceBoundToVFIO checks if a device is currently bound to vfio-pci
+func (v *VFIOBinder) IsDeviceBoundToVFIO(pciAddress string) bool {
+	driver := readCurrentDriver(pciAddress)
+	return driver != nil && *driver == "vfio-pci"
+}
+
+// BindToVFIO binds a PCI device to the vfio-pci driver.
+// This requires:
+//  1. Stopping any processes using the device (e.g., nvidia-persistenced for NVIDIA GPUs)
+//  2. Unbinding the device from its current driver (if any)
+//  3. Binding it to vfio-pci
+func (v *VFIOBinder) BindToVFIO(pciAddress string) error {
+	if !v.IsVFIOAvailable() {
+		return ErrVFIONotAvailable
+	}
+
+	if v.IsDeviceBoundToVFIO(pciAddress) {
+		return ErrAlreadyBound
+	}
+
+	// Get device info for vendor/device IDs
+	deviceInfo, err := GetDeviceInfo(pciAddress)
+	if err != nil {
+		return fmt.Errorf("get device info: %w", err)
+	}
+
+	// For NVIDIA GPUs, stop nvidia-persistenced which holds the device open.
+	// This is required because the service keeps /dev/nvidia* open, blocking driver unbind.
+	isNvidia := deviceInfo.VendorID == "10de"
+	stoppedNvidiaPersistenced := false
+	if isNvidia {
+		if err := v.stopNvidiaPersistenced(); err != nil {
+			slog.Warn("failed to stop nvidia-persistenced", "error", err)
+			// Continue anyway - it might not be running
+		} else {
+			stoppedNvidiaPersistenced = true
+		}
+	}
+
+	// Use defer to ensure nvidia-persistenced is restarted on any error
+	// after we successfully stopped it
+	bindSucceeded := false
+	defer func() {
+		if stoppedNvidiaPersistenced && !bindSucceeded {
+			_ = v.startNvidiaPersistenced()
+		}
+	}()
+
+	// Unbind from current driver if bound
+	currentDriver := readCurrentDriver(pciAddress)
+	if currentDriver != nil && *currentDriver != "" {
+		if err := v.unbindFromDriver(pciAddress, *currentDriver); err != nil {
+			return fmt.Errorf("unbind from %s: %w", *currentDriver, err)
+		}
+	}
+
+	// Override driver to vfio-pci
+	if err := v.setDriverOverride(pciAddress, "vfio-pci"); err != nil {
+		return fmt.Errorf("set driver override: %w", err)
+	}
+
+	// Bind to vfio-pci using the bind method (more reliable than new_id)
+	if err := v.bindDeviceToVFIO(pciAddress); err != nil {
+		return fmt.Errorf("bind to vfio-pci: %w", err)
+	}
+
+	bindSucceeded = true
+	return nil
+}
+
+// UnbindFromVFIO unbinds a device from vfio-pci and restores the original driver
func (v *VFIOBinder) UnbindFromVFIO(pciAddress string) error {
+	if !v.IsDeviceBoundToVFIO(pciAddress) {
+		return ErrNotBound
+	}
+
+	// Get device info to check if it's NVIDIA
+	deviceInfo, err := GetDeviceInfo(pciAddress)
+	if err != nil {
+		return fmt.Errorf("get device info: %w", err)
+	}
+	isNvidia := deviceInfo.VendorID == "10de"
+
+	// Clear driver override first (non-fatal; continue with unbind on failure)
+	if err := v.setDriverOverride(pciAddress, ""); err != nil {
+		slog.Warn("failed to clear driver_override", "pci_address", pciAddress, "error", err)
+	}
+
+	// Unbind from vfio-pci
+	if err := v.unbindFromDriver(pciAddress, "vfio-pci"); err != nil {
+		return fmt.Errorf("unbind from vfio-pci: %w", err)
+	}
+
+	// Trigger driver probe to rebind to original driver
+	if err := v.triggerDriverProbe(pciAddress); err != nil {
+		slog.Warn("failed to trigger driver probe", "pci_address", pciAddress, "error", err)
+	}
+
+	// For NVIDIA GPUs, restart nvidia-persistenced after rebinding
+	if isNvidia {
+		if err := v.startNvidiaPersistenced(); err != nil {
+			slog.Warn("failed to start nvidia-persistenced", "error", err)
+		}
+	}
+
+	return nil
+}
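+// A typical caller lifecycle looks like this (hypothetical sketch; error
+// handling elided):
+//
+//	b := NewVFIOBinder()
+//	err := b.BindToVFIO("0000:a2:00.0")    // detach from host driver, hand to vfio-pci
+//	// ... run the VM with the device attached ...
+//	err = b.UnbindFromVFIO("0000:a2:00.0") // return the device to its host driver
+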
+// unbindFromDriver unbinds a device from its current driver
+func (v *VFIOBinder) unbindFromDriver(pciAddress, driver string) error {
+	unbindPath := filepath.Join(pciDriversPath, driver, "unbind")
+	return os.WriteFile(unbindPath, []byte(pciAddress), 0200)
+}
+
+// setDriverOverride sets the driver_override for a device
+func (v *VFIOBinder) setDriverOverride(pciAddress, driver string) error {
+	overridePath := filepath.Join(sysfsDevicesPath, pciAddress, "driver_override")
+
+	// Empty string clears the override
+	content := driver
+	if driver == "" {
+		content = "\n" // Writing newline clears the override
+	}
+
+	return os.WriteFile(overridePath, []byte(content), 0200)
+}
+
+// bindDeviceToVFIO binds a specific device to vfio-pci using bind
+func (v *VFIOBinder) bindDeviceToVFIO(pciAddress string) error {
+	bindPath := filepath.Join(vfioDriverPath, "bind")
+	return os.WriteFile(bindPath, []byte(pciAddress), 0200)
+}
+
+// triggerDriverProbe triggers the kernel to probe for drivers for a device
+func (v *VFIOBinder) triggerDriverProbe(pciAddress string) error {
+	probePath := "/sys/bus/pci/drivers_probe"
+	return os.WriteFile(probePath, []byte(pciAddress), 0200)
+}
+
+// stopNvidiaPersistenced stops the nvidia-persistenced service.
+// This service keeps /dev/nvidia* open and blocks driver unbind.
+func (v *VFIOBinder) stopNvidiaPersistenced() error {
+	slog.Debug("stopping nvidia-persistenced service")
+
+	// Try systemctl first (works as root)
+	cmd := exec.Command("systemctl", "stop", "nvidia-persistenced")
+	if err := cmd.Run(); err == nil {
+		return nil
+	}
+
+	// Fall back to killing the process directly (works with CAP_KILL or as root).
+	// This is less clean but allows running with capabilities instead of full root.
+	cmd = exec.Command("pkill", "-TERM", "nvidia-persistenced")
+	if err := cmd.Run(); err != nil {
+		// Check if process even exists
+		checkCmd := exec.Command("pgrep", "nvidia-persistenced")
+		if checkCmd.Run() != nil {
+			// Process doesn't exist, that's fine
+			return nil
+		}
+		return fmt.Errorf("failed to stop nvidia-persistenced (try: sudo systemctl stop nvidia-persistenced)")
+	}
+
+	// Wait for process to exit with polling instead of arbitrary sleep
+	return v.waitForProcessExit("nvidia-persistenced", 2*time.Second)
+}
+
+// waitForProcessExit polls for a process to exit, with timeout
+func (v *VFIOBinder) waitForProcessExit(processName string, timeout time.Duration) error {
+	deadline := time.Now().Add(timeout)
+	pollInterval := 100 * time.Millisecond
+
+	for time.Now().Before(deadline) {
+		checkCmd := exec.Command("pgrep", processName)
+		if checkCmd.Run() != nil {
+			// Process no longer exists
+			return nil
+		}
+		time.Sleep(pollInterval)
+	}
+
+	// Timeout - process still running
+	slog.Warn("timeout waiting for process to exit", "process", processName, "timeout", timeout)
+	return nil // Continue anyway, the bind might still work
+}
+
+// startNvidiaPersistenced starts the nvidia-persistenced service
+func (v *VFIOBinder) startNvidiaPersistenced() error {
+	slog.Debug("starting nvidia-persistenced service")
+
+	// Try systemctl first (works as root)
+	cmd := exec.Command("systemctl", "start", "nvidia-persistenced")
+	if err := cmd.Run(); err != nil {
+		// If we can't start it, just log - not critical for test cleanup
+		slog.Warn("could not restart nvidia-persistenced", "error", err)
+	}
+	return nil
+}
+
+// GetVFIOGroupPath returns the path to the VFIO group device for a PCI device
+func (v *VFIOBinder) GetVFIOGroupPath(pciAddress string) (string, error) {
+	iommuGroup, err := readIOMMUGroup(pciAddress)
+	if err != nil {
+		return "", fmt.Errorf("read iommu group: %w", err)
+	}
+
+	groupPath := filepath.Join(vfioDevicePath, fmt.Sprintf("%d", iommuGroup))
+	if _, err := os.Stat(groupPath); os.IsNotExist(err) {
+		return "", fmt.Errorf("vfio group device not found: %s", groupPath)
+	}
+
+	return groupPath, nil
+}
+
+// CheckIOMMUGroupSafe checks if all devices in the IOMMU group are safe to pass through.
+// Returns an error if there are other devices in the group that aren't being passed through.
+func (v *VFIOBinder) CheckIOMMUGroupSafe(pciAddress string, allowedDevices []string) error {
+	iommuGroup, err := readIOMMUGroup(pciAddress)
+	if err != nil {
+		return fmt.Errorf("read iommu group: %w", err)
+	}
+
+	groupDevices, err := GetIOMMUGroupDevices(iommuGroup)
+	if err != nil {
+		return fmt.Errorf("get iommu group devices: %w", err)
+	}
+
+	// Build a set of allowed devices
+	allowed := make(map[string]bool)
+	for _, addr := range allowedDevices {
+		allowed[addr] = true
+	}
+
+	// Check each device in the group
+	for _, device := range groupDevices {
+		if allowed[device] {
+			continue
+		}
+
+		// Check if device is already bound to vfio-pci or is a bridge
+		driver := readCurrentDriver(device)
+		if driver != nil && *driver == "vfio-pci" {
+			continue
+		}
+
+		// Check if it's a PCI bridge (these are usually okay to leave)
+		if v.isPCIBridge(device) {
+			continue
+		}
+
+		// Found a device that's not allowed and not safe
+		return fmt.Errorf("%w: device %s in IOMMU group %d is not included",
+			ErrIOMMUGroupConflict, device, iommuGroup)
+	}
+
+	return nil
+}
+
+// isPCIBridge checks if a device is a PCI bridge
+func (v *VFIOBinder) isPCIBridge(pciAddress string) bool {
+	classPath := filepath.Join(sysfsDevicesPath, pciAddress, "class")
+	classCode, err := readSysfsFile(classPath)
+	if err != nil {
+		return false
+	}
+
+	classCode = strings.TrimPrefix(classCode, "0x")
+	// Class 06 = Bridge; any bridge subclass (host, PCI-to-PCI, etc.) is treated as a bridge here
+	return len(classCode) >= 4 && classCode[:2] == "06"
+}
+
+// GetDeviceSysfsPath returns the sysfs path for a PCI device (used by cloud-hypervisor)
+func GetDeviceSysfsPath(pciAddress string) string {
+	return filepath.Join(sysfsDevicesPath, pciAddress) + "/"
+}

From 2d22dca7602451ccfe36f54fb4d27e44693813b1 Mon Sep 17 00:00:00 2001
From: Rafael Garcia
Date: Sat, 13 Dec 2025 21:44:19 +0000
Subject: [PATCH 03/17] feat(devices): add device manager core

Add the main device management logic:
- Manager interface with CRUD operations for devices
- CreateDevice, GetDevice, DeleteDevice, ListDevices
- MarkAttached/MarkDetached for instance lifecycle
- BindToVFIO/UnbindFromVFIO for driver management
- Persistence via JSON metadata files
---
 lib/devices/manager.go      | 804 ++++++++++++++++++++++++++++++++++++
 lib/devices/manager_test.go | 165 ++++++++
 2 files changed, 969 insertions(+)
 create mode 100644 lib/devices/manager.go
 create mode 100644 lib/devices/manager_test.go

diff --git a/lib/devices/manager.go b/lib/devices/manager.go
new file mode 100644
index 00000000..79e9dc32
--- /dev/null
+++ b/lib/devices/manager.go
@@ -0,0 +1,804 @@
+package devices
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"os"
+	"os/exec"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/nrednav/cuid2"
+	"github.com/onkernel/hypeman/lib/logger"
+	"github.com/onkernel/hypeman/lib/paths"
+)
+
+// InstanceLivenessChecker provides a way to check if an instance is running.
+// This interface allows devices to query instance state without a circular dependency.
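+//
+// A minimal stub (hypothetical, e.g. for tests) shows the expected shape:
+//
+//	type stubChecker struct{ state map[string][]string } // instanceID -> deviceIDs
+//
+//	func (s stubChecker) IsInstanceRunning(ctx context.Context, id string) bool {
+//		_, ok := s.state[id]
+//		return ok
+//	}
+//
+//	func (s stubChecker) GetInstanceDevices(ctx context.Context, id string) []string {
+//		return s.state[id]
+//	}
+//
+//	func (s stubChecker) ListAllInstanceDevices(ctx context.Context) map[string][]string {
+//		return s.state
+//	}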
+type InstanceLivenessChecker interface {
+	// IsInstanceRunning returns true if the instance exists and is in a running state
+	// (i.e., has an active VMM process). Returns false if the instance doesn't exist
+	// or is stopped/standby/unknown.
+	IsInstanceRunning(ctx context.Context, instanceID string) bool
+
+	// GetInstanceDevices returns the list of device IDs attached to an instance.
+	// Returns nil if the instance doesn't exist.
+	GetInstanceDevices(ctx context.Context, instanceID string) []string
+
+	// ListAllInstanceDevices returns a map of instanceID -> []deviceIDs for all instances.
+	ListAllInstanceDevices(ctx context.Context) map[string][]string
+}
+
+// Manager provides device management operations
+type Manager interface {
+	// ListDevices returns all registered devices
+	ListDevices(ctx context.Context) ([]Device, error)
+
+	// ListAvailableDevices discovers passthrough-capable devices on the host
+	ListAvailableDevices(ctx context.Context) ([]AvailableDevice, error)
+
+	// CreateDevice registers a new device for passthrough
+	CreateDevice(ctx context.Context, req CreateDeviceRequest) (*Device, error)
+
+	// GetDevice returns a device by ID or name
+	GetDevice(ctx context.Context, idOrName string) (*Device, error)
+
+	// DeleteDevice unregisters a device
+	DeleteDevice(ctx context.Context, id string) error
+
+	// BindToVFIO binds a device to vfio-pci driver
+	BindToVFIO(ctx context.Context, id string) error
+
+	// UnbindFromVFIO unbinds a device from vfio-pci driver
+	UnbindFromVFIO(ctx context.Context, id string) error
+
+	// MarkAttached marks a device as attached to an instance
+	MarkAttached(ctx context.Context, deviceID, instanceID string) error
+
+	// MarkDetached marks a device as detached from an instance
+	MarkDetached(ctx context.Context, deviceID string) error
+
+	// ReconcileDevices cleans up stale device state on startup.
+	// It detects devices with AttachedTo referencing non-existent instances
+	// and clears the orphaned attachment state.
+	ReconcileDevices(ctx context.Context) error
+
+	// SetLivenessChecker sets the instance liveness checker after construction.
+	// This allows breaking the circular dependency between device and instance managers.
+	SetLivenessChecker(checker InstanceLivenessChecker)
+}
+
+type manager struct {
+	paths           *paths.Paths
+	vfioBinder      *VFIOBinder
+	livenessChecker InstanceLivenessChecker
+	mu              sync.RWMutex
+}
+
+// NewManager creates a new device manager.
+// Use SetLivenessChecker after construction to enable accurate orphan detection.
+func NewManager(p *paths.Paths) Manager {
+	return &manager{
+		paths:      p,
+		vfioBinder: NewVFIOBinder(),
+	}
+}
+
+// SetLivenessChecker sets the instance liveness checker.
+// This enables accurate orphan detection during reconciliation.
+// If not set, orphan detection falls back to checking if the instance directory exists.
+func (m *manager) SetLivenessChecker(checker InstanceLivenessChecker) {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	m.livenessChecker = checker
+}
+
+func (m *manager) ListDevices(ctx context.Context) ([]Device, error) {
+	// RLock protects against concurrent directory modifications (CreateDevice/DeleteDevice)
+	// during iteration. While individual file reads are atomic, directory iteration could
+	// see inconsistent state if a device is being created or deleted concurrently.
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	entries, err := os.ReadDir(m.paths.DevicesDir())
+	if err != nil {
+		if os.IsNotExist(err) {
+			return []Device{}, nil
+		}
+		return nil, fmt.Errorf("read devices dir: %w", err)
+	}
+
+	var devices []Device
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+
+		device, err := m.loadDevice(entry.Name())
+		if err != nil {
+			continue
+		}
+
+		// Update VFIO binding status from system state
+		device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress)
+
+		devices = append(devices, *device)
+	}
+
+	return devices, nil
+}
+
+func (m *manager) ListAvailableDevices(ctx context.Context) ([]AvailableDevice, error) {
+	return DiscoverAvailableDevices()
+}
+
+func (m *manager) CreateDevice(ctx context.Context, req CreateDeviceRequest) (*Device, error) {
+	log := logger.FromContext(ctx)
+
+	// Validate PCI address format (required)
+	if !ValidatePCIAddress(req.PCIAddress) {
+		return nil, ErrInvalidPCIAddress
+	}
+
+	// Get device info from sysfs
+	deviceInfo, err := GetDeviceInfo(req.PCIAddress)
+	if err != nil {
+		return nil, fmt.Errorf("get device info: %w", err)
+	}
+
+	// Generate ID
+	id := cuid2.Generate()
+
+	// Handle optional name: if not provided, generate one from PCI address
+	name := req.Name
+	if name == "" {
+		// Generate name from PCI address: 0000:a2:00.0 -> pci-0000-a2-00-0
+		name = "pci-" + strings.ReplaceAll(strings.ReplaceAll(req.PCIAddress, ":", "-"), ".", "-")
+	}
+
+	// Validate name format
+	if !ValidateDeviceName(name) {
+		return nil, ErrInvalidName
+	}
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	// Check if name already exists
+	if _, err := m.findByName(name); err == nil {
+		return nil, ErrNameExists
+	}
+
+	// Check if PCI address already registered
+	if _, err := m.findByPCIAddress(req.PCIAddress); err == nil {
+		return nil, ErrAlreadyExists
+	}
+
+	// Create device
+	device := &Device{
+		Id:          id,
+		Name:        name,
+		Type:        DetermineDeviceType(deviceInfo),
+		PCIAddress:  req.PCIAddress,
+		VendorID:    deviceInfo.VendorID,
+		DeviceID:    deviceInfo.DeviceID,
+		IOMMUGroup:  deviceInfo.IOMMUGroup,
+		BoundToVFIO: m.vfioBinder.IsDeviceBoundToVFIO(req.PCIAddress),
+		AttachedTo:  nil,
+		CreatedAt:   time.Now(),
+	}
+
+	// Ensure directories exist
+	if err := os.MkdirAll(m.paths.DeviceDir(id), 0755); err != nil {
+		return nil, fmt.Errorf("create device dir: %w", err)
+	}
+
+	// Save device metadata
+	if err := m.saveDevice(device); err != nil {
+		os.RemoveAll(m.paths.DeviceDir(id))
+		return nil, fmt.Errorf("save device: %w", err)
+	}
+
+	log.InfoContext(ctx, "registered device",
+		"id", id,
+		"name", name,
+		"pci_address", req.PCIAddress,
+		"type", device.Type,
+	)
+
+	return device, nil
+}
+
+func (m *manager) GetDevice(ctx context.Context, idOrName string) (*Device, error) {
+	// RLock protects against concurrent modifications while looking up by name,
+	// which requires iterating the devices directory.
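+	// Lookup order: ID wins. A device name that happened to equal another
+	// device's cuid2 ID would resolve to that ID first.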
+	m.mu.RLock()
+	defer m.mu.RUnlock()
+
+	// Try by ID first
+	device, err := m.loadDevice(idOrName)
+	if err == nil {
+		device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress)
+		return device, nil
+	}
+
+	// Try by name
+	device, err = m.findByName(idOrName)
+	if err == nil {
+		device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress)
+		return device, nil
+	}
+
+	return nil, ErrNotFound
+}
+
+func (m *manager) DeleteDevice(ctx context.Context, id string) error {
+	log := logger.FromContext(ctx)
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	device, err := m.loadDevice(id)
+	if err != nil {
+		// Try by name
+		device, err = m.findByName(id)
+		if err != nil {
+			return ErrNotFound
+		}
+		id = device.Id
+	}
+
+	// Check if device is attached
+	if device.AttachedTo != nil {
+		return ErrInUse
+	}
+
+	// Remove device directory
+	if err := os.RemoveAll(m.paths.DeviceDir(id)); err != nil {
+		return fmt.Errorf("remove device dir: %w", err)
+	}
+
+	log.InfoContext(ctx, "unregistered device",
+		"id", id,
+		"name", device.Name,
+		"pci_address", device.PCIAddress,
+	)
+
+	return nil
+}
+
+func (m *manager) BindToVFIO(ctx context.Context, id string) error {
+	log := logger.FromContext(ctx)
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	device, err := m.loadDevice(id)
+	if err != nil {
+		// Try by name
+		device, err = m.findByName(id)
+		if err != nil {
+			return ErrNotFound
+		}
+	}
+
+	// Check IOMMU group safety
+	if err := m.vfioBinder.CheckIOMMUGroupSafe(device.PCIAddress, []string{device.PCIAddress}); err != nil {
+		return err
+	}
+
+	// Bind to VFIO
+	if err := m.vfioBinder.BindToVFIO(device.PCIAddress); err != nil {
+		return err
+	}
+
+	// Update device state
+	device.BoundToVFIO = true
+	if err := m.saveDevice(device); err != nil {
+		return fmt.Errorf("save device: %w", err)
+	}
+
+	log.InfoContext(ctx, "bound device to VFIO",
+		"id", device.Id,
+		"name", device.Name,
+		"pci_address", device.PCIAddress,
+	)
+
+	return nil
+}
+
+func (m *manager) UnbindFromVFIO(ctx context.Context, id string) error {
+	log := logger.FromContext(ctx)
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	device, err := m.loadDevice(id)
+	if err != nil {
+		// Try by name
+		device, err = m.findByName(id)
+		if err != nil {
+			return ErrNotFound
+		}
+	}
+
+	// Check if device is attached
+	if device.AttachedTo != nil {
+		return ErrInUse
+	}
+
+	// Unbind from VFIO
+	if err := m.vfioBinder.UnbindFromVFIO(device.PCIAddress); err != nil {
+		return err
+	}
+
+	// Update device state
+	device.BoundToVFIO = false
+	if err := m.saveDevice(device); err != nil {
+		return fmt.Errorf("save device: %w", err)
+	}
+
+	log.InfoContext(ctx, "unbound device from VFIO",
+		"id", device.Id,
+		"name", device.Name,
+		"pci_address", device.PCIAddress,
+	)
+
+	return nil
+}
+
+func (m *manager) MarkAttached(ctx context.Context, deviceID, instanceID string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	device, err := m.loadDevice(deviceID)
+	if err != nil {
+		device, err = m.findByName(deviceID)
+		if err != nil {
+			return ErrNotFound
+		}
+	}
+
+	if device.AttachedTo != nil {
+		return ErrInUse
+	}
+
+	device.AttachedTo = &instanceID
+	return m.saveDevice(device)
+}
+
+func (m *manager) MarkDetached(ctx context.Context, deviceID string) error {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	device, err := m.loadDevice(deviceID)
+	if err != nil {
+		device, err = m.findByName(deviceID)
+		if err != nil {
+			return ErrNotFound
+		}
+	}
+
+	device.AttachedTo = nil
+	return m.saveDevice(device)
+}
+
+// ReconcileDevices cleans up stale device state on startup.
+// It performs safe-by-default reconciliation:
+//  1. Detects orphaned device attachments (instance missing or not running)
+//  2. Clears orphaned AttachedTo metadata
+//  3. Runs GPU-reset-lite for orphaned devices (unbind VFIO, clear override, probe driver)
+//  4. Logs mismatches between instance→device and device→instance references
+//  5. Detects suspicious cloud-hypervisor processes
+func (m *manager) ReconcileDevices(ctx context.Context) error {
+	log := logger.FromContext(ctx)
+	log.InfoContext(ctx, "reconciling device state")
+
+	m.mu.Lock()
+	defer m.mu.Unlock()
+
+	entries, err := os.ReadDir(m.paths.DevicesDir())
+	if err != nil {
+		if os.IsNotExist(err) {
+			// No devices directory yet, nothing to reconcile
+			return nil
+		}
+		return fmt.Errorf("read devices dir: %w", err)
+	}
+
+	// Load all devices
+	var allDevices []*Device
+	deviceByID := make(map[string]*Device)
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+
+		device, err := m.loadDevice(entry.Name())
+		if err != nil {
+			log.WarnContext(ctx, "failed to load device during reconciliation",
+				"device_id", entry.Name(),
+				"error", err,
+			)
+			continue
+		}
+		// Update VFIO binding status from system state
+		device.BoundToVFIO = m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress)
+		allDevices = append(allDevices, device)
+		deviceByID[device.Id] = device
+	}
+
+	// Build instance→device map if we have a liveness checker
+	var instanceDevices map[string][]string
+	if m.livenessChecker != nil {
+		instanceDevices = m.livenessChecker.ListAllInstanceDevices(ctx)
+	}
+
+	// Track stats
+	var stats reconcileStats
+
+	// Phase 1: Detect and handle orphaned device attachments
+	for _, device := range allDevices {
+		if device.AttachedTo == nil {
+			continue
+		}
+
+		instanceID := *device.AttachedTo
+		orphaned := m.isInstanceOrphaned(ctx, instanceID)
+
+		if orphaned {
+			log.WarnContext(ctx, "detected orphaned device attachment",
+				"device_id", device.Id,
+				"device_name", device.Name,
+				"pci_address", device.PCIAddress,
+				"orphaned_instance_id", instanceID,
+			)
+
+			// Clear the orphaned attachment
+			device.AttachedTo = nil
+			if err := m.saveDevice(device); err != nil {
+				log.ErrorContext(ctx, "failed to save device after clearing attachment",
+					"device_id", device.Id,
+					"error", err,
+				)
+				stats.errors++
+				continue
+			}
+			stats.orphanedCleared++
+
+			// Run GPU-reset-lite for orphaned device
+			m.resetOrphanedDevice(ctx, device, &stats)
+		}
+	}
+
+	// Phase 2: Two-way reconciliation (log-only for mismatches)
+	if instanceDevices != nil {
+		for instanceID, deviceIDs := range instanceDevices {
+			for _, deviceID := range deviceIDs {
+				device, exists := deviceByID[deviceID]
+				if !exists {
+					// Instance references a device that doesn't exist in device metadata
+					log.WarnContext(ctx, "instance references unknown device (mismatch)",
+						"instance_id", instanceID,
+						"device_id", deviceID,
+					)
+					stats.mismatches++
+					continue
+				}
+
+				// Check if device's AttachedTo matches
+				if device.AttachedTo == nil {
+					log.WarnContext(ctx, "instance references device but device.AttachedTo is nil (mismatch)",
+						"instance_id", instanceID,
+						"device_id", deviceID,
+						"device_name", device.Name,
+					)
+					stats.mismatches++
+				} else if *device.AttachedTo != instanceID {
+					log.WarnContext(ctx, "instance references device but device.AttachedTo points elsewhere (mismatch)",
+						"instance_id", instanceID,
+						"device_id", deviceID,
+						"device_name", device.Name,
+						"device_attached_to", *device.AttachedTo,
+					)
+					stats.mismatches++
+				}
+
+				// Check VFIO binding state - if instance is running, device should be bound
+				if m.livenessChecker != nil && m.livenessChecker.IsInstanceRunning(ctx, instanceID) {
+					if !device.BoundToVFIO {
+						log.WarnContext(ctx, "running instance has device not bound to VFIO (mismatch)",
+							"instance_id", instanceID,
+							"device_id", deviceID,
+							"device_name", device.Name,
+							"pci_address", device.PCIAddress,
+						)
+						stats.mismatches++
+					}
+				}
+			}
+		}
+	}
+
+	// Phase 3: Detect suspicious cloud-hypervisor processes (log-only)
+	m.detectSuspiciousVMMProcesses(ctx, &stats)
+
+	// Log summary
+	log.InfoContext(ctx, "device reconciliation complete",
+		"orphaned_cleared", stats.orphanedCleared,
+		"reset_attempted", stats.resetAttempted,
+		"reset_succeeded", stats.resetSucceeded,
+		"reset_failed", stats.resetFailed,
+		"mismatches", stats.mismatches,
+		"suspicious_vmm", stats.suspiciousVMM,
+		"errors", stats.errors,
+	)
+
+	return nil
+}
+
+// reconcileStats tracks reconciliation metrics
+type reconcileStats struct {
+	orphanedCleared int
+	resetAttempted  int
+	resetSucceeded  int
+	resetFailed     int
+	mismatches      int
+	suspiciousVMM   int
+	errors          int
+}
+
+// isInstanceOrphaned checks if an instance should be considered orphaned
+// (device attachment should be cleared).
+func (m *manager) isInstanceOrphaned(ctx context.Context, instanceID string) bool {
+	// If we have a liveness checker, use it for more accurate detection
+	if m.livenessChecker != nil {
+		// Instance is orphaned if it's not running (stopped, standby, unknown, or missing)
+		return !m.livenessChecker.IsInstanceRunning(ctx, instanceID)
+	}
+
+	// Fallback: just check if instance directory exists
+	instanceDir := m.paths.InstanceDir(instanceID)
+	_, err := os.Stat(instanceDir)
+	return os.IsNotExist(err)
+}
+
+// resetOrphanedDevice performs GPU-reset-lite for an orphaned device.
+// This is safe because we've already confirmed the device is orphaned.
+// Steps mirror gpu-reset.sh but are per-device and non-destructive.
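+// In sysfs terms the reset amounts to, roughly (using the example address
+// 0000:a2:00.0, and only unbinding if the device is bound to vfio-pci):
+//
+//	echo 0000:a2:00.0 > /sys/bus/pci/drivers/vfio-pci/unbind
+//	echo > /sys/bus/pci/devices/0000:a2:00.0/driver_override
+//	echo 0000:a2:00.0 > /sys/bus/pci/drivers_probe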
+func (m *manager) resetOrphanedDevice(ctx context.Context, device *Device, stats *reconcileStats) {
+	log := logger.FromContext(ctx)
+	stats.resetAttempted++
+
+	log.InfoContext(ctx, "running GPU-reset-lite for orphaned device",
+		"device_id", device.Id,
+		"device_name", device.Name,
+		"pci_address", device.PCIAddress,
+		"bound_to_vfio", device.BoundToVFIO,
+	)
+
+	// Step 1: If bound to VFIO, unbind
+	if device.BoundToVFIO {
+		log.DebugContext(ctx, "unbinding orphaned device from VFIO", "pci_address", device.PCIAddress)
+		if err := m.vfioBinder.unbindFromDriver(device.PCIAddress, "vfio-pci"); err != nil {
+			log.WarnContext(ctx, "failed to unbind device from VFIO during reset",
+				"device_id", device.Id,
+				"pci_address", device.PCIAddress,
+				"error", err,
+			)
+			// Continue with other steps
+		}
+	}
+
+	// Step 2: Clear driver_override
+	log.DebugContext(ctx, "clearing driver_override", "pci_address", device.PCIAddress)
+	if err := m.vfioBinder.setDriverOverride(device.PCIAddress, ""); err != nil {
+		log.WarnContext(ctx, "failed to clear driver_override during reset",
+			"device_id", device.Id,
+			"pci_address", device.PCIAddress,
+			"error", err,
+		)
+		// Continue with other steps
+	}
+
+	// Step 3: Trigger driver probe to rebind to original driver
+	log.DebugContext(ctx, "triggering driver probe", "pci_address", device.PCIAddress)
+	if err := m.vfioBinder.triggerDriverProbe(device.PCIAddress); err != nil {
+		log.WarnContext(ctx, "failed to trigger driver probe during reset",
+			"device_id", device.Id,
+			"pci_address", device.PCIAddress,
+			"error", err,
+		)
+	}
+
+	// Step 4: For NVIDIA devices, restart nvidia-persistenced
+	if device.VendorID == "10de" {
+		log.DebugContext(ctx, "restarting nvidia-persistenced", "pci_address", device.PCIAddress)
+		if err := m.vfioBinder.startNvidiaPersistenced(); err != nil {
+			log.WarnContext(ctx, "failed to restart nvidia-persistenced during reset",
+				"device_id", device.Id,
+				"error", err,
+			)
+		}
+	}
+
+	// Verify the device is now unbound from VFIO
+	stillBoundToVFIO := m.vfioBinder.IsDeviceBoundToVFIO(device.PCIAddress)
+	if stillBoundToVFIO {
+		log.WarnContext(ctx, "device still bound to VFIO after reset-lite",
+			"device_id", device.Id,
+			"pci_address", device.PCIAddress,
+		)
+		stats.resetFailed++
+	} else {
+		log.InfoContext(ctx, "GPU-reset-lite completed for orphaned device",
+			"device_id", device.Id,
+			"device_name", device.Name,
+			"pci_address", device.PCIAddress,
+		)
+		stats.resetSucceeded++
+	}
+
+	// Update device metadata to reflect new VFIO state
+	device.BoundToVFIO = stillBoundToVFIO
+	if err := m.saveDevice(device); err != nil {
+		log.WarnContext(ctx, "failed to save device after reset-lite",
+			"device_id", device.Id,
+			"error", err,
+		)
+	}
+}
+
+// detectSuspiciousVMMProcesses logs warnings about cloud-hypervisor processes
+// that don't match known instances. This is log-only (no killing).
+func (m *manager) detectSuspiciousVMMProcesses(ctx context.Context, stats *reconcileStats) {
+	log := logger.FromContext(ctx)
+
+	// Find all cloud-hypervisor processes
+	cmd := exec.Command("pgrep", "-a", "cloud-hypervisor")
+	output, err := cmd.Output()
+	if err != nil {
+		// pgrep returns exit code 1 if no processes found - that's fine
+		return
+	}
+
+	lines := strings.Split(strings.TrimSpace(string(output)), "\n")
+	if len(lines) == 0 || (len(lines) == 1 && lines[0] == "") {
+		return
+	}
+
+	// Get list of running instance sockets if we have a liveness checker
+	var runningInstances map[string]bool
+	if m.livenessChecker != nil {
+		instanceDevices := m.livenessChecker.ListAllInstanceDevices(ctx)
+		runningInstances = make(map[string]bool)
+		for instanceID := range instanceDevices {
+			if m.livenessChecker.IsInstanceRunning(ctx, instanceID) {
+				runningInstances[instanceID] = true
+			}
+		}
+	}
+
+	for _, line := range lines {
+		if line == "" {
+			continue
+		}
+
+		// Try to extract socket path from command line to match against known instances.
+		// The cloud-hypervisor command typically includes --api-socket.
+		socketPath := ""
+		parts := strings.Fields(line)
+		for i, part := range parts {
+			if part == "--api-socket" && i+1 < len(parts) {
+				socketPath = parts[i+1]
+				break
+			}
+		}
+
+		// Check if this socket path matches any instance directory
+		matched := false
+		if socketPath != "" {
+			// Socket path is typically like /var/lib/hypeman/guests/<instance-id>/ch.sock
+			// Try to extract instance ID
+			if strings.Contains(socketPath, "/guests/") {
+				pathParts := strings.Split(socketPath, "/guests/")
+				if len(pathParts) > 1 {
+					instancePath := pathParts[1]
+					instanceID := strings.Split(instancePath, "/")[0]
+					if runningInstances != nil && runningInstances[instanceID] {
+						matched = true
+					}
+				}
+			}
+		}
+
+		if !matched {
+			log.WarnContext(ctx, "detected untracked cloud-hypervisor process",
+				"process_info", line,
+				"socket_path", socketPath,
+				"remediation", "Run lib/devices/scripts/gpu-reset.sh for manual recovery if needed",
+			)
+			stats.suspiciousVMM++
+		}
+	}
+}
+
+// Helper methods
+
+func (m *manager) loadDevice(id string) (*Device, error) {
+	data, err := os.ReadFile(m.paths.DeviceMetadata(id))
+	if err != nil {
+		if os.IsNotExist(err) {
+			return nil, ErrNotFound
+		}
+		return nil, err
+	}
+
+	var device Device
+	if err := json.Unmarshal(data, &device); err != nil {
+		return nil, err
+	}
+
+	return &device, nil
+}
+
+func (m *manager) saveDevice(device *Device) error {
+	data, err := json.MarshalIndent(device, "", "  ")
+	if err != nil {
+		return err
+	}
+
+	return os.WriteFile(m.paths.DeviceMetadata(device.Id), data, 0644)
+}
+
+func (m *manager) findByName(name string) (*Device, error) {
+	entries, err := os.ReadDir(m.paths.DevicesDir())
+	if err != nil {
+		return nil, ErrNotFound
+	}
+
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+
+		device, err := m.loadDevice(entry.Name())
+		if err != nil {
+			continue
+		}
+
+		if device.Name == name {
+			return device, nil
+		}
+	}
+
+	return nil, ErrNotFound
+}
+
+func (m *manager) findByPCIAddress(pciAddress string) (*Device, error) {
+	entries, err := os.ReadDir(m.paths.DevicesDir())
+	if err != nil {
+		return nil, ErrNotFound
+	}
+
+	for _, entry := range entries {
+		if !entry.IsDir() {
+			continue
+		}
+
+		device, err := m.loadDevice(entry.Name())
+		if err != nil {
+			continue
+		}
+
+		if device.PCIAddress == pciAddress {
+			return device, nil
+		}
+	}
+
+	return nil, ErrNotFound
+}
diff --git a/lib/devices/manager_test.go b/lib/devices/manager_test.go
new file mode 100644
index 00000000..bb6a167f
--- /dev/null
+++ b/lib/devices/manager_test.go
@@ -0,0 +1,165 @@
+package devices
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+func TestValidateDeviceName(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected bool
+	}{
+		{"valid alphanumeric", "l4gpu", true},
+		{"valid with underscore", "my_gpu", true},
+		{"valid with dash", "gpu-1", true},
+		{"valid with dot", "nvidia.l4", true},
+		{"valid mixed", "my-gpu_01.test", true},
+		{"valid starting with number", "1gpu", true},
+		{"invalid empty", "", false},
+		{"invalid single char", "a", false}, // pattern requires at least 2 chars
+		{"invalid starts with dash", "-gpu", false},
+		{"invalid starts with underscore", "_gpu", false},
+		{"invalid starts with dot", ".gpu", false},
+		{"invalid contains space", "my gpu", false},
+		{"invalid contains special char", "gpu@1", false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ValidateDeviceName(tt.input)
+			assert.Equal(t, tt.expected, result, "ValidateDeviceName(%q)", tt.input)
+		})
+	}
+}
+
+func TestValidatePCIAddress(t *testing.T) {
+	tests := []struct {
+		name     string
+		input    string
+		expected bool
+	}{
+		{"valid standard", "0000:00:00.0", true},
+		{"valid with letters", "0000:a2:00.0", true},
+		{"valid uppercase", "0000:A2:00.0", true},
+		{"valid mixed case", "0000:aB:c1.2", true},
+		{"invalid too short", "0000:00:0.0", false},
+		{"invalid no domain", "00:00.0", false},
+		{"invalid missing colon", "000000:00.0", false},
+		{"invalid missing dot", "0000:00:000", false},
+		{"invalid extra segment", "0000:00:00:00.0", false},
+		{"invalid empty", "", false},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := ValidatePCIAddress(tt.input)
+			assert.Equal(t, tt.expected, result, "ValidatePCIAddress(%q)", tt.input)
+		})
+	}
+}
+
+func TestDetermineDeviceType(t *testing.T) {
+	// This test is limited since it reads from sysfs.
+	// We test the function structure but can't mock sysfs easily.
+	t.Run("returns generic for non-existent device", func(t *testing.T) {
+		device := &AvailableDevice{
+			PCIAddress: "0000:99:99.0", // Non-existent device
+		}
+		deviceType := DetermineDeviceType(device)
+		assert.Equal(t, DeviceTypeGeneric, deviceType)
+	})
+}
+
+func TestGetDeviceSysfsPath(t *testing.T) {
+	tests := []struct {
+		pciAddress string
+		expected   string
+	}{
+		{"0000:a2:00.0", "/sys/bus/pci/devices/0000:a2:00.0/"},
+		{"0000:00:1f.0", "/sys/bus/pci/devices/0000:00:1f.0/"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.pciAddress, func(t *testing.T) {
+			result := GetDeviceSysfsPath(tt.pciAddress)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestGetVendorName(t *testing.T) {
+	tests := []struct {
+		vendorID string
+		expected string
+	}{
+		{"10de", "NVIDIA Corporation"},
+		{"1002", "AMD/ATI"},
+		{"8086", "Intel Corporation"},
+		{"1234", "Unknown Vendor"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.vendorID, func(t *testing.T) {
+			result := getVendorName(tt.vendorID)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestGetDeviceName(t *testing.T) {
+	tests := []struct {
+		name      string
+		vendorID  string
+		deviceID  string
+		classCode string
+		expected  string
+	}{
+		{"NVIDIA L4", "10de", "27b8", "0x030200", "L4"},
+		{"NVIDIA RTX 4090", "10de", "2684", "0x030000", "RTX 4090"},
+		{"Unknown NVIDIA", "10de", "9999", "0x030000", "VGA Controller"},
+		{"Generic VGA", "1234", "5678", "0x030000", "VGA Controller"},
+		{"Generic 3D", "1234", "5678", "0x030200", "3D Controller"},
+		{"Audio device", "1234", "5678", "0x040300", "Audio Device"},
+		{"Unknown class", "1234", "5678", "0x999999", "PCI Device"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result := getDeviceName(tt.vendorID, tt.deviceID, tt.classCode)
+			assert.Equal(t, tt.expected, result)
+		})
+	}
+}
+
+func TestVFIOBinderIsVFIOAvailable(t *testing.T) {
+	binder := NewVFIOBinder()
+	// Just test that it doesn't panic
+	_ = binder.IsVFIOAvailable()
+}
+
+func TestDeviceTypes(t *testing.T) {
+	t.Run("device type constants", func(t *testing.T) {
+		require.Equal(t, DeviceType("gpu"), DeviceTypeGPU)
+		require.Equal(t, DeviceType("pci"), DeviceTypeGeneric)
+	})
+}
+
+func TestErrors(t *testing.T) {
+	t.Run("error types are distinct", func(t *testing.T) {
+		assert.NotEqual(t, ErrNotFound, ErrInUse)
+		assert.NotEqual(t, ErrNotBound, ErrAlreadyBound)
+		assert.NotEqual(t, ErrAlreadyExists, ErrNameExists)
+	})
+
+	t.Run("error messages are meaningful", func(t *testing.T) {
+		assert.Contains(t, ErrNotFound.Error(), "not found")
+		assert.Contains(t, ErrInUse.Error(), "in use")
+		assert.Contains(t, ErrInvalidName.Error(), "pattern")
+	})
+}

From 909642782599353d96f86491bd0c3d7751e3d18d Mon Sep 17 00:00:00 2001
From: Rafael Garcia
Date: Sat, 13 Dec 2025 21:44:37 +0000
Subject: [PATCH 04/17] feat(system): add kernel/initrd NVIDIA GPU support

Add support for NVIDIA GPU passthrough in the VM boot chain:
- versions.go: Add Kernel_20251213 with NVIDIA module/driver lib URLs
- initrd.go: Download and extract NVIDIA kernel modules and driver libs
- init_script.go: Load NVIDIA modules at boot, inject driver libs into containers

This enables containers to use standard CUDA images without bundling a
matching driver version.
---
 lib/system/init_script.go |  97 ++++++++++++++++++++-
 lib/system/initrd.go      | 175 +++++++++++++++++++++++++++++++++++++-
 lib/system/versions.go    |  36 +++++++-
 3 files changed, 300 insertions(+), 8 deletions(-)

diff --git a/lib/system/init_script.go b/lib/system/init_script.go
index 2c6a5e97..ebe9f8b4 100644
--- a/lib/system/init_script.go
+++ b/lib/system/init_script.go
@@ -7,8 +7,12 @@ package system
 // 1. Mounts essential filesystems (proc, sys, dev)
 // 2. Sets up overlay filesystem (lowerdir=rootfs, upperdir=overlay disk)
 // 3. Mounts and sources config disk (/dev/vdc)
-// 4. Configures networking (if enabled)
-// 5. Executes container entrypoint
+// 4. Loads NVIDIA kernel modules (if HAS_GPU=1 in config.sh)
+// 5. Configures networking (if enabled)
+// 6. Executes container entrypoint
+//
+// GPU support: When HAS_GPU=1 is set in the instance's config.sh, the init script
+// will load NVIDIA kernel modules before launching the container entrypoint.
 func GenerateInitScript() string {
 	return `#!/bin/sh
 set -xe
@@ -71,6 +75,95 @@
 else
     exit 1
 fi
+# Load NVIDIA kernel modules for GPU passthrough (if HAS_GPU=1)
+if [ "${HAS_GPU:-0}" = "1" ]; then
+    echo "overlay-init: loading NVIDIA kernel modules for GPU passthrough"
+    if [ -d /lib/modules ]; then
+        # Find the kernel version directory
+        KVER=$(ls /lib/modules/ 2>/dev/null | head -1)
+        if [ -n "$KVER" ] && [ -d "/lib/modules/$KVER/kernel/drivers/gpu" ]; then
+            # Load modules in order (dependencies first)
+            insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia.ko 2>&1 || echo "overlay-init: nvidia.ko load failed"
+            insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-uvm.ko 2>&1 || echo "overlay-init: nvidia-uvm.ko load failed"
+            insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-modeset.ko 2>&1 || echo "overlay-init: nvidia-modeset.ko load failed"
+            insmod /lib/modules/$KVER/kernel/drivers/gpu/nvidia-drm.ko modeset=1 2>&1 || echo "overlay-init: nvidia-drm.ko load failed"
+            echo "overlay-init: NVIDIA modules loaded for kernel $KVER"
+
+            # Use nvidia-modprobe to create device nodes with correct major/minor numbers.
+            # nvidia-modprobe is the official NVIDIA utility that:
+            # 1. Loads kernel modules if needed (already done above)
+            # 2. Creates /dev/nvidiactl and /dev/nvidia0 with correct permissions
+            # 3. Creates /dev/nvidia-uvm and /dev/nvidia-uvm-tools
+            if [ -x /usr/bin/nvidia-modprobe ]; then
+                echo "overlay-init: running nvidia-modprobe to create device nodes"
+                /usr/bin/nvidia-modprobe 2>&1 || echo "overlay-init: nvidia-modprobe failed"
+                /usr/bin/nvidia-modprobe -u -c=0 2>&1 || echo "overlay-init: nvidia-modprobe -u failed"
+                echo "overlay-init: nvidia-modprobe completed"
+                ls -la /dev/nvidia* 2>/dev/null || true
+            else
+                echo "overlay-init: nvidia-modprobe not found, falling back to manual mknod"
+                # Fallback: Manual device node creation
+                NVIDIA_MAJOR=$(awk '/nvidia-frontend|^[0-9]+ nvidia$/ {print $1}' /proc/devices 2>/dev/null | head -1)
+                NVIDIA_UVM_MAJOR=$(awk '/nvidia-uvm/ {print $1}' /proc/devices 2>/dev/null)
+
+                if [ -n "$NVIDIA_MAJOR" ]; then
+                    mknod -m 666 /dev/nvidiactl c $NVIDIA_MAJOR 255
+                    mknod -m 666 /dev/nvidia0 c $NVIDIA_MAJOR 0
+                    echo "overlay-init: created /dev/nvidiactl and /dev/nvidia0 (major $NVIDIA_MAJOR)"
+                fi
+
+                if [ -n "$NVIDIA_UVM_MAJOR" ]; then
+                    mknod -m 666 /dev/nvidia-uvm c $NVIDIA_UVM_MAJOR 0
+                    mknod -m 666 /dev/nvidia-uvm-tools c $NVIDIA_UVM_MAJOR 1
+                    echo "overlay-init: created /dev/nvidia-uvm* (major $NVIDIA_UVM_MAJOR)"
+                fi
+            fi
+        else
+            echo "overlay-init: NVIDIA modules not found in /lib/modules/$KVER"
+        fi
+    else
+        echo "overlay-init: /lib/modules not found, skipping NVIDIA module loading"
+    fi
+
+    # Inject NVIDIA userspace driver libraries into container rootfs.
+    # This allows containers to use standard CUDA images without bundled drivers.
+    # See lib/devices/GPU.md for documentation
+    if [ -d /usr/lib/nvidia ]; then
+        echo "overlay-init: injecting NVIDIA driver libraries into container"
+
+        DRIVER_VERSION=$(cat /usr/lib/nvidia/version 2>/dev/null || echo "unknown")
+        LIB_DST="/overlay/newroot/usr/lib/x86_64-linux-gnu"
+        BIN_DST="/overlay/newroot/usr/bin"
+
+        mkdir -p "$LIB_DST" "$BIN_DST"
+
+        # Copy all driver libraries and create symlinks
+        for lib in /usr/lib/nvidia/*.so.*; do
+            if [ -f "$lib" ]; then
+                libname=$(basename "$lib")
+                cp "$lib" "$LIB_DST/"
+
+                # Create standard symlinks: libfoo.so.VERSION -> libfoo.so.1 -> libfoo.so
+                base=$(echo "$libname" | sed 's/\.so\..*//')
+                ln -sf "$libname" "$LIB_DST/${base}.so.1" 2>/dev/null || true
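+                # Resulting chain, e.g.: libcuda.so -> libcuda.so.1 -> libcuda.so.570.133.07
+                # (version string is illustrative)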
ln -sf "${base}.so.1" "$LIB_DST/${base}.so" 2>/dev/null || true + fi + done + + # Copy nvidia-smi and nvidia-modprobe binaries + for bin in nvidia-smi nvidia-modprobe; do + if [ -x /usr/bin/$bin ]; then + cp /usr/bin/$bin "$BIN_DST/" + fi + done + + # Update ldconfig cache so applications can find the libraries + chroot /overlay/newroot ldconfig 2>/dev/null || true + + echo "overlay-init: NVIDIA driver libraries injected (version: $DRIVER_VERSION)" + fi +fi + # Mount attached volumes (from config: VOLUME_MOUNTS="device:path:mode[:overlay_device] ...") # Modes: ro (read-only), rw (read-write), overlay (base ro + per-instance overlay) if [ -n "${VOLUME_MOUNTS:-}" ]; then diff --git a/lib/system/initrd.go b/lib/system/initrd.go index c409ec70..027b4372 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -1,10 +1,14 @@ package system import ( + "archive/tar" + "compress/gzip" "context" "crypto/sha256" "encoding/hex" "fmt" + "io" + "net/http" "os" "path/filepath" "strconv" @@ -49,13 +53,21 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) if err := os.MkdirAll(binDir, 0755); err != nil { return "", fmt.Errorf("create bin dir: %w", err) } - + agentPath := filepath.Join(binDir, "exec-agent") if err := os.WriteFile(agentPath, ExecAgentBinary, 0755); err != nil { return "", fmt.Errorf("write exec-agent: %w", err) } + // Add NVIDIA kernel modules (for GPU passthrough support) + if err := m.addNvidiaModules(ctx, rootfsDir, arch); err != nil { + // Log but don't fail - NVIDIA modules are optional (not available on all architectures) + fmt.Printf("initrd: skipping NVIDIA modules: %v\n", err) + } + // Write generated init script + // Note: The init script is generated at instance creation time with hasGPU flag, + // so we write a placeholder here that will be replaced per-instance initScript := GenerateInitScript() initPath := filepath.Join(rootfsDir, "init") if err := os.WriteFile(initPath, []byte(initScript), 0755); err != nil { @@ -64,13 +76,13 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) // Generate timestamp for this build timestamp := strconv.FormatInt(time.Now().Unix(), 10) - + // Package as cpio.gz outputPath := m.paths.SystemInitrdTimestamp(timestamp, arch) if err := os.MkdirAll(filepath.Dir(outputPath), 0755); err != nil { return "", fmt.Errorf("create output dir: %w", err) } - + if _, err := images.ExportRootfs(rootfsDir, outputPath, images.FormatCpio); err != nil { return "", fmt.Errorf("export initrd: %w", err) } @@ -135,10 +147,165 @@ func (m *manager) isInitrdStale(initrdPath string) bool { return string(storedHash) != currentHash } -// computeInitrdHash computes a hash of the embedded binary and init script +// computeInitrdHash computes a hash of the embedded binary, init script, and NVIDIA assets func computeInitrdHash() string { h := sha256.New() h.Write(ExecAgentBinary) h.Write([]byte(GenerateInitScript())) + // Include NVIDIA driver version in hash so initrd is rebuilt when driver changes + if ver, ok := NvidiaDriverVersion[DefaultKernelVersion]; ok { + h.Write([]byte(ver)) + } + // Include driver libs URL so initrd is rebuilt when the libs tarball changes + if archURLs, ok := NvidiaDriverLibURLs[DefaultKernelVersion]; ok { + if url, ok := archURLs["x86_64"]; ok { + h.Write([]byte(url)) + } + } return hex.EncodeToString(h.Sum(nil))[:16] } + +// addNvidiaModules downloads and extracts NVIDIA kernel modules into the rootfs +func (m *manager) addNvidiaModules(ctx context.Context, rootfsDir, arch string) 
error { + // Check if NVIDIA modules are available for this architecture + archURLs, ok := NvidiaModuleURLs[DefaultKernelVersion] + if !ok { + return fmt.Errorf("no NVIDIA modules for kernel version %s", DefaultKernelVersion) + } + url, ok := archURLs[arch] + if !ok { + return fmt.Errorf("no NVIDIA modules for architecture %s", arch) + } + + // Download the tarball + client := &http.Client{ + CheckRedirect: func(req *http.Request, via []*http.Request) error { + return nil // Follow redirects + }, + } + + req, err := http.NewRequestWithContext(ctx, "GET", url, nil) + if err != nil { + return fmt.Errorf("create request: %w", err) + } + + resp, err := client.Do(req) + if err != nil { + return fmt.Errorf("download nvidia modules: %w", err) + } + defer resp.Body.Close() + + if resp.StatusCode != http.StatusOK { + return fmt.Errorf("download failed with status %d", resp.StatusCode) + } + + // Extract tarball directly into rootfs + if err := extractTarGz(resp.Body, rootfsDir); err != nil { + return fmt.Errorf("extract nvidia modules: %w", err) + } + + // Add userspace driver libraries (libcuda.so, libnvidia-ml.so, nvidia-smi, etc.) + // These are injected into containers at boot time - see lib/devices/GPU.md + if err := m.addNvidiaDriverLibs(ctx, rootfsDir, arch); err != nil { + fmt.Printf("initrd: warning: could not add nvidia driver libs: %v\n", err) + // Don't fail - kernel modules can still work, but containers won't have driver libs + } + + return nil +} + +// addNvidiaDriverLibs downloads and extracts NVIDIA userspace driver libraries +// These libraries (libcuda.so, libnvidia-ml.so, nvidia-smi, etc.) are injected +// into containers at boot time, eliminating the need for containers to bundle +// matching driver versions. See lib/devices/GPU.md for documentation. 
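+//
+// The tarball unpacks directly into the initrd rootfs. Judging from what the
+// overlay-init script consumes, the expected layout is roughly (illustrative,
+// not exhaustive):
+//
+//	usr/lib/nvidia/version                    driver version string, e.g. "570.86.16"
+//	usr/lib/nvidia/libcuda.so.<version>       versioned driver libraries
+//	usr/lib/nvidia/libnvidia-ml.so.<version>
+//	usr/bin/nvidia-smi
+//	usr/bin/nvidia-modprobe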
+func (m *manager) addNvidiaDriverLibs(ctx context.Context, rootfsDir, arch string) error {
+	archURLs, ok := NvidiaDriverLibURLs[DefaultKernelVersion]
+	if !ok {
+		return fmt.Errorf("no NVIDIA driver libs for kernel version %s", DefaultKernelVersion)
+	}
+	url, ok := archURLs[arch]
+	if !ok {
+		return fmt.Errorf("no NVIDIA driver libs for architecture %s", arch)
+	}
+
+	client := &http.Client{
+		CheckRedirect: func(req *http.Request, via []*http.Request) error {
+			return nil // Follow redirects
+		},
+	}
+
+	req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
+	if err != nil {
+		return fmt.Errorf("create request: %w", err)
+	}
+
+	resp, err := client.Do(req)
+	if err != nil {
+		return fmt.Errorf("download nvidia driver libs: %w", err)
+	}
+	defer resp.Body.Close()
+
+	if resp.StatusCode != http.StatusOK {
+		return fmt.Errorf("download failed with status %d", resp.StatusCode)
+	}
+
+	// Extract tarball directly into rootfs
+	if err := extractTarGz(resp.Body, rootfsDir); err != nil {
+		return fmt.Errorf("extract nvidia driver libs: %w", err)
+	}
+
+	fmt.Printf("initrd: added NVIDIA driver libraries from %s\n", url)
+	return nil
+}
+
+// extractTarGz extracts a gzipped tarball into the destination directory.
+// Entry names are validated so a crafted archive cannot write outside destDir,
+// and symlink entries are preserved (driver lib tarballs may contain .so links).
+func extractTarGz(r io.Reader, destDir string) error {
+	gzr, err := gzip.NewReader(r)
+	if err != nil {
+		return fmt.Errorf("create gzip reader: %w", err)
+	}
+	defer gzr.Close()
+
+	tr := tar.NewReader(gzr)
+	for {
+		header, err := tr.Next()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return fmt.Errorf("read tar: %w", err)
+		}
+
+		// Reject absolute or parent-escaping entry names (zip-slip guard)
+		if !filepath.IsLocal(header.Name) {
+			return fmt.Errorf("unsafe tar entry path %q", header.Name)
+		}
+
+		// Calculate destination path
+		destPath := filepath.Join(destDir, header.Name)
+
+		switch header.Typeflag {
+		case tar.TypeDir:
+			if err := os.MkdirAll(destPath, os.FileMode(header.Mode)); err != nil {
+				return fmt.Errorf("create directory %s: %w", destPath, err)
+			}
+		case tar.TypeReg:
+			// Ensure parent directory exists
+			if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
+				return fmt.Errorf("create parent dir: %w", err)
+			}
+
+			outFile, err := os.Create(destPath)
+			if err != nil {
+				return fmt.Errorf("create file %s: %w", destPath, err)
+			}
+
+			if _, err := io.Copy(outFile, tr); err != nil {
+				outFile.Close()
+				return fmt.Errorf("write file %s: %w", destPath, err)
+			}
+			outFile.Close()
+
+			if err := os.Chmod(destPath, os.FileMode(header.Mode)); err != nil {
+				return fmt.Errorf("chmod %s: %w", destPath, err)
+			}
+		case tar.TypeSymlink:
+			if err := os.MkdirAll(filepath.Dir(destPath), 0755); err != nil {
+				return fmt.Errorf("create parent dir: %w", err)
+			}
+			// Remove any existing entry so re-extraction is idempotent
+			os.Remove(destPath)
+			if err := os.Symlink(header.Linkname, destPath); err != nil {
+				return fmt.Errorf("create symlink %s: %w", destPath, err)
+			}
+		}
+	}
+
+	return nil
+}
diff --git a/lib/system/versions.go b/lib/system/versions.go
index 1aca99e1..aaca2bf0 100644
--- a/lib/system/versions.go
+++ b/lib/system/versions.go
@@ -6,19 +6,21 @@ import "runtime"
 type KernelVersion string
 
 const (
-	// Kernel versions from Kernel linux build
+	// Kernel versions from onkernel/linux releases
 	Kernel_202511182 KernelVersion = "ch-6.12.8-kernel-1-202511182"
 	Kernel_20251211  KernelVersion = "ch-6.12.8-kernel-1.1-20251211"
+	Kernel_20251213  KernelVersion = "ch-6.12.8-kernel-1.2-20251213" // NVIDIA module + driver lib support + networking configs
 )
 
 var (
 	// DefaultKernelVersion is the kernel version used for new instances
-	DefaultKernelVersion = Kernel_20251211
+	DefaultKernelVersion = Kernel_20251213
 
 	// SupportedKernelVersions lists all supported kernel versions
 	SupportedKernelVersions = []KernelVersion{
 		Kernel_202511182,
 		Kernel_20251211,
+		Kernel_20251213,
 		// Add future versions here
 	}
 )
@@ -33,9 +35,39 @@ var KernelDownloadURLs = map[KernelVersion]map[string]string{
 		"x86_64": 
"https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.1-20251211/vmlinux-x86_64", "aarch64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.1-20251211/Image-arm64", }, + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/vmlinux-x86_64", + "aarch64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/Image-arm64", + }, // Add future versions here } +// NvidiaModuleURLs maps kernel versions and architectures to NVIDIA module tarball URLs +// These tarballs contain pre-built NVIDIA kernel modules that match the kernel version +var NvidiaModuleURLs = map[KernelVersion]map[string]string{ + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/nvidia-modules-x86_64.tar.gz", + // Note: NVIDIA open-gpu-kernel-modules does not support arm64 yet + }, + // Kernel_202511182 and Kernel_20251211 do not have NVIDIA modules (pre-module-support kernels) +} + +// NvidiaDriverLibURLs maps kernel versions and architectures to driver library tarball URLs +// These tarballs contain userspace NVIDIA libraries (libcuda.so, libnvidia-ml.so, etc.) +// that match the kernel modules and are injected into containers at boot time. +// See lib/devices/GPU.md for documentation on driver injection. +var NvidiaDriverLibURLs = map[KernelVersion]map[string]string{ + Kernel_20251213: { + "x86_64": "https://github.com/onkernel/linux/releases/download/ch-6.12.8-kernel-1.2-20251213/nvidia-driver-libs-x86_64.tar.gz", + }, +} + +// NvidiaDriverVersion tracks the NVIDIA driver version bundled with each kernel +var NvidiaDriverVersion = map[KernelVersion]string{ + Kernel_20251213: "570.86.16", + // Kernel_202511182 and Kernel_20251211 do not have NVIDIA modules +} + // GetArch returns the architecture string for the current platform func GetArch() string { arch := runtime.GOARCH From 4ba6fa2cb413b0bb10d51ccfc48a2908046ec4ea Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sat, 13 Dec 2025 21:45:25 +0000 Subject: [PATCH 05/17] feat(instances): add instance liveness checker for device reconciliation Add InstanceLivenessChecker adapter to allow the devices package to query instance state without circular imports. Used during startup to detect orphaned device attachments from crashed VMs. 
- liveness.go: Adapter implementing devices.InstanceLivenessChecker - liveness_test.go: Unit tests - reconcile_test.go: Device reconciliation tests - types.go: Add Devices field to StoredMetadata and CreateInstanceRequest --- lib/devices/reconcile_test.go | 623 +++++++++++++++++++++++++++++++++ lib/instances/liveness.go | 81 +++++ lib/instances/liveness_test.go | 42 +++ lib/instances/types.go | 4 + 4 files changed, 750 insertions(+) create mode 100644 lib/devices/reconcile_test.go create mode 100644 lib/instances/liveness.go create mode 100644 lib/instances/liveness_test.go diff --git a/lib/devices/reconcile_test.go b/lib/devices/reconcile_test.go new file mode 100644 index 00000000..f6aa422e --- /dev/null +++ b/lib/devices/reconcile_test.go @@ -0,0 +1,623 @@ +package devices + +import ( + "context" + "encoding/json" + "os" + "path/filepath" + "testing" + "time" + + "github.com/onkernel/hypeman/lib/paths" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// mockLivenessChecker implements InstanceLivenessChecker for testing +type mockLivenessChecker struct { + runningInstances map[string]bool // instanceID -> isRunning + instanceDevices map[string][]string // instanceID -> deviceIDs +} + +func newMockLivenessChecker() *mockLivenessChecker { + return &mockLivenessChecker{ + runningInstances: make(map[string]bool), + instanceDevices: make(map[string][]string), + } +} + +func (m *mockLivenessChecker) IsInstanceRunning(ctx context.Context, instanceID string) bool { + return m.runningInstances[instanceID] +} + +func (m *mockLivenessChecker) GetInstanceDevices(ctx context.Context, instanceID string) []string { + return m.instanceDevices[instanceID] +} + +func (m *mockLivenessChecker) ListAllInstanceDevices(ctx context.Context) map[string][]string { + return m.instanceDevices +} + +func (m *mockLivenessChecker) setRunning(instanceID string, running bool) { + m.runningInstances[instanceID] = running +} + +func (m *mockLivenessChecker) setInstanceDevices(instanceID string, deviceIDs []string) { + m.instanceDevices[instanceID] = deviceIDs +} + +// setupTestManager creates a manager with a temporary directory for testing +func setupTestManager(t *testing.T) (*manager, *paths.Paths, string) { + t.Helper() + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + // Create devices directory + require.NoError(t, os.MkdirAll(p.DevicesDir(), 0755)) + + mgr := &manager{ + paths: p, + vfioBinder: NewVFIOBinder(), + } + + return mgr, p, tmpDir +} + +// createTestDevice creates a device in the test directory +func createTestDevice(t *testing.T, p *paths.Paths, device *Device) { + t.Helper() + deviceDir := p.DeviceDir(device.Id) + require.NoError(t, os.MkdirAll(deviceDir, 0755)) + + data, err := json.MarshalIndent(device, "", " ") + require.NoError(t, err) + + require.NoError(t, os.WriteFile(p.DeviceMetadata(device.Id), data, 0644)) +} + +// createTestInstanceDir creates an instance directory (simulating instance existence) +func createTestInstanceDir(t *testing.T, p *paths.Paths, instanceID string) { + t.Helper() + instanceDir := p.InstanceDir(instanceID) + require.NoError(t, os.MkdirAll(instanceDir, 0755)) +} + +func TestReconcileDevices_NoDevices(t *testing.T) { + mgr, _, _ := setupTestManager(t) + ctx := context.Background() + + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) +} + +func TestReconcileDevices_OrphanedAttachment_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + instanceID := "orphaned-instance-123" + 
deviceID := "device-abc" + + // Create device with AttachedTo pointing to non-existent instance + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", // Non-existent for test + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Don't create the instance directory - it's orphaned + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was cleared + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "AttachedTo should be cleared for orphaned device") +} + +func TestReconcileDevices_ValidAttachment_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + instanceID := "valid-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo pointing to existing instance + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create the instance directory - it exists + createTestInstanceDir(t, p, instanceID) + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was NOT cleared (instance exists) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo, "AttachedTo should NOT be cleared for valid device") + assert.Equal(t, instanceID, *updatedDevice.AttachedTo) +} + +func TestReconcileDevices_OrphanedAttachment_WithLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "stopped-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create instance directory but mark as NOT running + createTestInstanceDir(t, p, instanceID) + liveness.setRunning(instanceID, false) // Stopped/standby + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was cleared (instance not running) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "AttachedTo should be cleared for non-running instance") +} + +func TestReconcileDevices_ValidAttachment_WithLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "running-instance-123" + deviceID := "device-abc" + + // Create device with AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID, + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Create instance and mark as running + createTestInstanceDir(t, p, instanceID) + liveness.setRunning(instanceID, true) // Running + + // Run reconciliation + err := 
mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify attachment was NOT cleared (instance is running) + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo, "AttachedTo should NOT be cleared for running instance") + assert.Equal(t, instanceID, *updatedDevice.AttachedTo) +} + +func TestReconcileDevices_TwoWayMismatch_InstanceRefsUnknownDevice(t *testing.T) { + mgr, _, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker with instance that references unknown device + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "instance-with-ghost-device" + unknownDeviceID := "device-that-doesnt-exist" + + // Instance references a device that doesn't exist + liveness.setInstanceDevices(instanceID, []string{unknownDeviceID}) + liveness.setRunning(instanceID, true) + + // Run reconciliation - should not error, just log the mismatch + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: We can't easily verify log output, but the test ensures no panic/error +} + +func TestReconcileDevices_TwoWayMismatch_DeviceAttachedToNil(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID := "instance-123" + deviceID := "device-abc" + + // Create device with NO AttachedTo + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: nil, // Not attached according to device metadata + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Instance claims to have this device + liveness.setInstanceDevices(instanceID, []string{deviceID}) + liveness.setRunning(instanceID, true) + + // Run reconciliation - should log mismatch but not error + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: This is a log-only mismatch, device state should remain unchanged + + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + assert.Nil(t, updatedDevice.AttachedTo, "Device should remain unattached (log-only mismatch)") +} + +func TestReconcileDevices_TwoWayMismatch_DeviceAttachedToWrongInstance(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + instanceID1 := "instance-1" + instanceID2 := "instance-2" + deviceID := "device-abc" + + // Create device attached to instance-1 + device := &Device{ + Id: deviceID, + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:99:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &instanceID1, // Attached to instance-1 + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + // Both instances exist and are running + createTestInstanceDir(t, p, instanceID1) + createTestInstanceDir(t, p, instanceID2) + liveness.setRunning(instanceID1, true) + liveness.setRunning(instanceID2, true) + + // instance-2 claims to have this device (mismatch!) 
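+	// Device metadata still records instance-1, so reconciliation should
+	// log the conflict without mutating either side (asserted below).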
+ liveness.setInstanceDevices(instanceID2, []string{deviceID}) + + // Run reconciliation - should log mismatch but not error + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + // Note: This is a log-only mismatch, device state should remain unchanged + + updatedDevice, err := mgr.loadDevice(deviceID) + require.NoError(t, err) + require.NotNil(t, updatedDevice.AttachedTo) + assert.Equal(t, instanceID1, *updatedDevice.AttachedTo, "Device should remain attached to original instance (log-only mismatch)") +} + +func TestReconcileDevices_MultipleDevices(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + runningInstanceID := "running-instance" + stoppedInstanceID := "stopped-instance" + orphanedInstanceID := "orphaned-instance" + + // Device 1: Attached to running instance - should stay attached + device1 := &Device{ + Id: "device-1", + Name: "gpu-1", + Type: DeviceTypeGPU, + PCIAddress: "0000:01:00.0", + VendorID: "10de", + DeviceID: "1234", + AttachedTo: &runningInstanceID, + CreatedAt: time.Now(), + } + + // Device 2: Attached to stopped instance - should be cleared + device2 := &Device{ + Id: "device-2", + Name: "gpu-2", + Type: DeviceTypeGPU, + PCIAddress: "0000:02:00.0", + VendorID: "10de", + DeviceID: "5678", + AttachedTo: &stoppedInstanceID, + CreatedAt: time.Now(), + } + + // Device 3: Attached to non-existent instance - should be cleared + device3 := &Device{ + Id: "device-3", + Name: "gpu-3", + Type: DeviceTypeGPU, + PCIAddress: "0000:03:00.0", + VendorID: "10de", + DeviceID: "9abc", + AttachedTo: &orphanedInstanceID, + CreatedAt: time.Now(), + } + + // Device 4: Not attached - should stay unattached + device4 := &Device{ + Id: "device-4", + Name: "gpu-4", + Type: DeviceTypeGPU, + PCIAddress: "0000:04:00.0", + VendorID: "10de", + DeviceID: "def0", + AttachedTo: nil, + CreatedAt: time.Now(), + } + + createTestDevice(t, p, device1) + createTestDevice(t, p, device2) + createTestDevice(t, p, device3) + createTestDevice(t, p, device4) + + // Set up instance states + createTestInstanceDir(t, p, runningInstanceID) + createTestInstanceDir(t, p, stoppedInstanceID) + // Don't create orphanedInstanceID directory + + liveness.setRunning(runningInstanceID, true) + liveness.setRunning(stoppedInstanceID, false) + // orphanedInstanceID doesn't exist in liveness checker + + // Run reconciliation + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Verify device 1 stays attached (running instance) + d1, err := mgr.loadDevice("device-1") + require.NoError(t, err) + require.NotNil(t, d1.AttachedTo) + assert.Equal(t, runningInstanceID, *d1.AttachedTo) + + // Verify device 2 is cleared (stopped instance) + d2, err := mgr.loadDevice("device-2") + require.NoError(t, err) + assert.Nil(t, d2.AttachedTo) + + // Verify device 3 is cleared (orphaned instance) + d3, err := mgr.loadDevice("device-3") + require.NoError(t, err) + assert.Nil(t, d3.AttachedTo) + + // Verify device 4 stays unattached + d4, err := mgr.loadDevice("device-4") + require.NoError(t, err) + assert.Nil(t, d4.AttachedTo) +} + +func TestSetLivenessChecker(t *testing.T) { + mgr, _, _ := setupTestManager(t) + + // Initially nil + assert.Nil(t, mgr.livenessChecker) + + // Set liveness checker + liveness := newMockLivenessChecker() + mgr.SetLivenessChecker(liveness) + + // Verify it was set + assert.Equal(t, liveness, mgr.livenessChecker) +} + +func 
TestIsInstanceOrphaned_NoLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + existingInstanceID := "existing-instance" + missingInstanceID := "missing-instance" + + // Create one instance directory + createTestInstanceDir(t, p, existingInstanceID) + + // Existing instance is NOT orphaned + assert.False(t, mgr.isInstanceOrphaned(ctx, existingInstanceID)) + + // Missing instance IS orphaned + assert.True(t, mgr.isInstanceOrphaned(ctx, missingInstanceID)) +} + +func TestIsInstanceOrphaned_WithLivenessChecker(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Set up liveness checker + liveness := newMockLivenessChecker() + mgr.livenessChecker = liveness + + runningInstanceID := "running-instance" + stoppedInstanceID := "stopped-instance" + + // Both instances have directories + createTestInstanceDir(t, p, runningInstanceID) + createTestInstanceDir(t, p, stoppedInstanceID) + + liveness.setRunning(runningInstanceID, true) + liveness.setRunning(stoppedInstanceID, false) + + // Running instance is NOT orphaned + assert.False(t, mgr.isInstanceOrphaned(ctx, runningInstanceID)) + + // Stopped instance IS orphaned (even though directory exists) + assert.True(t, mgr.isInstanceOrphaned(ctx, stoppedInstanceID)) +} + +func TestReconcileDevices_NoDevicesDirectory(t *testing.T) { + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + // Don't create devices directory + + mgr := &manager{ + paths: p, + vfioBinder: NewVFIOBinder(), + } + + ctx := context.Background() + + // Should not error when directory doesn't exist + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) +} + +func TestReconcileStats(t *testing.T) { + // Verify stats struct has expected fields + stats := reconcileStats{} + + stats.orphanedCleared = 1 + stats.resetAttempted = 2 + stats.resetSucceeded = 3 + stats.resetFailed = 4 + stats.mismatches = 5 + stats.suspiciousVMM = 6 + stats.errors = 7 + + assert.Equal(t, 1, stats.orphanedCleared) + assert.Equal(t, 2, stats.resetAttempted) + assert.Equal(t, 3, stats.resetSucceeded) + assert.Equal(t, 4, stats.resetFailed) + assert.Equal(t, 5, stats.mismatches) + assert.Equal(t, 6, stats.suspiciousVMM) + assert.Equal(t, 7, stats.errors) +} + +// TestResetOrphanedDevice_NonExistentPCIAddress tests that reset-lite +// handles non-existent PCI addresses gracefully (doesn't panic) +func TestResetOrphanedDevice_NonExistentPCIAddress(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Create device with fake PCI address that doesn't exist + device := &Device{ + Id: "test-device", + Name: "test-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:ff:ff.f", // Non-existent + VendorID: "10de", // NVIDIA vendor ID + DeviceID: "1234", + BoundToVFIO: true, // Claim it's bound to VFIO + CreatedAt: time.Now(), + } + createTestDevice(t, p, device) + + stats := &reconcileStats{} + + // Should not panic, should handle errors gracefully + mgr.resetOrphanedDevice(ctx, device, stats) + + // Reset was attempted + assert.Equal(t, 1, stats.resetAttempted) + + // May fail due to non-existent device, that's expected + // The key is it doesn't panic +} + +// TestDetectSuspiciousVMMProcesses_NoPgrep tests that detection handles +// missing pgrep gracefully (e.g., in minimal containers) +func TestDetectSuspiciousVMMProcesses_NoPgrep(t *testing.T) { + mgr, _, _ := setupTestManager(t) + ctx := context.Background() + + stats := &reconcileStats{} + + // This test just verifies no panic when pgrep isn't available 
+ // or returns no results + mgr.detectSuspiciousVMMProcesses(ctx, stats) + + // No assertions needed - we just want to ensure no panic +} + +// Helper function for testing: verify device directory structure +func verifyDeviceDir(t *testing.T, p *paths.Paths, deviceID string) bool { + t.Helper() + metadataPath := p.DeviceMetadata(deviceID) + _, err := os.Stat(metadataPath) + return err == nil +} + +// TestReconcileDevices_CorruptedDeviceMetadata tests handling of +// corrupted device metadata files +func TestReconcileDevices_CorruptedDeviceMetadata(t *testing.T) { + mgr, p, _ := setupTestManager(t) + ctx := context.Background() + + // Create a valid device + validDevice := &Device{ + Id: "valid-device", + Name: "valid-gpu", + Type: DeviceTypeGPU, + PCIAddress: "0000:01:00.0", + VendorID: "10de", + DeviceID: "1234", + CreatedAt: time.Now(), + } + createTestDevice(t, p, validDevice) + + // Create a corrupted device directory with invalid JSON + corruptedID := "corrupted-device" + corruptedDir := p.DeviceDir(corruptedID) + require.NoError(t, os.MkdirAll(corruptedDir, 0755)) + corruptedPath := filepath.Join(corruptedDir, "metadata.json") + require.NoError(t, os.WriteFile(corruptedPath, []byte("not valid json{{{"), 0644)) + + // Should not error - should skip corrupted device and continue + err := mgr.ReconcileDevices(ctx) + require.NoError(t, err) + + // Valid device should still be loadable + d, err := mgr.loadDevice("valid-device") + require.NoError(t, err) + assert.Equal(t, "valid-gpu", d.Name) +} + diff --git a/lib/instances/liveness.go b/lib/instances/liveness.go new file mode 100644 index 00000000..3440bbbb --- /dev/null +++ b/lib/instances/liveness.go @@ -0,0 +1,81 @@ +package instances + +import ( + "context" + + "github.com/onkernel/hypeman/lib/devices" +) + +// Ensure instanceLivenessAdapter implements the interface +var _ devices.InstanceLivenessChecker = (*instanceLivenessAdapter)(nil) + +// instanceLivenessAdapter adapts instances.Manager to devices.InstanceLivenessChecker +type instanceLivenessAdapter struct { + manager *manager +} + +// NewLivenessChecker creates a new InstanceLivenessChecker that wraps the instances manager. +// This adapter allows the devices package to query instance state without a circular import. +func NewLivenessChecker(m Manager) devices.InstanceLivenessChecker { + // Type assert to get the concrete manager type + mgr, ok := m.(*manager) + if !ok { + return nil + } + return &instanceLivenessAdapter{manager: mgr} +} + +// IsInstanceRunning returns true if the instance exists and is in a running state +// (i.e., has an active VMM process). Returns false if the instance doesn't exist +// or is stopped/standby/unknown. +func (a *instanceLivenessAdapter) IsInstanceRunning(ctx context.Context, instanceID string) bool { + if a.manager == nil { + return false + } + inst, err := a.manager.getInstance(ctx, instanceID) + if err != nil { + return false + } + + // Consider instance "running" if the VMM is active (any of these states means VM is using the device) + switch inst.State { + case StateRunning, StatePaused, StateCreated: + return true + default: + // StateStopped, StateStandby, StateShutdown, StateUnknown + return false + } +} + +// GetInstanceDevices returns the list of device IDs attached to an instance. +// Returns nil if the instance doesn't exist. 
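+// The IDs come from the instance's stored metadata (StoredMetadata.Devices);
+// device reconciliation cross-checks them against each device's AttachedTo field.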
+func (a *instanceLivenessAdapter) GetInstanceDevices(ctx context.Context, instanceID string) []string { + if a.manager == nil { + return nil + } + inst, err := a.manager.getInstance(ctx, instanceID) + if err != nil { + return nil + } + return inst.Devices +} + +// ListAllInstanceDevices returns a map of instanceID -> []deviceIDs for all instances. +func (a *instanceLivenessAdapter) ListAllInstanceDevices(ctx context.Context) map[string][]string { + if a.manager == nil { + return nil + } + instances, err := a.manager.listInstances(ctx) + if err != nil { + return nil + } + + result := make(map[string][]string) + for _, inst := range instances { + if len(inst.Devices) > 0 { + result[inst.Id] = inst.Devices + } + } + return result +} + diff --git a/lib/instances/liveness_test.go b/lib/instances/liveness_test.go new file mode 100644 index 00000000..6fa7c8b5 --- /dev/null +++ b/lib/instances/liveness_test.go @@ -0,0 +1,42 @@ +package instances + +import ( + "context" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestNewLivenessChecker_ReturnsNilForNonManagerType(t *testing.T) { + // Test that passing a non-*manager type returns nil + // This would only happen if someone wraps the Manager interface + // We can't easily test this without a mock, but we can test the happy path + + // For now, just verify the interface is implemented correctly + var _ = (*instanceLivenessAdapter)(nil) +} + +func TestInstanceLivenessAdapter_Interface(t *testing.T) { + // Verify the adapter implements the expected interface + // This is a compile-time check via the var _ assignment in liveness.go + // but we can also verify the method signatures exist + adapter := &instanceLivenessAdapter{} + + ctx := context.Background() + + // These should not panic even with nil manager + // (they'll fail, but that's expected) + running := adapter.IsInstanceRunning(ctx, "test-id") + assert.False(t, running, "Should return false for nil manager") + + devices := adapter.GetInstanceDevices(ctx, "test-id") + assert.Nil(t, devices, "Should return nil for nil manager") + + allDevices := adapter.ListAllInstanceDevices(ctx) + assert.Nil(t, allDevices, "Should return nil for nil manager") +} + + + + + diff --git a/lib/instances/types.go b/lib/instances/types.go index e46372a8..6320c23b 100644 --- a/lib/instances/types.go +++ b/lib/instances/types.go @@ -67,6 +67,9 @@ type StoredMetadata struct { // vsock configuration VsockCID int64 // Guest vsock Context ID VsockSocket string // Host-side vsock socket path + + // Attached devices (GPU passthrough) + Devices []string // Device IDs attached to this instance } // Instance represents a virtual machine instance with derived runtime state @@ -89,6 +92,7 @@ type CreateInstanceRequest struct { Vcpus int // Default 2 Env map[string]string // Optional environment variables NetworkEnabled bool // Whether to enable networking (uses default network) + Devices []string // Device IDs or names to attach (GPU passthrough) Volumes []VolumeAttachment // Volumes to attach at creation time } From 5e6ad08439eee5bfeeca6a49cc107b40ad67a042 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sat, 13 Dec 2025 21:46:04 +0000 Subject: [PATCH 06/17] feat(instances): integrate devices with instance lifecycle Wire up device management throughout the instance lifecycle: - create.go: Validate devices, auto-bind to VFIO, pass to VM config - delete.go: Detach devices, auto-unbind from VFIO - configdisk.go: Add HAS_GPU config flag for GPU instances - manager.go: Add deviceManager dependency - providers.go: 
Add ProvideDeviceManager - wire.go/wire_gen.go: Wire up DeviceManager in DI - api.go: Add DeviceManager to ApiService struct --- cmd/api/api/api.go | 4 ++ cmd/api/api/api_test.go | 5 ++- cmd/api/wire.go | 3 ++ cmd/api/wire_gen.go | 13 +++--- lib/instances/configdisk.go | 24 ++++++---- lib/instances/create.go | 63 +++++++++++++++++++++++---- lib/instances/delete.go | 19 +++++++- lib/instances/manager.go | 5 ++- lib/instances/manager_test.go | 7 ++- lib/instances/resource_limits_test.go | 7 ++- lib/providers/providers.go | 10 ++++- 11 files changed, 129 insertions(+), 31 deletions(-) diff --git a/cmd/api/api/api.go b/cmd/api/api/api.go index 5fd50330..f511cbfc 100644 --- a/cmd/api/api/api.go +++ b/cmd/api/api/api.go @@ -2,6 +2,7 @@ package api import ( "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -17,6 +18,7 @@ type ApiService struct { InstanceManager instances.Manager VolumeManager volumes.Manager NetworkManager network.Manager + DeviceManager devices.Manager IngressManager ingress.Manager } @@ -29,6 +31,7 @@ func New( instanceManager instances.Manager, volumeManager volumes.Manager, networkManager network.Manager, + deviceManager devices.Manager, ingressManager ingress.Manager, ) *ApiService { return &ApiService{ @@ -37,6 +40,7 @@ func New( InstanceManager: instanceManager, VolumeManager: volumeManager, NetworkManager: networkManager, + DeviceManager: deviceManager, IngressManager: ingressManager, } } diff --git a/cmd/api/api/api_test.go b/cmd/api/api/api_test.go index 8aaa4064..c5984fde 100644 --- a/cmd/api/api/api_test.go +++ b/cmd/api/api/api_test.go @@ -9,6 +9,7 @@ import ( "time" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/instances" mw "github.com/onkernel/hypeman/lib/middleware" @@ -34,11 +35,12 @@ func newTestService(t *testing.T) *ApiService { systemMgr := system.NewManager(p) networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := instances.ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB } - instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, limits, nil, nil) + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) // Register cleanup for orphaned Cloud Hypervisor processes t.Cleanup(func() { @@ -50,6 +52,7 @@ func newTestService(t *testing.T) *ApiService { ImageManager: imageMgr, InstanceManager: instanceMgr, VolumeManager: volumeMgr, + DeviceManager: deviceMgr, } } diff --git a/cmd/api/wire.go b/cmd/api/wire.go index 21f9ddf0..dfa2fc15 100644 --- a/cmd/api/wire.go +++ b/cmd/api/wire.go @@ -9,6 +9,7 @@ import ( "github.com/google/wire" "github.com/onkernel/hypeman/cmd/api/api" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -27,6 +28,7 @@ type application struct { ImageManager images.Manager SystemManager system.Manager NetworkManager network.Manager + DeviceManager devices.Manager InstanceManager instances.Manager VolumeManager volumes.Manager IngressManager ingress.Manager @@ -44,6 +46,7 @@ func initializeApp() 
(*application, func(), error) { providers.ProvideImageManager, providers.ProvideSystemManager, providers.ProvideNetworkManager, + providers.ProvideDeviceManager, providers.ProvideInstanceManager, providers.ProvideVolumeManager, providers.ProvideIngressManager, diff --git a/cmd/api/wire_gen.go b/cmd/api/wire_gen.go index 5a94276c..6b3e81ad 100644 --- a/cmd/api/wire_gen.go +++ b/cmd/api/wire_gen.go @@ -8,8 +8,11 @@ package main import ( "context" + "log/slog" + "github.com/onkernel/hypeman/cmd/api/api" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -18,10 +21,7 @@ import ( "github.com/onkernel/hypeman/lib/registry" "github.com/onkernel/hypeman/lib/system" "github.com/onkernel/hypeman/lib/volumes" - "log/slog" -) -import ( _ "embed" ) @@ -39,11 +39,12 @@ func initializeApp() (*application, func(), error) { } systemManager := providers.ProvideSystemManager(paths) networkManager := providers.ProvideNetworkManager(paths, config) + devicesManager := providers.ProvideDeviceManager(paths) volumesManager, err := providers.ProvideVolumeManager(paths, config) if err != nil { return nil, nil, err } - instancesManager, err := providers.ProvideInstanceManager(paths, config, manager, systemManager, networkManager, volumesManager) + instancesManager, err := providers.ProvideInstanceManager(paths, config, manager, systemManager, networkManager, devicesManager, volumesManager) if err != nil { return nil, nil, err } @@ -55,7 +56,7 @@ func initializeApp() (*application, func(), error) { if err != nil { return nil, nil, err } - apiService := api.New(config, manager, instancesManager, volumesManager, networkManager, ingressManager) + apiService := api.New(config, manager, instancesManager, volumesManager, networkManager, devicesManager, ingressManager) mainApplication := &application{ Ctx: context, Logger: logger, @@ -63,6 +64,7 @@ func initializeApp() (*application, func(), error) { ImageManager: manager, SystemManager: systemManager, NetworkManager: networkManager, + DeviceManager: devicesManager, InstanceManager: instancesManager, VolumeManager: volumesManager, IngressManager: ingressManager, @@ -83,6 +85,7 @@ type application struct { ImageManager images.Manager SystemManager system.Manager NetworkManager network.Manager + DeviceManager devices.Manager InstanceManager instances.Manager VolumeManager volumes.Manager IngressManager ingress.Manager diff --git a/lib/instances/configdisk.go b/lib/instances/configdisk.go index dfc6bdb5..2ea6c85a 100644 --- a/lib/instances/configdisk.go +++ b/lib/instances/configdisk.go @@ -53,7 +53,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC // Create ext4 disk with config files // Use ext4 for now (can switch to erofs when kernel supports it) diskPath := m.paths.InstanceConfigDisk(inst.Id) - + // Calculate size (config files are tiny, use 1MB minimum) _, err = images.ExportRootfs(tmpDir, diskPath, images.FormatExt4) if err != nil { @@ -70,26 +70,26 @@ func (m *manager) generateConfigScript(inst *Instance, imageInfo *images.Image, if len(imageInfo.Entrypoint) > 0 { entrypoint = shellQuoteArray(imageInfo.Entrypoint) } - + // Prepare cmd value cmd := "" if len(imageInfo.Cmd) > 0 { cmd = shellQuoteArray(imageInfo.Cmd) } - + // Prepare workdir value workdir := shellQuote("/") if imageInfo.WorkingDir != "" { workdir = shellQuote(imageInfo.WorkingDir) } - + // Build 
environment variable exports var envLines strings.Builder mergedEnv := mergeEnv(imageInfo.Env, inst.Env) for key, value := range mergedEnv { envLines.WriteString(fmt.Sprintf("export %s=%s\n", key, shellQuote(value))) } - + // Build network configuration section // Use netConfig directly instead of trying to derive it (VM hasn't started yet) networkSection := "" @@ -105,6 +105,13 @@ GUEST_DNS="%s" `, netConfig.IP, cidr, netConfig.Gateway, netConfig.DNS) } + // GPU passthrough configuration + // When devices are attached, set HAS_GPU=1 to trigger NVIDIA module loading in init + gpuSection := "" + if len(inst.Devices) > 0 { + gpuSection = "\n# GPU passthrough\nHAS_GPU=1\n" + } + // Build volume mounts section // Volumes are attached as /dev/vdd, /dev/vde, etc. (after vda=rootfs, vdb=overlay, vdc=config) // For overlay volumes, two devices are used: base + overlay disk @@ -137,7 +144,7 @@ GUEST_DNS="%s" volumeLines.WriteString("\"\n") volumeSection = volumeLines.String() } - + // Generate script as a readable template block // ENTRYPOINT and CMD contain shell-quoted arrays that will be eval'd in init script := fmt.Sprintf(`#!/bin/sh @@ -149,7 +156,7 @@ CMD="%s" WORKDIR=%s # Environment variables -%s%s%s`, +%s%s%s%s`, inst.Id, entrypoint, cmd, @@ -157,8 +164,9 @@ WORKDIR=%s envLines.String(), networkSection, volumeSection, + gpuSection, ) - + return script } diff --git a/lib/instances/create.go b/lib/instances/create.go index 0e023073..8d14efa8 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -9,6 +9,7 @@ import ( "time" "github.com/nrednav/cuid2" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/logger" "github.com/onkernel/hypeman/lib/network" @@ -141,7 +142,7 @@ func (m *manager) createInstance( return nil, ErrAlreadyExists } - // 5. Apply defaults + // 6. Apply defaults size := req.Size if size == 0 { size = 1 * 1024 * 1024 * 1024 // 1GB default @@ -191,16 +192,42 @@ func (m *manager) createInstance( req.Env = make(map[string]string) } - // 6. Determine network based on NetworkEnabled flag + // 7. Determine network based on NetworkEnabled flag networkName := "" if req.NetworkEnabled { networkName = "default" } - // 7. Get default kernel version + // 8. Get default kernel version kernelVer := m.systemManager.GetDefaultKernelVersion() - // 8. Create instance metadata + // 9. 
Validate, resolve, and auto-bind devices (GPU passthrough) + var resolvedDeviceIDs []string + if len(req.Devices) > 0 && m.deviceManager != nil { + for _, deviceRef := range req.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceRef) + if err != nil { + log.ErrorContext(ctx, "failed to get device", "device", deviceRef, "error", err) + return nil, fmt.Errorf("device %s: %w", deviceRef, err) + } + if device.AttachedTo != nil { + log.ErrorContext(ctx, "device already attached", "device", deviceRef, "instance", *device.AttachedTo) + return nil, fmt.Errorf("device %s is already attached to instance %s", deviceRef, *device.AttachedTo) + } + // Auto-bind to VFIO if not already bound + if !device.BoundToVFIO { + log.InfoContext(ctx, "auto-binding device to VFIO", "device", deviceRef, "pci_address", device.PCIAddress) + if err := m.deviceManager.BindToVFIO(ctx, device.Id); err != nil { + log.ErrorContext(ctx, "failed to bind device to VFIO", "device", deviceRef, "error", err) + return nil, fmt.Errorf("bind device %s to VFIO: %w", deviceRef, err) + } + } + resolvedDeviceIDs = append(resolvedDeviceIDs, device.Id) + } + log.DebugContext(ctx, "validated devices for passthrough", "id", id, "devices", resolvedDeviceIDs) + } + + // 10. Create instance metadata stored := &StoredMetadata{ Id: id, Name: req.Name, @@ -220,6 +247,7 @@ func (m *manager) createInstance( DataDir: m.paths.InstanceDir(id), VsockCID: vsockCID, VsockSocket: vsockSocket, + Devices: resolvedDeviceIDs, } // Setup cleanup stack for automatic rollback on errors @@ -243,7 +271,7 @@ func (m *manager) createInstance( return nil, fmt.Errorf("create overlay disk: %w", err) } - // 10. Allocate network (if network enabled) + // 14. Allocate network (if network enabled) var netConfig *network.NetworkConfig if networkName != "" { log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName) @@ -268,7 +296,7 @@ func (m *manager) createInstance( }) } - // 10.5. Validate and attach volumes + // 15. Validate and attach volumes if len(req.Volumes) > 0 { log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes)) for _, volAttach := range req.Volumes { @@ -308,7 +336,7 @@ func (m *manager) createInstance( stored.Volumes = req.Volumes } - // 11. Create config disk (needs Instance for buildVMConfig) + // 16. 
Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "creating config disk", "instance_id", id) if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil { @@ -487,7 +515,7 @@ func (m *manager) startAndBootVM( // Build VM configuration matching Cloud Hypervisor VmConfig inst := &Instance{StoredMetadata: *stored} - vmConfig, err := m.buildVMConfig(inst, imageInfo, netConfig) + vmConfig, err := m.buildVMConfig(ctx, inst, imageInfo, netConfig) if err != nil { return fmt.Errorf("build vm config: %w", err) } @@ -537,7 +565,7 @@ func (m *manager) startAndBootVM( } // buildVMConfig creates the Cloud Hypervisor VmConfig -func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (vmm.VmConfig, error) { +func (m *manager) buildVMConfig(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) (vmm.VmConfig, error) { // Get system file paths kernelPath, _ := m.systemManager.GetKernelPath(system.KernelVersion(inst.KernelVersion)) initrdPath, _ := m.systemManager.GetInitrdPath() @@ -644,6 +672,22 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConf Socket: inst.VsockSocket, } + // Device passthrough configuration (GPU, etc.) + var deviceConfigs *[]vmm.DeviceConfig + if len(inst.Devices) > 0 && m.deviceManager != nil { + configs := make([]vmm.DeviceConfig, 0, len(inst.Devices)) + for _, deviceID := range inst.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceID) + if err != nil { + return vmm.VmConfig{}, fmt.Errorf("get device %s: %w", deviceID, err) + } + configs = append(configs, vmm.DeviceConfig{ + Path: devices.GetDeviceSysfsPath(device.PCIAddress), + }) + } + deviceConfigs = &configs + } + return vmm.VmConfig{ Payload: payload, Cpus: &cpus, @@ -653,6 +697,7 @@ func (m *manager) buildVMConfig(inst *Instance, imageInfo *images.Image, netConf Console: &console, Net: nets, Vsock: &vsock, + Devices: deviceConfigs, }, nil } diff --git a/lib/instances/delete.go b/lib/instances/delete.go index 24d8ddbd..06bc50c8 100644 --- a/lib/instances/delete.go +++ b/lib/instances/delete.go @@ -59,7 +59,24 @@ func (m *manager) deleteInstance( } } - // 5. Detach volumes + // 5. Detach and auto-unbind devices from VFIO + if len(inst.Devices) > 0 && m.deviceManager != nil { + for _, deviceID := range inst.Devices { + log.DebugContext(ctx, "detaching device", "id", id, "device", deviceID) + // Mark device as detached + if err := m.deviceManager.MarkDetached(ctx, deviceID); err != nil { + log.WarnContext(ctx, "failed to mark device as detached", "id", id, "device", deviceID, "error", err) + } + // Auto-unbind from VFIO so native driver can reclaim it + log.InfoContext(ctx, "auto-unbinding device from VFIO", "id", id, "device", deviceID) + if err := m.deviceManager.UnbindFromVFIO(ctx, deviceID); err != nil { + // Log but continue - device might already be unbound or in use by another instance + log.WarnContext(ctx, "failed to unbind device from VFIO", "id", id, "device", deviceID, "error", err) + } + } + } + + // 5b. 
Detach volumes if len(inst.Volumes) > 0 { log.DebugContext(ctx, "detaching volumes", "instance_id", id, "count", len(inst.Volumes)) for _, volAttach := range inst.Volumes { diff --git a/lib/instances/manager.go b/lib/instances/manager.go index 3b95a1cb..7244c01f 100644 --- a/lib/instances/manager.go +++ b/lib/instances/manager.go @@ -5,6 +5,7 @@ import ( "fmt" "sync" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" @@ -46,6 +47,7 @@ type manager struct { imageManager images.Manager systemManager system.Manager networkManager network.Manager + deviceManager devices.Manager volumeManager volumes.Manager limits ResourceLimits instanceLocks sync.Map // map[string]*sync.RWMutex - per-instance locks @@ -55,12 +57,13 @@ type manager struct { // NewManager creates a new instances manager. // If meter is nil, metrics are disabled. -func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, volumeManager volumes.Manager, limits ResourceLimits, meter metric.Meter, tracer trace.Tracer) Manager { +func NewManager(p *paths.Paths, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager, limits ResourceLimits, meter metric.Meter, tracer trace.Tracer) Manager { m := &manager{ paths: p, imageManager: imageManager, systemManager: systemManager, networkManager: networkManager, + deviceManager: deviceManager, volumeManager: volumeManager, limits: limits, instanceLocks: sync.Map{}, diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 2ee0a7f4..19241a3a 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -17,6 +17,7 @@ import ( "github.com/joho/godotenv" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/exec" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" @@ -46,6 +47,7 @@ func setupTestManager(t *testing.T) (*manager, string) { systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB @@ -54,7 +56,7 @@ func setupTestManager(t *testing.T) (*manager, string) { MaxTotalVcpus: 0, // unlimited MaxTotalMemory: 0, // unlimited } - mgr := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, nil).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Register cleanup to kill any orphaned Cloud Hypervisor processes t.Cleanup(func() { @@ -754,6 +756,7 @@ func TestStorageOperations(t *testing.T) { imageManager, _ := images.NewManager(p, 1, nil) systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // 0 = unlimited storage limits := ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB @@ -762,7 +765,7 @@ func TestStorageOperations(t *testing.T) { MaxTotalVcpus: 0, // unlimited MaxTotalMemory: 0, // unlimited } - manager := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, 
nil).(*manager) + manager := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Test metadata doesn't exist initially _, err := manager.loadMetadata("nonexistent") diff --git a/lib/instances/resource_limits_test.go b/lib/instances/resource_limits_test.go index 83930421..f003f3a8 100644 --- a/lib/instances/resource_limits_test.go +++ b/lib/instances/resource_limits_test.go @@ -8,6 +8,7 @@ import ( "time" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" "github.com/onkernel/hypeman/lib/paths" @@ -159,9 +160,10 @@ func createTestManager(t *testing.T, limits ResourceLimits) *manager { systemMgr := system.NewManager(p) networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) volumeMgr := volumes.NewManager(p, 0, nil) - return NewManager(p, imageMgr, systemMgr, networkMgr, volumeMgr, limits, nil, nil).(*manager) + return NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil).(*manager) } func TestResourceLimits_StructValues(t *testing.T) { @@ -251,6 +253,7 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { systemManager := system.NewManager(p) networkManager := network.NewManager(p, cfg, nil) + deviceManager := devices.NewManager(p) volumeManager := volumes.NewManager(p, 0, nil) // Set small aggregate limits: @@ -264,7 +267,7 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { MaxTotalMemory: 2 * 1024 * 1024 * 1024, // aggregate: only 2GB total } - mgr := NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, nil, nil).(*manager) + mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) // Cleanup any orphaned processes on test end t.Cleanup(func() { diff --git a/lib/providers/providers.go b/lib/providers/providers.go index 62523063..ecbeb708 100644 --- a/lib/providers/providers.go +++ b/lib/providers/providers.go @@ -8,6 +8,7 @@ import ( "github.com/c2h5oh/datasize" "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/ingress" "github.com/onkernel/hypeman/lib/instances" @@ -70,8 +71,13 @@ func ProvideNetworkManager(p *paths.Paths, cfg *config.Config) network.Manager { return network.NewManager(p, cfg, meter) } +// ProvideDeviceManager provides the device manager +func ProvideDeviceManager(p *paths.Paths) devices.Manager { + return devices.NewManager(p) +} + // ProvideInstanceManager provides the instance manager -func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, volumeManager volumes.Manager) (instances.Manager, error) { +func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager images.Manager, systemManager system.Manager, networkManager network.Manager, deviceManager devices.Manager, volumeManager volumes.Manager) (instances.Manager, error) { // Parse max overlay size from config var maxOverlaySize datasize.ByteSize if err := maxOverlaySize.UnmarshalText([]byte(cfg.MaxOverlaySize)); err != nil { @@ -108,7 +114,7 @@ func ProvideInstanceManager(p *paths.Paths, cfg *config.Config, imageManager ima meter := otel.GetMeterProvider().Meter("hypeman") tracer := otel.GetTracerProvider().Tracer("hypeman") - 
return instances.NewManager(p, imageManager, systemManager, networkManager, volumeManager, limits, meter, tracer), nil + return instances.NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, meter, tracer), nil } // ProvideVolumeManager provides the volume manager From f563ada6a82fbb89be9047aa1d41402ec290c252 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sat, 13 Dec 2025 21:46:56 +0000 Subject: [PATCH 07/17] feat(api): add devices API endpoints and documentation Add REST API for device management and supporting documentation: API endpoints: - GET/POST /devices - List and register devices - GET/DELETE /devices/{id} - Get and delete devices - GET /devices/available - Discover passthrough-capable devices - instances.go: Accept devices param in CreateInstance Documentation: - GPU.md: GPU passthrough architecture and driver injection - README.md: Device management usage guide - scripts/gpu-reset.sh: GPU reset utility Tests and fixtures: - gpu_e2e_test.go, gpu_inference_test.go, gpu_module_test.go - testdata/ollama-cuda/ - CUDA test container Also adds build-preview-cli Makefile target. --- Makefile | 8 +- cmd/api/api/devices.go | 167 ++ cmd/api/api/instances.go | 7 + cmd/api/main.go | 12 + go.mod | 12 +- go.sum | 61 +- lib/devices/GPU.md | 177 ++ lib/devices/README.md | 451 +++++ lib/devices/gpu_e2e_test.go | 353 ++++ lib/devices/gpu_inference_test.go | 536 ++++++ lib/devices/gpu_module_test.go | 505 ++++++ lib/devices/scripts/gpu-reset.sh | 178 ++ lib/devices/testdata/ollama-cuda/Dockerfile | 29 + lib/devices/testdata/ollama-cuda/test-cuda.py | 63 + lib/devices/testdata/ollama-cuda/test-nvml.py | 83 + lib/oapi/oapi.go | 1478 ++++++++++++++++- openapi.yaml | 278 ++++ stainless.yaml | 12 + 18 files changed, 4316 insertions(+), 94 deletions(-) create mode 100644 cmd/api/api/devices.go create mode 100644 lib/devices/GPU.md create mode 100644 lib/devices/README.md create mode 100644 lib/devices/gpu_e2e_test.go create mode 100644 lib/devices/gpu_inference_test.go create mode 100644 lib/devices/gpu_module_test.go create mode 100755 lib/devices/scripts/gpu-reset.sh create mode 100644 lib/devices/testdata/ollama-cuda/Dockerfile create mode 100644 lib/devices/testdata/ollama-cuda/test-cuda.py create mode 100644 lib/devices/testdata/ollama-cuda/test-nvml.py diff --git a/Makefile b/Makefile index 07b2bf0c..d0aa43a3 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ SHELL := /bin/bash -.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries release-prep clean +.PHONY: oapi-generate generate-vmm-client generate-wire generate-all dev build test install-tools gen-jwt download-ch-binaries download-ch-spec ensure-ch-binaries build-caddy-binaries build-caddy ensure-caddy-binaries build-preview-cli release-prep clean # Directory where local binaries will be installed BIN_DIR ?= $(CURDIR)/bin @@ -168,6 +168,12 @@ build: ensure-ch-binaries ensure-caddy-binaries lib/system/exec_agent/exec-agent # Build all binaries build-all: build +# Build preview CLI from stainless-sdks/hypeman-cli +# Usage: make build-preview-cli - uses preview/ +# make build-preview-cli CLI_BRANCH=preview/xyz - uses specific branch +build-preview-cli: + @./scripts/build-preview-cli.sh $(CLI_BRANCH) + # Run in development mode with hot reload dev: ensure-ch-binaries ensure-caddy-binaries lib/system/exec_agent/exec-agent $(AIR) @rm -f 
./tmp/main diff --git a/cmd/api/api/devices.go b/cmd/api/api/devices.go new file mode 100644 index 00000000..d7d2dd28 --- /dev/null +++ b/cmd/api/api/devices.go @@ -0,0 +1,167 @@ +package api + +import ( + "context" + "errors" + + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/oapi" +) + +// ListDevices returns all registered devices +func (s *ApiService) ListDevices(ctx context.Context, request oapi.ListDevicesRequestObject) (oapi.ListDevicesResponseObject, error) { + deviceList, err := s.DeviceManager.ListDevices(ctx) + if err != nil { + return oapi.ListDevices500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + result := make([]oapi.Device, len(deviceList)) + for i, d := range deviceList { + result[i] = deviceToOAPI(d) + } + + return oapi.ListDevices200JSONResponse(result), nil +} + +// ListAvailableDevices discovers passthrough-capable devices on the host +func (s *ApiService) ListAvailableDevices(ctx context.Context, request oapi.ListAvailableDevicesRequestObject) (oapi.ListAvailableDevicesResponseObject, error) { + available, err := s.DeviceManager.ListAvailableDevices(ctx) + if err != nil { + return oapi.ListAvailableDevices500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + result := make([]oapi.AvailableDevice, len(available)) + for i, d := range available { + result[i] = availableDeviceToOAPI(d) + } + + return oapi.ListAvailableDevices200JSONResponse(result), nil +} + +// CreateDevice registers a new device for passthrough +func (s *ApiService) CreateDevice(ctx context.Context, request oapi.CreateDeviceRequestObject) (oapi.CreateDeviceResponseObject, error) { + var name string + if request.Body.Name != nil { + name = *request.Body.Name + } + req := devices.CreateDeviceRequest{ + Name: name, + PCIAddress: request.Body.PciAddress, + } + + device, err := s.DeviceManager.CreateDevice(ctx, req) + if err != nil { + switch { + case errors.Is(err, devices.ErrInvalidName): + return oapi.CreateDevice400JSONResponse{ + Code: "invalid_name", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrInvalidPCIAddress): + return oapi.CreateDevice400JSONResponse{ + Code: "invalid_pci_address", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrDeviceNotFound): + return oapi.CreateDevice404JSONResponse{ + Code: "device_not_found", + Message: err.Error(), + }, nil + case errors.Is(err, devices.ErrAlreadyExists), errors.Is(err, devices.ErrNameExists): + return oapi.CreateDevice409JSONResponse{ + Code: "conflict", + Message: err.Error(), + }, nil + default: + return oapi.CreateDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + } + + return oapi.CreateDevice201JSONResponse(deviceToOAPI(*device)), nil +} + +// GetDevice returns a device by ID or name +func (s *ApiService) GetDevice(ctx context.Context, request oapi.GetDeviceRequestObject) (oapi.GetDeviceResponseObject, error) { + device, err := s.DeviceManager.GetDevice(ctx, request.Id) + if err != nil { + if errors.Is(err, devices.ErrNotFound) { + return oapi.GetDevice404JSONResponse{ + Code: "not_found", + Message: "device not found", + }, nil + } + return oapi.GetDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + + return oapi.GetDevice200JSONResponse(deviceToOAPI(*device)), nil +} + +// DeleteDevice unregisters a device +func (s *ApiService) DeleteDevice(ctx context.Context, request oapi.DeleteDeviceRequestObject) (oapi.DeleteDeviceResponseObject, error) { + 
err := s.DeviceManager.DeleteDevice(ctx, request.Id) + if err != nil { + switch { + case errors.Is(err, devices.ErrNotFound): + return oapi.DeleteDevice404JSONResponse{ + Code: "not_found", + Message: "device not found", + }, nil + case errors.Is(err, devices.ErrInUse): + return oapi.DeleteDevice409JSONResponse{ + Code: "in_use", + Message: "device is attached to an instance", + }, nil + default: + return oapi.DeleteDevice500JSONResponse{ + Code: "internal_error", + Message: err.Error(), + }, nil + } + } + + return oapi.DeleteDevice204Response{}, nil +} + +// Helper functions + +func deviceToOAPI(d devices.Device) oapi.Device { + deviceType := oapi.DeviceType(d.Type) + return oapi.Device{ + Id: d.Id, + Name: &d.Name, + Type: deviceType, + PciAddress: d.PCIAddress, + VendorId: d.VendorID, + DeviceId: d.DeviceID, + IommuGroup: d.IOMMUGroup, + BoundToVfio: d.BoundToVFIO, + AttachedTo: d.AttachedTo, + CreatedAt: d.CreatedAt, + } +} + +func availableDeviceToOAPI(d devices.AvailableDevice) oapi.AvailableDevice { + return oapi.AvailableDevice{ + PciAddress: d.PCIAddress, + VendorId: d.VendorID, + DeviceId: d.DeviceID, + VendorName: &d.VendorName, + DeviceName: &d.DeviceName, + IommuGroup: d.IOMMUGroup, + CurrentDriver: d.CurrentDriver, + } +} + + diff --git a/cmd/api/api/instances.go b/cmd/api/api/instances.go index 8adb8c8d..acbd37c3 100644 --- a/cmd/api/api/instances.go +++ b/cmd/api/api/instances.go @@ -96,6 +96,12 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst networkEnabled = *request.Body.Network.Enabled } + // Parse devices (GPU passthrough) + var deviceRefs []string + if request.Body.Devices != nil { + deviceRefs = *request.Body.Devices + } + // Parse volumes var volumes []instances.VolumeAttachment if request.Body.Volumes != nil { @@ -139,6 +145,7 @@ func (s *ApiService) CreateInstance(ctx context.Context, request oapi.CreateInst Vcpus: vcpus, Env: env, NetworkEnabled: networkEnabled, + Devices: deviceRefs, Volumes: volumes, } diff --git a/cmd/api/main.go b/cmd/api/main.go index e2cb704d..48c9e312 100644 --- a/cmd/api/main.go +++ b/cmd/api/main.go @@ -172,6 +172,18 @@ func run() error { } logger.Info("Network manager initialized") + // Reconcile device state (clears orphaned attachments from crashed VMs) + // Set up liveness checker so device reconciliation can accurately detect orphaned attachments + logger.Info("Reconciling device state...") + livenessChecker := instances.NewLivenessChecker(app.InstanceManager) + if livenessChecker != nil { + app.DeviceManager.SetLivenessChecker(livenessChecker) + } + if err := app.DeviceManager.ReconcileDevices(app.Ctx); err != nil { + logger.Error("failed to reconcile device state", "error", err) + return fmt.Errorf("reconcile device state: %w", err) + } + // Initialize ingress manager (starts Caddy daemon and DNS server for dynamic upstreams) logger.Info("Initializing ingress manager...") if err := app.IngressManager.Initialize(app.Ctx); err != nil { diff --git a/go.mod b/go.mod index da8f3155..0359d7bc 100644 --- a/go.mod +++ b/go.mod @@ -41,7 +41,6 @@ require ( go.opentelemetry.io/otel/trace v1.38.0 golang.org/x/sync v0.17.0 golang.org/x/sys v0.38.0 - golang.org/x/term v0.37.0 google.golang.org/grpc v1.77.0 google.golang.org/protobuf v1.36.10 gvisor.dev/gvisor v0.0.0-20251125014920-fc40e232ff54 @@ -49,15 +48,22 @@ require ( require ( github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 // indirect + github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c // indirect + 
github.com/Microsoft/go-winio v0.6.2 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect github.com/apex/log v1.9.0 // indirect github.com/blang/semver/v4 v4.0.0 // indirect github.com/cenkalti/backoff/v5 v5.0.3 // indirect + github.com/containerd/errdefs v1.0.0 // indirect + github.com/containerd/errdefs/pkg v0.3.0 // indirect github.com/containerd/stargz-snapshotter/estargz v0.16.3 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/docker/cli v28.2.2+incompatible // indirect github.com/docker/distribution v2.8.3+incompatible // indirect + github.com/docker/docker v28.2.2+incompatible // indirect github.com/docker/docker-credential-helpers v0.9.3 // indirect + github.com/docker/go-connections v0.5.0 // indirect + github.com/docker/go-units v0.5.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/go-logr/logr v1.4.3 // indirect @@ -65,6 +71,7 @@ require ( github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect github.com/go-test/deep v1.1.1 // indirect + github.com/gogo/protobuf v1.3.2 // indirect github.com/google/uuid v1.6.0 // indirect github.com/gorilla/mux v1.8.1 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect @@ -74,6 +81,8 @@ require ( github.com/mailru/easyjson v0.7.7 // indirect github.com/mdlayher/socket v0.5.1 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect + github.com/moby/docker-image-spec v1.3.1 // indirect + github.com/moby/sys/sequential v0.6.0 // indirect github.com/moby/sys/user v0.4.0 // indirect github.com/moby/sys/userns v0.1.0 // indirect github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 // indirect @@ -92,6 +101,7 @@ require ( github.com/vishvananda/netns v0.0.5 // indirect github.com/woodsbury/decimal128 v1.3.0 // indirect go.opentelemetry.io/auto/sdk v1.2.1 // indirect + go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect go.opentelemetry.io/otel/log v0.14.0 // indirect go.opentelemetry.io/proto/otlp v1.7.1 // indirect diff --git a/go.sum b/go.sum index 0ee9efda..6772c9ed 100644 --- a/go.sum +++ b/go.sum @@ -1,5 +1,9 @@ github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6 h1:He8afgbRMd7mFxO99hRNu+6tazq8nFF9lIwo9JFroBk= github.com/AdaLogics/go-fuzz-headers v0.0.0-20240806141605-e8a1dd7889d6/go.mod h1:8o94RPi1/7XTJvwPpRSzSUedZrtlirdB3r9Z20bi2f8= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg= +github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= @@ -15,8 +19,17 @@ github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2y github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500 
h1:6lhrsTEnloDPXyeZBvSYvQf8u86jbKehZPVDDlkgDl4= github.com/c2h5oh/datasize v0.0.0-20231215233829-aa82cc1e6500/go.mod h1:S/7n9copUssQ56c7aAgHqftWO4LTf4xY6CGWt8Bc+3M= +github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4= +github.com/cenkalti/backoff/v4 v4.1.3 h1:cFAlzYUlVYDysBEH2T5hyJZMh3+5+WCBvSnK6Q8UtC4= +github.com/cenkalti/backoff/v4 v4.1.3/go.mod h1:scbssz8iZGpm3xbr14ovlUdkxfGXNInqkPWOWmG2CLw= github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM= github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw= +github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI= +github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M= +github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE= +github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk= +github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I= +github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo= github.com/containerd/stargz-snapshotter/estargz v0.16.3 h1:7evrXtoh1mSbGj/pfRccTampEyKpjpOnS3CyiV1Ebr8= github.com/containerd/stargz-snapshotter/estargz v0.16.3/go.mod h1:uyr4BfYfOj3G9WBVE8cOlQmXAbPN9VEQpBBeJIuOipU= github.com/creack/pty v1.1.24 h1:bJrF4RRfyJnbTJqzRLHzcGaZK1NeM5kTC9jGgovnR1s= @@ -33,8 +46,14 @@ github.com/docker/cli v28.2.2+incompatible h1:qzx5BNUDFqlvyq4AHzdNB7gSyVTmU4cgsy github.com/docker/cli v28.2.2+incompatible/go.mod h1:JLrzqnKDaYBop7H2jaqPtU4hHvMKP+vjCwu2uszcLI8= github.com/docker/distribution v2.8.3+incompatible h1:AtKxIZ36LoNK51+Z6RpzLpddBirtxJnzDrHLEKxTAYk= github.com/docker/distribution v2.8.3+incompatible/go.mod h1:J2gT2udsDAN96Uj4KfcMRqY0/ypR+oyYUYmja8H+y+w= +github.com/docker/docker v28.2.2+incompatible h1:CjwRSksz8Yo4+RmQ339Dp/D2tGO5JxwYeqtMOEe0LDw= +github.com/docker/docker v28.2.2+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk= github.com/docker/docker-credential-helpers v0.9.3 h1:gAm/VtF9wgqJMoxzT3Gj5p4AqIjCBS4wrsOh9yRqcz8= github.com/docker/docker-credential-helpers v0.9.3/go.mod h1:x+4Gbw9aGmChi3qTLZj8Dfn0TD20M/fuWy0E5+WDeCo= +github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c= +github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc= +github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= +github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY= github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto= github.com/fatih/color v1.7.0/go.mod h1:Zm6kSWBoL9eyXnKyktHP6abPY2pDugNf5KwzbycvMj4= @@ -59,6 +78,8 @@ github.com/go-openapi/swag v0.23.0 h1:vsEVJDUo2hPJ2tu0/Xc+4noaxyEffXNIs3cOULZ+Gr github.com/go-openapi/swag v0.23.0/go.mod h1:esZ8ITTYEsH1V2trKHjAN8Ai7xHb8RV+YSZ577vPjgQ= github.com/go-test/deep v1.1.1 h1:0r/53hagsehfO4bzD2Pgr/+RgHqhmf+k1Bpse2cTu1U= github.com/go-test/deep v1.1.1/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= +github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q= +github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q= github.com/golang-jwt/jwt/v5 v5.3.0 h1:pv4AsKCKKZuqlgs5sUmn4x8UlGa0kEVt/puTpKx9vvo= github.com/golang-jwt/jwt/v5 v5.3.0/go.mod 
h1:fxCRLWMO43lRc8nhHWY6LGqRcf+1gQWArsqaEUEa5bE= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -90,6 +111,8 @@ github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8Hm github.com/josharian/intern v1.0.0/go.mod h1:5DoeVV0s6jJacbCEi61lwdGj/aVlrQvzHFFd8Hwg//Y= github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgbbvHEiQClaW2NsSzMyGHqN+rDFqY705q49KG0= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= +github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8= +github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= github.com/klauspost/pgzip v1.2.6 h1:8RXeL5crjEUFnR2/Sn6GJNWtSQ3Dk8pq4CL3jvdDyjU= @@ -117,12 +140,22 @@ github.com/miekg/dns v1.1.68 h1:jsSRkNozw7G/mnmXULynzMNIsgY2dHC8LO6U6Ij2JEA= github.com/miekg/dns v1.1.68/go.mod h1:fujopn7TB3Pu3JM69XaawiU0wqjpL9/8xGop5UrTPps= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= +github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0= +github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo= +github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw= +github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs= +github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU= +github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko= github.com/moby/sys/user v0.4.0 h1:jhcMKit7SA80hivmFJcbB1vqmw//wU61Zdui2eQXuMs= github.com/moby/sys/user v0.4.0/go.mod h1:bG+tYYYJgaMtRKgEmuueC0hJEAZWwtIbZTB+85uoHjs= github.com/moby/sys/userns v0.1.0 h1:tVLXkFOxVu9A64/yh59slHVv9ahO9UIev4JZusOLG/g= github.com/moby/sys/userns v0.1.0/go.mod h1:IHUYgu/kao6N8YZlp9Cf444ySSvCmDlmzUcYfDHOl28= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587 h1:HfkjXDfhgVaN5rmueG8cL8KKeFNecRCXFhaJ2qZ5SKA= +github.com/moby/term v0.0.0-20221205130635-1aeaba878587/go.mod h1:8FzsFHVUBGZdbDsJw/ot+X+d5HLUbvklYLJ9uGfcI3Y= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826 h1:RWengNIwukTxcDr9M+97sNutRR1RKhG96O6jWumTTnw= github.com/mohae/deepcopy v0.0.0-20170929034955-c48cc78d4826/go.mod h1:TaXosZuwdSHYgviHp1DAtfrULt5eUgsSMsZf+YrPgl8= +github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A= +github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc= github.com/nrednav/cuid2 v1.1.0 h1:Y2P9Fo1Iz7lKuwcn+fS0mbxkNvEqoNLUtm0+moHCnYc= github.com/nrednav/cuid2 v1.1.0/go.mod h1:jBjkJAI+QLM4EUGvtwGDHC1cP1QQrRNfLo/A7qJFDhA= github.com/oapi-codegen/nethttp-middleware v1.1.2 h1:TQwEU3WM6ifc7ObBEtiJgbRPaCe513tvJpiMJjypVPA= @@ -198,10 +231,14 @@ github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zd github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM= github.com/woodsbury/decimal128 v1.3.0 h1:8pffMNWIlC0O5vbyHWFZAt5yWvWcrHA+3ovIIjVWss0= github.com/woodsbury/decimal128 v1.3.0/go.mod h1:C5UTmyTjW3JftjUFzOVhC20BEQa2a4ZKOB5I6Zjb+ds= +github.com/yuin/goldmark v1.1.27/go.mod 
h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64= go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y= go.opentelemetry.io/contrib/bridges/otelslog v0.13.0 h1:bwnLpizECbPr1RrQ27waeY2SPIPeccCx/xLuoYADZ9s= go.opentelemetry.io/contrib/bridges/otelslog v0.13.0/go.mod h1:3nWlOiiqA9UtUnrcNk82mYasNxD8ehOspL0gOfEo6Y4= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 h1:F7Jx+6hwnZ41NSFTO5q4LYDtJRXBf2PD0rNBkeB/lus= +go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0/go.mod h1:UHB22Z8QsdRDrnAtX4PntOl36ajSxcdUMt1sF7Y6E7Q= go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0 h1:PeBoRj6af6xMI7qCupwFvTbbnd49V7n5YpG6pg8iDYQ= go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0/go.mod h1:ingqBCtMCe8I4vpz/UVzCW6sxoqgZB37nao91mLQ3Bw= go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8= @@ -214,6 +251,8 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZF go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54= go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0 h1:wpMfgF8E1rkrT1Z6meFh1NDtownE9Ii3n3X2GJYjsaU= +go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.33.0/go.mod h1:wAy0T/dUbs468uOlkT31xjvqQgEVXv58BRFWEgn5v/0= go.opentelemetry.io/otel/log v0.14.0 h1:2rzJ+pOAZ8qmZ3DDHg73NEKzSZkhkGIua9gXtxNGgrM= go.opentelemetry.io/otel/log v0.14.0/go.mod h1:5jRG92fEAgx0SU/vFPxmJvhIuDU9E1SUnEQrMlJpOno= go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA= @@ -234,37 +273,55 @@ go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20190426145343-a29dc8fdc734/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= +golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04= golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0= +golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U= golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI= golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod 
h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82 h1:6/3JGEh1C88g7m+qzzTbl3A0FtsLguXieqofVLU/JAo= golang.org/x/net v0.46.1-0.20251013234738-63d1a5100f82/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug= golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI= golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190222072716-a9d3bda3a223/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210616094352-59db8d763f22/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.38.0 h1:3yZWxaJjBmCWXqhN1qh02AkOnCQ1poK6oF+a7xWL6Gc= golang.org/x/sys v0.38.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks= -golang.org/x/term v0.37.0 h1:8EGAD0qCmHYZg6J17DvsMy9/wJ7/D/4pV/wfnld5lTU= -golang.org/x/term v0.37.0/go.mod h1:5pB4lxRNYYVZuTLmy8oR2BH8dflOR+IbTYFD8fi3254= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k= golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= +golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE= golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= google.golang.org/genproto/googleapis/api v0.0.0-20251022142026-3a174f9686a8 h1:mepRgnBZa07I4TRuomDE4sTIYieg/osKmzIf4USdWS4= diff --git a/lib/devices/GPU.md b/lib/devices/GPU.md new file mode 100644 index 00000000..55c73673 --- /dev/null +++ b/lib/devices/GPU.md @@ -0,0 +1,177 @@ +# GPU Passthrough Support + +This document covers NVIDIA GPU passthrough specifics. For general device passthrough, see [README.md](README.md). + +## How GPU Passthrough Works + +hypeman supports NVIDIA GPU passthrough via VFIO, with automatic driver injection: + +``` +┌─────────────────────────────────────────────────────────────────────┐ +│ hypeman Initrd (built at startup) │ +│ ┌──────────────────────────────────────────────────────────────┐ │ +│ │ /lib/modules//kernel/drivers/gpu/ │ │ +│ │ ├── nvidia.ko │ │ +│ │ ├── nvidia-uvm.ko │ │ +│ │ ├── nvidia-modeset.ko │ │ +│ │ └── nvidia-drm.ko │ │ +│ ├──────────────────────────────────────────────────────────────┤ │ +│ │ /usr/lib/nvidia/ │ │ +│ │ ├── libcuda.so.570.86.16 │ │ +│ │ ├── libnvidia-ml.so.570.86.16 │ │ +│ │ ├── libnvidia-ptxjitcompiler.so.570.86.16 │ │ +│ │ └── ... (other driver libraries) │ │ +│ └──────────────────────────────────────────────────────────────┘ │ +└─────────────────────────────────────────────────────────────────────┘ + │ + ▼ (at VM boot, if HAS_GPU=1) +┌─────────────────────────────────────────────────────────────────────┐ +│ Guest VM │ +│ 1. Load kernel modules (modprobe nvidia, etc.) │ +│ 2. Create device nodes (/dev/nvidia0, /dev/nvidiactl, etc.) │ +│ 3. Copy driver libs to container rootfs │ +│ 4. Run ldconfig to update library cache │ +│ 5. Container can now use GPU! │ +└─────────────────────────────────────────────────────────────────────┘ +``` + +## Container Image Requirements + +With driver injection, containers **do not need** to bundle NVIDIA driver libraries. + +**Minimal CUDA image example:** + +```dockerfile +FROM nvidia/cuda:12.4-runtime-ubuntu22.04 +# Your application - no driver installation needed! +RUN pip install torch +CMD ["python", "train.py"] +``` + +hypeman injects the following at boot: + +- `libcuda.so` - CUDA driver API +- `libnvidia-ml.so` - NVML (nvidia-smi, monitoring) +- `libnvidia-ptxjitcompiler.so` - PTX JIT compilation +- `libnvidia-nvvm.so` - NVVM compiler +- `libnvidia-gpucomp.so` - GPU compute library +- `nvidia-smi` binary +- `nvidia-modprobe` binary + +## Driver Version Compatibility + +The driver libraries injected by hypeman are pinned to a specific version that matches the kernel modules. This version is tracked in: + +- **Kernel release:** `onkernel/linux` GitHub releases (e.g., `ch-6.12.8-kernel-2-20251211`) +- **hypeman config:** `lib/system/versions.go` - `NvidiaDriverVersion` map + +### Current Driver Version + +| Kernel Version | Driver Version | Release Date | +|---------------|----------------|--------------| +| ch-6.12.8-kernel-2-20251211 | 570.86.16 | 2025-12-11 | + +### CUDA Compatibility + +Driver 570.86.16 supports CUDA 12.4 and earlier. Check [NVIDIA's compatibility matrix](https://docs.nvidia.com/deploy/cuda-compatibility/) for details. + +## Upgrading the Driver + +To upgrade the NVIDIA driver version: + +1. 
**Choose a new version** from [NVIDIA's Linux drivers](https://www.nvidia.com/Download/index.aspx) + +2. **Update onkernel/linux:** + - Edit `.github/workflows/release.yaml` + - Change `DRIVER_VERSION=` in all locations (search for the current version) + - The workflow file contains comments explaining what to update + - Create a new release tag (e.g., `ch-6.12.8-kernel-2-YYYYMMDD`) + +3. **Update hypeman:** + - Edit `lib/system/versions.go` + - Add new `KernelVersion` constant + - Update `DefaultKernelVersion` + - Update `NvidiaDriverVersion` map entry + - Update `NvidiaModuleURLs` with new release URL + - Update `NvidiaDriverLibURLs` with new release URL + +4. **Test thoroughly** before deploying: + - Run GPU passthrough E2E tests + - Verify with real CUDA workloads (e.g., ollama inference) + +## Supported GPUs + +All NVIDIA datacenter GPUs supported by the open-gpu-kernel-modules are supported: + +- NVIDIA H100, H200 +- NVIDIA L4, L40, L40S +- NVIDIA A100, A10, A30 +- NVIDIA T4 +- And other Turing/Ampere/Hopper/Ada Lovelace architecture GPUs + +Consumer GPUs (GeForce) are **not** supported by the open kernel modules. + +## Troubleshooting + +### nvidia-smi shows wrong driver version + +The driver version shown by nvidia-smi should match hypeman's configured version. If it differs, the container may have its own driver libraries that are taking precedence. Either: + +- Use a minimal CUDA runtime image without driver libs +- Or ensure the container's driver version matches + +### CUDA initialization failed + +Check that: + +1. Kernel modules are loaded: `cat /proc/modules | grep nvidia` +2. Device nodes exist: `ls -la /dev/nvidia*` +3. Libraries are in LD_LIBRARY_PATH: `ldconfig -p | grep nvidia` + +### Driver/library version mismatch + +Error like `NVML_ERROR_LIB_RM_VERSION_MISMATCH` means the userspace library version doesn't match the kernel module version. This shouldn't happen with hypeman's automatic injection, but can occur if the container has its own driver libraries. + +**Solution:** Use a base image that doesn't include driver libraries, or ensure any bundled libraries match the hypeman driver version. + +### GPU not detected in container + +1. Verify the GPU was attached to the instance: + ```bash + hypeman instance get <instance-id> | jq .devices + ``` + +2. Check the VM console log for module loading errors: + ```bash + cat /var/lib/hypeman/instances/<instance-id>/console.log | grep -i nvidia + ``` + +3. Verify VFIO binding on the host: + ```bash + ls -la /sys/bus/pci/devices/<pci-address>/driver + ``` + +## Performance Tuning + +### Huge Pages + +For best GPU performance, enable huge pages on the host: + +```bash +echo 1024 > /proc/sys/vm/nr_hugepages +``` + +### IOMMU Configuration + +Ensure IOMMU is properly configured: + +```bash +# Intel +intel_iommu=on iommu=pt + +# AMD +amd_iommu=on iommu=pt +``` + +The `iommu=pt` (passthrough) option improves performance for devices not using VFIO. + diff --git a/lib/devices/README.md b/lib/devices/README.md new file mode 100644 index 00000000..0e34e662 --- /dev/null +++ b/lib/devices/README.md @@ -0,0 +1,451 @@ +# Device Passthrough + +This package provides GPU and PCI device passthrough for virtual machines using the Linux VFIO (Virtual Function I/O) framework. + +## Overview + +Device passthrough allows a VM to have direct, near-native access to physical hardware (GPUs, network cards, etc.) by bypassing the host's device drivers and giving the guest exclusive control.
For a deep dive into the VFIO framework, see the [kernel documentation](https://docs.kernel.org/driver-api/vfio.html). + +``` +┌─────────────────────────────────────────────────────────────┐ +│ Host │ +│ ┌─────────────┐ ┌─────────────────────────────────┐ │ +│ │ hypeman │ │ VFIO Driver │ │ +│ │ (VMM) │────▶│ /dev/vfio/ │ │ +│ └─────────────┘ └─────────────────────────────────┘ │ +│ │ │ +│ ┌───────────────────────────┼──────────────────────────┐ │ +│ │ IOMMU (hardware) ▼ │ │ +│ │ - Translates guest physical → host physical │ │ +│ │ - Isolates DMA (device can only access VM memory) │ │ +│ └──────────────────────────────────────────────────────┘ │ +│ │ │ +│ ▼ │ +│ ┌──────────────┐ │ +│ │ GPU (PCIe) │ │ +│ └──────────────┘ │ +└─────────────────────────────────────────────────────────────┘ +``` + +## Package Structure + +``` +lib/devices/ +├── types.go # Device, AvailableDevice, CreateDeviceRequest +├── errors.go # Error definitions +├── discovery.go # PCI device discovery from sysfs +├── vfio.go # VFIO bind/unbind operations +├── manager.go # Manager interface and implementation +├── manager_test.go # Unit tests +├── gpu_e2e_test.go # End-to-end GPU passthrough test (auto-skips if no GPU) +└── scripts/ + └── gpu-reset.sh # GPU recovery script (see Troubleshooting) +``` + +## Example: Full Workflow + +```bash +# 1. Discover available devices +curl localhost:8080/devices/available +# → [{"pci_address": "0000:a2:00.0", "vendor_name": "NVIDIA Corporation", ...}] + +# 2. Register the GPU +curl -X POST localhost:8080/devices \ + -d '{"name": "l4-gpu", "pci_address": "0000:a2:00.0"}' + +# 3. Create instance with GPU (auto-binds to VFIO) +curl -X POST localhost:8080/instances \ + -d '{"name": "ml-training", "image": "nvidia/cuda:12.0-base", "devices": ["l4-gpu"]}' + +# 4. Inside VM: verify GPU +lspci | grep -i nvidia +nvidia-smi + +# 5. Delete instance (auto-unbinds from VFIO) +curl -X DELETE localhost:8080/instances/{id} +# GPU returns to host control +``` + +## Device Lifecycle + +### 1. Discovery + +Discover passthrough-capable devices on the host: + +``` +GET /devices/available +``` + +Returns PCI devices that are candidates for passthrough (GPUs, 3D controllers). Each device includes its PCI address, vendor/device IDs, IOMMU group, and current driver. + +### 2. Registration + +Register a device with a unique name: + +``` +POST /devices +{ + "name": "l4-gpu", + "pci_address": "0000:a2:00.0" +} +``` + +Registration does not modify the device's driver binding. The device remains usable by the host until an instance requests it. + +### 3. Instance Creation (Auto-Bind) + +When an instance is created with devices: + +``` +POST /instances +{ + "name": "gpu-workload", + "image": "docker.io/nvidia/cuda:12.0-base", + "devices": ["l4-gpu"] +} +``` + +The system automatically: +1. **Validates** the device exists and isn't attached to another instance +2. **Binds to VFIO** if not already bound (unbinds native driver like `nvidia`) +3. **Passes to cloud-hypervisor** via the `--device` flag +4. **Marks as attached** to prevent concurrent use + +### 4. Instance Deletion (Auto-Unbind) + +When an instance is deleted, the system automatically: +1. **Marks device as detached** +2. **Unbinds from VFIO** (triggers kernel driver probe to restore native driver) + +This returns the device to host control so it can be used by other processes or a new instance. + +### 5. Unregistration + +``` +DELETE /devices/{id} +``` + +Removes the device from hypeman's registry. Fails if the device is currently attached to an instance. 
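+
+The auto-bind in step 3 and the auto-unbind in step 4 reduce to a short sysfs sequence: set `driver_override`, unbind the current driver, then ask the kernel to re-probe the device. The sketch below is a minimal illustration of that sequence (assuming root; the function names are illustrative, not this package's API; the real implementation lives in `vfio.go`):
+
+```go
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+// bindToVFIO steers a device to vfio-pci: override, unbind, re-probe.
+func bindToVFIO(addr string) error {
+	dev := filepath.Join("/sys/bus/pci/devices", addr)
+	// Only vfio-pci may claim this device on the next probe.
+	if err := os.WriteFile(filepath.Join(dev, "driver_override"), []byte("vfio-pci"), 0200); err != nil {
+		return fmt.Errorf("driver_override: %w", err)
+	}
+	// Detach the native driver (e.g. nvidia), if one is bound.
+	if _, err := os.Stat(filepath.Join(dev, "driver")); err == nil {
+		if err := os.WriteFile(filepath.Join(dev, "driver", "unbind"), []byte(addr), 0200); err != nil {
+			return fmt.Errorf("unbind: %w", err)
+		}
+	}
+	return os.WriteFile("/sys/bus/pci/drivers_probe", []byte(addr), 0200)
+}
+
+// unbindFromVFIO reverses the process so the native driver can reattach.
+func unbindFromVFIO(addr string) error {
+	dev := filepath.Join("/sys/bus/pci/devices", addr)
+	if err := os.WriteFile(filepath.Join(dev, "driver", "unbind"), []byte(addr), 0200); err != nil {
+		return fmt.Errorf("unbind: %w", err)
+	}
+	// Clearing the override lets the kernel pick the native driver again.
+	if err := os.WriteFile(filepath.Join(dev, "driver_override"), []byte("\n"), 0200); err != nil {
+		return fmt.Errorf("clear override: %w", err)
+	}
+	return os.WriteFile("/sys/bus/pci/drivers_probe", []byte(addr), 0200)
+}
+
+func main() {
+	if err := bindToVFIO("0000:a2:00.0"); err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(1)
+	}
+}
+```
+
+The clear-override plus re-probe at the end is the same recovery step `scripts/gpu-reset.sh` performs when a GPU is left stuck on vfio-pci (see Troubleshooting).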
+ +## Cloud Hypervisor Integration + +Cloud-hypervisor receives device passthrough configuration via the `VmConfig.Devices` field: + +```go +vmConfig.Devices = &[]vmm.DeviceConfig{ + { + Path: "/sys/bus/pci/devices/0000:a2:00.0/", + }, +} +``` + +Cloud-hypervisor then: +1. Opens the VFIO group file (`/dev/vfio/`) +2. Maps device BARs (memory regions) into guest physical address space +3. Configures interrupt routing (MSI/MSI-X) to the guest +4. The guest sees a real PCIe device and loads native drivers + +### NVIDIA-Specific Options + +For multi-GPU configurations, cloud-hypervisor supports GPUDirect P2P: + +```go +DeviceConfig{ + Path: "/sys/bus/pci/devices/0000:a2:00.0/", + XNvGpudirectClique: ptr(int8(0)), // Enable P2P within clique 0 +} +``` + +This is not currently exposed through the hypeman API but could be added for HPC workloads. + +## Constraints and Limitations + +### IOMMU Requirements + +- **IOMMU must be enabled** in BIOS and kernel (`intel_iommu=on` or `amd_iommu=on`) +- All devices in an IOMMU group must be passed through together +- Some motherboards place many devices in the same group (ACS override may help) + +### VFIO Module Requirements + +The following kernel modules must be loaded: +```bash +modprobe vfio_pci +modprobe vfio_iommu_type1 +``` + +### Driver Binding + +- Binding to VFIO **unloads the native driver** (e.g., `nvidia`, `amdgpu`) +- Host processes using the device will lose access +- Some drivers (like NVIDIA) may resist unbinding if in use + +### Single Attachment + +A device can only be attached to one instance at a time. Attempts to attach an already-attached device will fail. + +### No Hot-Plug + +Devices must be specified at instance creation time. Hot-adding devices to a running VM is not currently supported (though cloud-hypervisor has this capability). + +### Guest Driver Requirements + +The guest must have appropriate drivers: +- **NVIDIA GPUs**: Install NVIDIA drivers in the guest image +- **AMD GPUs**: Install amdgpu/ROCm in the guest image + +### Performance Considerations + +- **ACS (Access Control Services)**: Required for proper isolation on some systems +- **Huge Pages**: Recommended for GPU workloads (`hugepages=on` in cloud-hypervisor) +- **CPU Pinning**: Can improve latency for GPU compute workloads + +## Troubleshooting + +### GPU Reset Script + +If GPU passthrough tests fail or hang, the GPU may be left in a bad state (still bound to vfio-pci, or stuck without a driver). Use the provided reset script: + +```bash +# Reset all NVIDIA GPUs to their native driver +sudo ./lib/devices/scripts/gpu-reset.sh + +# Reset a specific GPU +sudo ./lib/devices/scripts/gpu-reset.sh 0000:a2:00.0 +``` + +The script will: +1. Kill any stuck cloud-hypervisor processes holding the GPU +2. Unbind from vfio-pci if still bound +3. Clear `driver_override` +4. Trigger driver probe to rebind to the nvidia driver +5. Restart `nvidia-persistenced` + +### Common Issues + +#### VFIO Bind Hangs + +**Symptom**: `BindToVFIO` hangs indefinitely. + +**Cause**: The `nvidia-persistenced` service keeps `/dev/nvidia*` open, preventing driver unbind. + +**Solution**: The code now automatically stops `nvidia-persistenced` before unbinding. If you're testing manually: +```bash +sudo systemctl stop nvidia-persistenced +# ... do VFIO bind/unbind ... +sudo systemctl start nvidia-persistenced +``` + +#### VM Exec Fails After Boot + +**Symptom**: VM boots but exec commands time out. 
+ +**Cause**: Usually the container's main process exited (e.g., `alpine` image runs `/bin/sh` which exits immediately), causing init to exit and the VM to kernel panic. + +**Solution**: Use an image with a long-running process (e.g., `nginx:alpine`) or ensure your container has a persistent entrypoint. + +#### GPU Not Restored After Test + +**Symptom**: GPU has no driver bound, `nvidia-smi` fails. + +**Solution**: +```bash +# Trigger kernel driver probe +sudo sh -c 'echo 0000:a2:00.0 > /sys/bus/pci/drivers_probe' +# Restart nvidia-persistenced +sudo systemctl start nvidia-persistenced +# Verify +nvidia-smi +``` + +If that fails, a system **reboot** may be necessary. + +#### VFIO Modules Not Loaded + +**Symptom**: `ErrVFIONotAvailable` error. + +**Solution**: +```bash +sudo modprobe vfio_pci vfio_iommu_type1 +# Verify +ls /dev/vfio/ +``` + +Add to `/etc/modules-load.d/vfio.conf` for persistence across reboots. + +#### IOMMU Not Enabled + +**Symptom**: No IOMMU groups found, passthrough fails. + +**Solution**: Add kernel parameter to bootloader: +- Intel: `intel_iommu=on iommu=pt` +- AMD: `amd_iommu=on iommu=pt` + +Then reboot. + +### Running the E2E Test + +The GPU passthrough E2E test **automatically detects** GPU availability and skips if prerequisites aren't met. + +**Why GPU tests require root**: Unlike network tests which can use Linux capabilities (`CAP_NET_ADMIN`), GPU passthrough requires writing to sysfs files (`/sys/bus/pci/drivers/*/unbind`, etc.) which are protected by standard Unix file permissions (owned by root, mode 0200). Capabilities don't bypass DAC (discretionary access control) for file writes. + +Prerequisites for the test to run (not skip): +- **Root permissions** (sudo) - required for sysfs driver operations +- NVIDIA GPU on host +- IOMMU enabled (`intel_iommu=on` or `amd_iommu=on`) +- `vfio_pci` and `vfio_iommu_type1` modules loaded +- `/sbin` in PATH (for `mkfs.ext4`) + +```bash +# Prepare the environment +sudo modprobe vfio_pci vfio_iommu_type1 + +# Run via make - test auto-skips if not root or no GPU +make test + +# Or run directly with sudo +sudo env PATH=$PATH:/sbin:/usr/sbin \ + go test -v -run TestGPUPassthrough -timeout 5m ./lib/devices/... +``` + +The test will: +1. Check prerequisites and skip if not met (not root, no GPU, no IOMMU, etc.) +2. Discover available NVIDIA GPUs +3. Register the first GPU found +4. Create a VM with GPU passthrough +5. Verify the GPU is visible inside the VM +6. Clean up (delete VM, unbind from VFIO, restore nvidia driver) + +## Future Plans: GPU Sharing Across Multiple VMs + +### The Problem + +With current VFIO passthrough, a GPU is assigned **exclusively** to one VM. To share a single GPU across multiple VMs (e.g., give each VM a "slice"), you need NVIDIA's **vGPU (GRID)** technology. 
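+
+You can observe this exclusivity from the host: a device's `driver` symlink in sysfs names whichever driver currently owns it. A small illustrative check (using the example PCI address from earlier sections):
+
+```go
+package main
+
+import (
+	"fmt"
+	"os"
+	"path/filepath"
+)
+
+func main() {
+	// While a VM holds the GPU through VFIO this prints "vfio-pci";
+	// after auto-unbind it prints the native driver (e.g. "nvidia").
+	link, err := os.Readlink("/sys/bus/pci/devices/0000:a2:00.0/driver")
+	if err != nil {
+		fmt.Println("no driver bound")
+		return
+	}
+	fmt.Println(filepath.Base(link))
+}
+```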
+ +### Why MIG Alone Doesn't Help + +**MIG (Multi-Instance GPU)** partitions a GPU into isolated instances at the hardware level, but: + +- MIG partitions are **not separate PCI devices**—the GPU remains one PCI endpoint +- MIG partitions are accessed via CUDA APIs (`CUDA_VISIBLE_DEVICES=MIG-<uuid>`) +- You can only VFIO-passthrough the **whole GPU** to one VM +- MIG is useful for workload isolation **within** a single host or VM, not for multi-VM sharing + +``` +Physical GPU (0000:a2:00.0) ─── still ONE PCI device + └── MIG partitions (logical, not separate devices) + ├── MIG Instance 0 ─┐ + ├── MIG Instance 1 ─┼── All accessed via CUDA on the same GPU + └── MIG Instance 2 ─┘ +``` + +**Supported MIG Hardware**: A100, A30, H100, H200 (NOT L4 or consumer GPUs) + +### vGPU/mdev: The Only Path to Multi-VM GPU Sharing + +To assign GPU shares to **separate VMs**, NVIDIA requires their **vGPU (GRID)** technology, which uses the Linux mediated device (mdev) framework. + +#### Cloud-Hypervisor mdev Support Status + +Cloud-hypervisor **does** support mdev passthrough: + +```bash +cloud-hypervisor --device path=/sys/bus/mdev/devices/<uuid>/ +``` + +However, NVIDIA's proprietary vGPU manager has a QEMU-specific quirk: it reads the VMM process's `/proc/<pid>/cmdline` looking for a `-uuid` argument to map mdev UUIDs to VMs. This doesn't work out-of-the-box with cloud-hypervisor. + +**Workarounds** (from [cloud-hypervisor#5319](https://github.com/cloud-hypervisor/cloud-hypervisor/issues/5319)): +- Patch CH to accept a dummy `-uuid` flag +- Use wrapper scripts that inject the UUID into the process name +- Wait for NVIDIA to fix their driver's VMM assumptions + +#### vGPU Requirements + +- **Hardware**: Datacenter GPUs (A100, L40, etc.) +- **Licensing**: NVIDIA GRID subscription ($$/GPU/year) +- **Host Software**: NVIDIA vGPU Manager installed on host +- **Guest Drivers**: vGPU-aware guest drivers + +### Design Changes for mdev/vGPU Support + +#### 1. New Device Type: `MdevDevice` + +```go +type MdevDevice struct { + UUID string // mdev instance UUID + ParentGPU string // PCI address of parent GPU + Type string // vGPU type (e.g., "nvidia-256") + Available bool // Not assigned to a VM +} +``` + +#### 2. Discovery Extensions + +```go +// List mdev types supported by a GPU +func (m *manager) ListMdevTypes(ctx context.Context, pciAddress string) ([]MdevType, error) + +// List existing mdev instances +func (m *manager) ListMdevInstances(ctx context.Context) ([]MdevDevice, error) + +// Create an mdev instance +func (m *manager) CreateMdevInstance(ctx context.Context, pciAddress, mdevType string) (*MdevDevice, error) + +// Destroy an mdev instance +func (m *manager) DestroyMdevInstance(ctx context.Context, uuid string) error +``` + +#### 3. Passthrough Mechanism + +mdev devices use a different sysfs path: + +``` +# mdev device path +/sys/bus/mdev/devices/<uuid>/ + +# vs VFIO-PCI (current) +/sys/bus/pci/devices/0000:a2:00.0/ +``` + +Cloud-hypervisor's `--device` flag already accepts mdev paths. + +#### 4.
NVIDIA vGPU Workaround + +To work around NVIDIA's QEMU-specific UUID detection, we may need to: +- Add a `--platform uuid=` option to cloud-hypervisor invocation +- Or use a wrapper that sets the process name appropriately + +### Implementation Phases + +**Phase 1**: mdev Discovery & Passthrough +- Detect mdev-capable GPUs +- List available mdev types and instances +- Pass mdev devices to VMs (path already works) + +**Phase 2**: mdev Lifecycle Management +- Create/destroy mdev instances via sysfs +- API endpoints for mdev management + +**Phase 3**: NVIDIA vGPU Integration +- Implement UUID workaround for NVIDIA's driver +- Test with GRID licensing +- Document guest driver requirements + +### How vGPU + MIG Work Together + +vGPU creates mdev devices that can be backed by MIG partitions, giving you both hardware isolation (MIG) and multi-VM assignment (vGPU): + +``` +Physical GPU (one PCI device) + │ + ├── Without vGPU: VFIO passthrough gives whole GPU to ONE VM + │ + └── With vGPU (GRID license required): + └── MIG Mode enabled on host + ├── MIG Instance 0 ──→ vGPU mdev A ──→ VM 1 + ├── MIG Instance 1 ──→ vGPU mdev B ──→ VM 2 + └── MIG Instance 2 ──→ vGPU mdev C ──→ VM 3 +``` + +Without vGPU, MIG is only useful for workload isolation on the host or within a single VM that owns the whole GPU. diff --git a/lib/devices/gpu_e2e_test.go b/lib/devices/gpu_e2e_test.go new file mode 100644 index 00000000..94941b11 --- /dev/null +++ b/lib/devices/gpu_e2e_test.go @@ -0,0 +1,353 @@ +package devices_test + +import ( + "bytes" + "context" + "os" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/onkernel/hypeman/cmd/api/config" + "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/exec" + "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/instances" + "github.com/onkernel/hypeman/lib/network" + "github.com/onkernel/hypeman/lib/paths" + "github.com/onkernel/hypeman/lib/system" + "github.com/onkernel/hypeman/lib/volumes" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// TestGPUPassthrough is an E2E test that verifies GPU passthrough works. +// +// This test automatically detects GPU availability and skips if: +// - No NVIDIA GPU is found +// - IOMMU is not enabled +// - VFIO modules are not loaded +// - Not running as root +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestGPUPassthrough ./lib/devices/... +// +// WARNING: This test will unbind the GPU from the nvidia driver, which may +// disrupt other processes using the GPU. The test attempts to restore the +// nvidia driver binding on cleanup. 
+func TestGPUPassthrough(t *testing.T) { + ctx := context.Background() + + // Auto-detect GPU availability - skip if prerequisites not met + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + // Log that prerequisites passed + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("GPU test prerequisites met: %d IOMMU groups found", len(groups)) + + // Setup test infrastructure + tmpDir := t.TempDir() + p := paths.New(tmpDir) + + cfg := &config.Config{ + DataDir: tmpDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + // Initialize managers (nil meter/tracer disables metrics/tracing) + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 100*1024*1024*1024, nil) // 100GB max volume storage + limits := instances.ResourceLimits{ + MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB + } + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Discover available GPUs + t.Log("Step 1: Discovering available GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + // Find an NVIDIA GPU + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found on this system") + driverStr := "none" + if targetGPU.CurrentDriver != nil { + driverStr = *targetGPU.CurrentDriver + } + t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr) + + // Check GPU is in a usable state (has a driver bound) + if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" { + t.Skip("GPU has no driver bound - may need reboot to recover. 
Run: sudo reboot") + } + + // Verify the driver path exists (GPU not in broken state) + driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver") + if _, err := os.Stat(driverPath); os.IsNotExist(err) { + t.Skipf("GPU driver symlink missing at %s - GPU in broken state, reboot required", driverPath) + } + + // Step 2: Register the GPU + t.Log("Step 2: Registering GPU...") + device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "test-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Logf("Registered device: %s (ID: %s)", device.Name, device.Id) + + // Store original driver for cleanup + originalDriver := driverStr + + // Cleanup: always unregister device and try to restore original driver + t.Cleanup(func() { + t.Log("Cleanup: Deleting registered device...") + deviceMgr.DeleteDevice(ctx, device.Id) + + // Try to restore original driver binding via driver_probe + if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" { + t.Logf("Cleanup: Triggering driver probe to restore %s driver...", originalDriver) + // Use driver_probe to let the kernel find and bind the right driver + probePath := "/sys/bus/pci/drivers_probe" + if err := os.WriteFile(probePath, []byte(targetGPU.PCIAddress), 0200); err != nil { + t.Logf("Warning: Could not trigger driver probe: %v (may need reboot)", err) + } else { + t.Logf("Cleanup: Driver probe triggered for %s", targetGPU.PCIAddress) + } + } + }) + + // Step 3: Ensure system files (kernel, initrd) + t.Log("Step 3: Ensuring system files...") + err = systemMgr.EnsureSystemFiles(ctx) + require.NoError(t, err) + t.Log("System files ready") + + // Step 4: Pull nginx:alpine (nginx keeps running unlike plain alpine which exits immediately) + t.Log("Step 4: Pulling nginx:alpine image...") + createdImg, createErr := imageMgr.CreateImage(ctx, images.CreateImageRequest{ + Name: "docker.io/library/nginx:alpine", + }) + require.NoError(t, createErr, "CreateImage should succeed") + t.Logf("CreateImage returned: name=%s, status=%s", createdImg.Name, createdImg.Status) + + // Use the name returned from CreateImage (it may be normalized) + imageName := createdImg.Name + + // Wait for image to be ready + var img *images.Image + for i := 0; i < 90; i++ { + img, err = imageMgr.GetImage(ctx, imageName) + if err != nil { + if i < 5 || i%10 == 0 { + t.Logf("GetImage attempt %d: error=%v", i+1, err) + } + } else { + if i < 5 || i%10 == 0 { + t.Logf("GetImage attempt %d: status=%s", i+1, img.Status) + } + if img.Status == images.StatusReady { + break + } + if img.Status == images.StatusFailed { + errMsg := "unknown" + if img.Error != nil { + errMsg = *img.Error + } + t.Fatalf("Image build failed: %s", errMsg) + } + } + time.Sleep(1 * time.Second) + } + require.NotNil(t, img, "Image should exist after 90 seconds") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Log("Image ready") + + // Step 5: Create instance with GPU (with timeout to prevent hang on VFIO issues) + t.Log("Step 5: Creating instance with GPU...") + createCtx, createCancel := context.WithTimeout(ctx, 60*time.Second) + defer createCancel() + + inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ + Name: "gpu-test", + Image: "docker.io/library/nginx:alpine", + Size: 512 * 1024 * 1024, + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 1, + NetworkEnabled: false, + Devices: []string{"test-gpu"}, + Env: map[string]string{}, + }) + 
require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + // Cleanup: always delete instance + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + // Step 6: Wait for instance to be ready + t.Log("Step 6: Waiting for instance to be ready...") + err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 30*time.Second) + require.NoError(t, err) + t.Log("Instance is ready") + + // Step 7: Verify GPU is visible inside VM + // Note: Alpine doesn't have lspci, so we check /sys/bus/pci directly for NVIDIA vendor ID (0x10de) + t.Log("Step 7: Verifying GPU visibility inside VM...") + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Create a context with timeout for exec operations + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + // Retry exec a few times (exec agent may need time to start) + var stdout, stderr outputBuffer + var execErr error + // Command to find NVIDIA devices by checking vendor IDs (0x10de = NVIDIA) + checkGPUCmd := "cat /sys/bus/pci/devices/*/vendor 2>/dev/null | grep -i 10de && echo 'NVIDIA_FOUND'" + + for i := 0; i < 15; i++ { + stdout = outputBuffer{} + stderr = outputBuffer{} + + _, execErr = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", checkGPUCmd}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + TTY: false, + }) + + if execErr == nil { + break + } + t.Logf("Exec attempt %d/15 failed: %v", i+1, execErr) + time.Sleep(1 * time.Second) + } + if execErr != nil { + // Print console log for debugging + p := paths.New(tmpDir) + consoleLogPath := p.InstanceAppLog(inst.Id) + if consoleLog, err := os.ReadFile(consoleLogPath); err == nil { + t.Logf("=== VM Console Log ===\n%s\n=== End Console Log ===", string(consoleLog)) + } else { + t.Logf("Could not read console log: %v", err) + } + } + require.NoError(t, execErr, "exec should succeed") + + pciOutput := stdout.String() + t.Logf("PCI vendor check output:\n%s", pciOutput) + + // Verify NVIDIA device is visible (vendor ID 0x10de) + assert.True(t, + strings.Contains(pciOutput, "NVIDIA_FOUND") || + strings.Contains(strings.ToLower(pciOutput), "10de"), + "NVIDIA GPU (vendor 0x10de) should be visible in guest") + + t.Log("✅ GPU passthrough test PASSED!") +} + +// checkGPUTestPrerequisites checks if GPU passthrough test can run. +// Returns empty string if all prerequisites are met, otherwise returns skip reason. 
+func checkGPUTestPrerequisites() string {
+	// Check KVM
+	if _, err := os.Stat("/dev/kvm"); os.IsNotExist(err) {
+		return "GPU passthrough test requires /dev/kvm"
+	}
+
+	// Check VFIO modules
+	if _, err := os.Stat("/dev/vfio/vfio"); os.IsNotExist(err) {
+		return "GPU passthrough test requires VFIO (modprobe vfio_pci vfio_iommu_type1)"
+	}
+
+	// Check IOMMU is enabled by looking for IOMMU groups
+	groups, err := os.ReadDir("/sys/kernel/iommu_groups")
+	if err != nil || len(groups) == 0 {
+		return "GPU passthrough test requires IOMMU (intel_iommu=on or amd_iommu=on)"
+	}
+
+	// Check for NVIDIA GPU
+	available, err := devices.DiscoverAvailableDevices()
+	if err != nil {
+		return "GPU passthrough test failed to discover devices: " + err.Error()
+	}
+
+	hasNvidiaGPU := false
+	for _, d := range available {
+		if strings.Contains(strings.ToLower(d.VendorName), "nvidia") {
+			hasNvidiaGPU = true
+			break
+		}
+	}
+	if !hasNvidiaGPU {
+		return "GPU passthrough test requires an NVIDIA GPU"
+	}
+
+	// GPU passthrough requires root (euid=0) for sysfs driver bind/unbind operations.
+	// Unlike network operations which can use CAP_NET_ADMIN, sysfs file writes are
+	// protected by standard Unix DAC (file permissions), not just capabilities.
+	// The files in /sys/bus/pci/drivers/ are owned by root with mode 0200.
+	if os.Geteuid() != 0 {
+		return "GPU passthrough test requires root (sudo) for sysfs driver operations"
+	}
+
+	return "" // All prerequisites met
+}
+
+func waitForInstanceReady(ctx context.Context, t *testing.T, mgr instances.Manager, id string, timeout time.Duration) error {
+	t.Helper()
+
+	deadline := time.Now().Add(timeout)
+	for time.Now().Before(deadline) {
+		inst, err := mgr.GetInstance(ctx, id)
+		if err != nil {
+			time.Sleep(500 * time.Millisecond)
+			continue
+		}
+
+		if inst.State == instances.StateRunning {
+			// Additional check: wait a bit for exec agent
+			time.Sleep(2 * time.Second)
+			return nil
+		}
+
+		time.Sleep(500 * time.Millisecond)
+	}
+
+	return context.DeadlineExceeded
+}
+
+type outputBuffer struct {
+	buf bytes.Buffer
+}
+
+func (b *outputBuffer) Write(p []byte) (n int, err error) {
+	return b.buf.Write(p)
+}
+
+func (b *outputBuffer) String() string {
+	return b.buf.String()
+}
diff --git a/lib/devices/gpu_inference_test.go b/lib/devices/gpu_inference_test.go
new file mode 100644
index 00000000..0749b840
--- /dev/null
+++ b/lib/devices/gpu_inference_test.go
@@ -0,0 +1,536 @@
+package devices_test
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+	"fmt"
+	"io"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	osExec "os/exec"
+	"path/filepath"
+	"runtime"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/go-chi/chi/v5"
+	"github.com/google/go-containerregistry/pkg/name"
+	"github.com/google/go-containerregistry/pkg/v1/daemon"
+	"github.com/google/go-containerregistry/pkg/v1/remote"
+	"github.com/onkernel/hypeman/cmd/api/config"
+	"github.com/onkernel/hypeman/lib/devices"
+	"github.com/onkernel/hypeman/lib/exec"
+	"github.com/onkernel/hypeman/lib/images"
+	"github.com/onkernel/hypeman/lib/instances"
+	"github.com/onkernel/hypeman/lib/network"
+	"github.com/onkernel/hypeman/lib/paths"
+	"github.com/onkernel/hypeman/lib/registry"
+	"github.com/onkernel/hypeman/lib/system"
+	"github.com/onkernel/hypeman/lib/volumes"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// persistentTestDataDir is used to persist volumes between test runs.
+// This allows the ollama model cache to survive across test executions.
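+// (Nothing in the tests deletes this directory; remove it manually to reclaim space.)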
+// Note: Uses /var/lib instead of /tmp because /tmp often has limited space
+// and the custom CUDA+Ollama image is ~4GB.
const persistentTestDataDir = "/var/lib/hypeman-gpu-inference-test"
+
+// ollamaCudaDockerImage is the name we use for the custom CUDA+Ollama image
+const ollamaCudaDockerImage = "ollama-cuda:test"
+
+// TestGPUInference is an E2E test that verifies Ollama GPU inference works with VFIO passthrough.
+//
+// This test:
+// 1. Builds a custom Docker image with NVIDIA CUDA runtime + Ollama
+// 2. Pushes the image to hypeman's test registry
+// 3. Launches a VM with GPU passthrough + the image
+// 4. Runs `ollama run tinyllama` to perform GPU-accelerated inference
+// 5. Verifies the model generates output
+//
+// The custom image bundles CUDA libraries, enabling Ollama to detect and use the GPU
+// without needing nvidia-docker/nvidia-container-toolkit.
+//
+// Prerequisites:
+// - NVIDIA GPU on host
+// - IOMMU enabled
+// - VFIO modules loaded (modprobe vfio_pci)
+// - Docker installed (for building custom image)
+// - Running as root
+//
+// To run manually:
+//
+//	sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestGPUInference -timeout 30m ./lib/devices/...
+//
+// To clean up:
+//
+//	sudo rm -rf /var/lib/hypeman-gpu-inference-test
+//	docker rmi ollama-cuda:test
+func TestGPUInference(t *testing.T) {
+	ctx := context.Background()
+
+	// Auto-detect GPU availability - skip if prerequisites not met
+	skipReason := checkGPUTestPrerequisites()
+	if skipReason != "" {
+		t.Skip(skipReason)
+	}
+
+	// Check Docker is available
+	if _, err := osExec.LookPath("docker"); err != nil {
+		t.Skip("Docker not installed - required for building custom CUDA image")
+	}
+
+	groups, _ := os.ReadDir("/sys/kernel/iommu_groups")
+	t.Logf("GPU inference test prerequisites met: %d IOMMU groups found", len(groups))
+
+	// Use persistent directory for volume storage (survives between test runs)
+	if err := os.MkdirAll(persistentTestDataDir, 0755); err != nil {
+		t.Fatalf("Failed to create persistent test directory: %v", err)
+	}
+	p := paths.New(persistentTestDataDir)
+
+	cfg := &config.Config{
+		DataDir:    persistentTestDataDir,
+		BridgeName: "vmbr0",
+		SubnetCIDR: "10.100.0.0/16",
+		DNSServer:  "1.1.1.1",
+	}
+
+	// Initialize managers
+	imageMgr, err := images.NewManager(p, 1, nil)
+	require.NoError(t, err)
+
+	systemMgr := system.NewManager(p)
+	networkMgr := network.NewManager(p, cfg, nil)
+	deviceMgr := devices.NewManager(p)
+	volumeMgr := volumes.NewManager(p, 100*1024*1024*1024, nil)
+	limits := instances.ResourceLimits{
+		MaxOverlaySize: 100 * 1024 * 1024 * 1024,
+	}
+	instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil)
+
+	// Step 1: Build custom CUDA+Ollama image
+	t.Log("Step 1: Building custom CUDA+Ollama Docker image...")
+	dockerfilePath := getDockerfilePath(t)
+	buildCustomCudaImage(t, dockerfilePath, ollamaCudaDockerImage)
+
+	// Step 2: Set up test registry and push the image
+	t.Log("Step 2: Pushing custom image to hypeman registry...")
+	reg, err := registry.New(p, imageMgr)
+	require.NoError(t, err)
+
+	router := chi.NewRouter()
+	router.Mount("/v2", reg.Handler())
+	ts := httptest.NewServer(router)
+	t.Cleanup(ts.Close)
+
+	serverHost := strings.TrimPrefix(ts.URL, "http://")
+	pushLocalDockerImage(t, ollamaCudaDockerImage, serverHost)
+	t.Log("Push complete")
+
+	// Wait for image conversion - find image by listing since digest may change during Docker->OCI conversion
+	t.Log("Waiting for image conversion...")
conversion...") + var img *images.Image + var imageName string + for i := 0; i < 300; i++ { // 5 minutes for large CUDA image + // List images and find our ollama-cuda image + allImages, listErr := imageMgr.ListImages(ctx) + if listErr == nil { + for _, candidate := range allImages { + if strings.Contains(candidate.Name, "ollama-cuda") { + img = &candidate + imageName = candidate.Name + break + } + } + } + if img != nil && img.Status == images.StatusReady { + break + } + if img != nil && img.Status == images.StatusFailed { + errMsg := "unknown" + if img.Error != nil { + errMsg = *img.Error + } + t.Fatalf("Image conversion failed: %s", errMsg) + } + if i%30 == 0 { + status := "not found" + if img != nil { + status = string(img.Status) + } + t.Logf("Waiting for image conversion... (%d/300, status=%s)", i+1, status) + } + time.Sleep(time.Second) + } + require.NotNil(t, img, "Image should exist after 5 minutes") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Logf("Image ready: %s (digest: %s)", imageName, img.Digest) + + // Step 3: Discover and register GPU + t.Log("Step 3: Discovering available GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found") + + driverStr := "none" + if targetGPU.CurrentDriver != nil { + driverStr = *targetGPU.CurrentDriver + } + t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr) + + if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" { + t.Skip("GPU has no driver bound - may need reboot") + } + + driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver") + if _, err := os.Stat(driverPath); os.IsNotExist(err) { + t.Skipf("GPU driver symlink missing - GPU in broken state") + } + + // Register GPU + t.Log("Step 4: Registering GPU...") + device, err := deviceMgr.GetDevice(ctx, "inference-gpu") + if err != nil { + device, err = deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "inference-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Logf("Registered new device: %s (ID: %s)", device.Name, device.Id) + } else { + t.Logf("Using existing device: %s (ID: %s)", device.Name, device.Id) + } + + originalDriver := driverStr + t.Cleanup(func() { + t.Log("Cleanup: Deleting registered device...") + deviceMgr.DeleteDevice(ctx, device.Id) + if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" { + probePath := "/sys/bus/pci/drivers_probe" + os.WriteFile(probePath, []byte(targetGPU.PCIAddress), 0200) + } + }) + + // Step 5: Initialize network and create volume + t.Log("Step 5: Initializing network...") + err = networkMgr.Initialize(ctx, []string{}) + require.NoError(t, err) + + t.Log("Step 6: Setting up persistent volume for Ollama models...") + vol, err := volumeMgr.GetVolumeByName(ctx, "ollama-models") + if err != nil { + vol, err = volumeMgr.CreateVolume(ctx, volumes.CreateVolumeRequest{ + Name: "ollama-models", + SizeGb: 5, + }) + require.NoError(t, err) + t.Logf("Created new volume: %s", vol.Name) + } else { + t.Logf("Using existing volume: %s", vol.Name) + } + + // Step 7: Ensure system files + t.Log("Step 7: Ensuring system files...") + err = systemMgr.EnsureSystemFiles(ctx) + require.NoError(t, err) + + // 
+
+	// Step 8: Create instance with GPU
+	t.Log("Step 8: Creating instance with GPU and custom CUDA image...")
+	createCtx, createCancel := context.WithTimeout(ctx, 120*time.Second)
+	defer createCancel()
+
+	inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{
+		Name:        "gpu-inference-test",
+		Image:       imageName,
+		Size:        8 * 1024 * 1024 * 1024, // 8GB RAM for CUDA
+		HotplugSize: 8 * 1024 * 1024 * 1024,
+		OverlaySize: 10 * 1024 * 1024 * 1024,
+		Vcpus:       4,
+		Env: map[string]string{
+			"OLLAMA_HOST":   "0.0.0.0",
+			"OLLAMA_MODELS": "/data/models",
+		},
+		NetworkEnabled: true,
+		Devices:        []string{"inference-gpu"},
+		Volumes: []instances.VolumeAttachment{
+			{VolumeID: vol.Id, MountPath: "/data/models", Readonly: false},
+		},
+	})
+	require.NoError(t, err)
+	t.Logf("Instance created: %s", inst.Id)
+
+	t.Cleanup(func() {
+		t.Log("Cleanup: Deleting instance...")
+		instanceMgr.DeleteInstance(ctx, inst.Id)
+	})
+
+	// Step 9: Wait for instance
+	t.Log("Step 9: Waiting for instance to be ready...")
+	err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 60*time.Second)
+	require.NoError(t, err)
+
+	actualInst, err := instanceMgr.GetInstance(ctx, inst.Id)
+	require.NoError(t, err)
+
+	// Step 10: Wait for Ollama server
+	t.Log("Step 10: Waiting for Ollama server to be ready...")
+	ollamaReady := false
+	for i := 0; i < 60; i++ { // 60 seconds for CUDA init
+		healthCtx, healthCancel := context.WithTimeout(ctx, 5*time.Second)
+		var healthStdout, healthStderr inferenceOutputBuffer
+
+		_, err = exec.ExecIntoInstance(healthCtx, actualInst.VsockSocket, exec.ExecOptions{
+			Command: []string{"/bin/sh", "-c", "ollama list 2>&1"},
+			Stdout:  &healthStdout,
+			Stderr:  &healthStderr,
+		})
+		healthCancel()
+
+		output := healthStdout.String()
+		if err == nil && !strings.Contains(output, "could not connect") {
+			t.Logf("Ollama is ready (attempt %d)", i+1)
+			ollamaReady = true
+			break
+		}
+		if i%10 == 0 {
+			t.Logf("Waiting for Ollama (attempt %d/60)...", i+1)
+		}
+		time.Sleep(time.Second)
+	}
+	require.True(t, ollamaReady, "Ollama server should become ready")
+
+	// Step 11: Check GPU detection
+	t.Log("Step 11: Checking GPU detection...")
+	gpuCheckCtx, gpuCheckCancel := context.WithTimeout(ctx, 10*time.Second)
+	defer gpuCheckCancel()
+
+	// Check nvidia-smi (should work now with CUDA image)
+	var nvidiaSmiStdout, nvidiaSmiStderr inferenceOutputBuffer
+	_, _ = exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{
+		Command: []string{"/bin/sh", "-c", "nvidia-smi 2>&1 || echo 'nvidia-smi failed'"},
+		Stdout:  &nvidiaSmiStdout,
+		Stderr:  &nvidiaSmiStderr,
+	})
+	nvidiaSmiOutput := nvidiaSmiStdout.String()
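+	// If nvidia-smi works here, the driver userspace injected by hypeman at boot
+	// matched the kernel modules loaded in the guest - this image deliberately
+	// ships no driver of its own (see testdata/ollama-cuda/Dockerfile).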
GPU detected:\n%s", truncateHead(nvidiaSmiOutput, 500)) + } else { + t.Logf("nvidia-smi output: %s", nvidiaSmiOutput) + } + + // Check NVIDIA kernel modules + var modulesStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "cat /proc/modules | grep nvidia"}, + Stdout: &modulesStdout, + }) + if modulesStdout.String() != "" { + t.Logf("✓ NVIDIA kernel modules loaded:\n%s", modulesStdout.String()) + } + + // Check device nodes + var devStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ls -la /dev/nvidia* 2>&1"}, + Stdout: &devStdout, + }) + if !strings.Contains(devStdout.String(), "No such file") { + t.Logf("✓ NVIDIA device nodes:\n%s", devStdout.String()) + } + + // Step 12: Pull model via exec (needed for first time) + t.Log("Step 12: Ensuring TinyLlama model is available...") + + var listStdout inferenceOutputBuffer + exec.ExecIntoInstance(gpuCheckCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ollama list 2>&1"}, + Stdout: &listStdout, + }) + + if !strings.Contains(listStdout.String(), "tinyllama") { + t.Log("Model not cached - pulling now...") + pullCtx, pullCancel := context.WithTimeout(ctx, 10*time.Minute) + defer pullCancel() + + var pullStdout inferenceOutputBuffer + _, pullErr := exec.ExecIntoInstance(pullCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "ollama pull tinyllama 2>&1"}, + Stdout: &pullStdout, + }) + t.Logf("Pull output: %s", truncateTail(pullStdout.String(), 500)) + require.NoError(t, pullErr, "ollama pull should succeed") + } else { + t.Log("Model already cached") + } + + // Step 13: Test inference via HTTP API using the VM's private IP + // This is much faster than using `ollama run` CLI + t.Log("Step 13: Running inference via Ollama API...") + require.NotEmpty(t, actualInst.IP, "Instance should have a private IP") + ollamaURL := fmt.Sprintf("http://%s:11434/api/generate", actualInst.IP) + t.Logf("Calling Ollama API at %s", ollamaURL) + + // Create the inference request + inferenceReq := map[string]interface{}{ + "model": "tinyllama", + "prompt": "Say hello in 3 words", + "stream": false, + } + reqBody, err := json.Marshal(inferenceReq) + require.NoError(t, err) + + // Make the HTTP request with timeout + httpClient := &http.Client{Timeout: 2 * time.Minute} + start := time.Now() + resp, err := httpClient.Post(ollamaURL, "application/json", bytes.NewReader(reqBody)) + elapsed := time.Since(start) + + if err != nil { + // Log console for debugging + consoleLogPath := p.InstanceAppLog(inst.Id) + if consoleLog, readErr := os.ReadFile(consoleLogPath); readErr == nil { + t.Logf("=== VM Console Log ===\n%s\n=== End ===", truncateTail(string(consoleLog), 3000)) + } + } + require.NoError(t, err, "HTTP request to Ollama should succeed") + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode, "Ollama should return 200") + + // Parse response + body, err := io.ReadAll(resp.Body) + require.NoError(t, err) + + var ollamaResp struct { + Response string `json:"response"` + Done bool `json:"done"` + TotalDuration int64 `json:"total_duration"` // nanoseconds + EvalDuration int64 `json:"eval_duration"` // nanoseconds + EvalCount int `json:"eval_count"` // tokens generated + } + err = json.Unmarshal(body, &ollamaResp) + require.NoError(t, err) + + // Log results + t.Logf("Inference response: %s", 
+	t.Logf("Total time: %v (API reported: %dms)", elapsed, ollamaResp.TotalDuration/1e6)
+	if ollamaResp.EvalCount > 0 && ollamaResp.EvalDuration > 0 {
+		tokensPerSec := float64(ollamaResp.EvalCount) / (float64(ollamaResp.EvalDuration) / 1e9)
+		t.Logf("Generation speed: %.1f tokens/sec (%d tokens in %dms)",
+			tokensPerSec, ollamaResp.EvalCount, ollamaResp.EvalDuration/1e6)
+	}
+
+	// Verify output
+	assert.True(t, ollamaResp.Done, "Inference should complete")
+	assert.NotEmpty(t, ollamaResp.Response, "Model should generate output")
+	assert.True(t, len(ollamaResp.Response) > 5, "Model output should be substantive")
+
+	// GPU inference should be fast (a few seconds for this small prompt); allow 30s of slack
+	assert.Less(t, elapsed, 30*time.Second, "GPU inference should be fast")
+
+	t.Log("✅ GPU inference test PASSED!")
+}
+
+// getDockerfilePath returns the path to the CUDA+Ollama Dockerfile
+func getDockerfilePath(t *testing.T) string {
+	_, thisFile, _, ok := runtime.Caller(0)
+	require.True(t, ok, "Could not get current file path")
+	return filepath.Join(filepath.Dir(thisFile), "testdata", "ollama-cuda", "Dockerfile")
+}
+
+// buildCustomCudaImage builds the custom CUDA+Ollama Docker image
+func buildCustomCudaImage(t *testing.T, dockerfilePath, imageName string) {
+	t.Helper()
+
+	// Check if image already exists
+	checkCmd := osExec.Command("docker", "image", "inspect", imageName)
+	if checkCmd.Run() == nil {
+		t.Logf("Docker image %s already exists, skipping build", imageName)
+		return
+	}
+
+	t.Logf("Building Docker image %s (this may take several minutes)...", imageName)
+	dockerfileDir := filepath.Dir(dockerfilePath)
+
+	cmd := osExec.Command("docker", "build", "-t", imageName, dockerfileDir)
+	cmd.Stdout = os.Stdout
+	cmd.Stderr = os.Stderr
+
+	err := cmd.Run()
+	require.NoError(t, err, "Docker build should succeed")
+	t.Logf("Docker image %s built successfully", imageName)
+}
+
+// pushLocalDockerImage loads an image from local Docker and pushes to hypeman's test registry
+func pushLocalDockerImage(t *testing.T, dockerImage, serverHost string) {
+	t.Helper()
+
+	t.Log("Loading image from Docker daemon...")
+	srcRef, err := name.ParseReference(dockerImage)
+	require.NoError(t, err, "Parse source image reference")
+
+	img, err := daemon.Image(srcRef)
+	require.NoError(t, err, "Load image from Docker daemon")
+
+	// Check image size for progress context
+	layers, _ := img.Layers()
+	var totalSize int64
+	for _, layer := range layers {
+		if size, err := layer.Size(); err == nil {
+			totalSize += size
+		}
+	}
+	t.Logf("Image has %d layers, ~%.1f GB total", len(layers), float64(totalSize)/1e9)
+
+	// Push to test registry with a tag (not just digest) so ListImages can find it
+	targetRef := fmt.Sprintf("%s/test/ollama-cuda:latest", serverHost)
+	t.Logf("Pushing to %s", targetRef)
+
+	dstRef, err := name.ParseReference(targetRef, name.Insecure)
+	require.NoError(t, err, "Parse target reference")
+
+	err = remote.Write(dstRef, img)
+	require.NoError(t, err, "Push to registry")
+}
+
+// inferenceOutputBuffer is a simple buffer for capturing command output
+type inferenceOutputBuffer struct {
+	buf bytes.Buffer
+}
+
+func (b *inferenceOutputBuffer) Write(p []byte) (n int, err error) {
+	return b.buf.Write(p)
+}
+
+func (b *inferenceOutputBuffer) String() string {
+	return b.buf.String()
+}
+
+// truncateTail returns the last n characters of s
+func truncateTail(s string, n int) string {
+	if len(s) <= n {
+		return s
+	}
+	return "..." + s[len(s)-n:]
+}
+
+// truncateHead returns the first n characters of s
+func truncateHead(s string, n int) string {
+	if len(s) <= n {
+		return s
+	}
+	return s[:n] + "..."
+}
diff --git a/lib/devices/gpu_module_test.go b/lib/devices/gpu_module_test.go
new file mode 100644
index 00000000..fad9bc9a
--- /dev/null
+++ b/lib/devices/gpu_module_test.go
@@ -0,0 +1,505 @@
+package devices_test
+
+import (
+	"context"
+	"fmt"
+	"log"
+	"net/http"
+	"net/http/httptest"
+	"os"
+	osexec "os/exec"
+	"path/filepath"
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/google/go-containerregistry/pkg/name"
+	"github.com/google/go-containerregistry/pkg/v1/daemon"
+	"github.com/google/go-containerregistry/pkg/v1/remote"
+	"github.com/onkernel/hypeman/cmd/api/config"
+	"github.com/onkernel/hypeman/lib/devices"
+	"github.com/onkernel/hypeman/lib/exec"
+	"github.com/onkernel/hypeman/lib/images"
+	"github.com/onkernel/hypeman/lib/instances"
+	"github.com/onkernel/hypeman/lib/network"
+	"github.com/onkernel/hypeman/lib/paths"
+	"github.com/onkernel/hypeman/lib/registry"
+	"github.com/onkernel/hypeman/lib/system"
+	"github.com/onkernel/hypeman/lib/volumes"
+	"github.com/stretchr/testify/require"
+)
+
+// TestNVIDIAModuleLoading verifies that NVIDIA kernel modules load correctly in the VM.
+//
+// This is a simpler test than TestGPUInference that just verifies:
+// 1. NVIDIA kernel modules (nvidia.ko, nvidia-uvm.ko, etc.) load during init
+// 2. GSP firmware is found and loaded
+// 3. /dev/nvidia* device nodes are created
+//
+// Prerequisites:
+// - NVIDIA GPU on host
+// - IOMMU enabled
+// - VFIO modules loaded (modprobe vfio_pci)
+// - Running as root
+//
+// To run manually:
+//
+//	sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestNVIDIAModuleLoading -timeout 5m ./lib/devices/...
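+//
+// On failure, the dmesg signatures to look for are "No firmware image found"
+// (GSP firmware missing from the initrd) and "RmInitAdapter failed" (GPU
+// initialization error); the test checks for both.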
+func TestNVIDIAModuleLoading(t *testing.T) {
+	ctx := context.Background()
+
+	// Auto-detect GPU availability - skip if prerequisites not met
+	skipReason := checkGPUTestPrerequisites()
+	if skipReason != "" {
+		t.Skip(skipReason)
+	}
+
+	groups, _ := os.ReadDir("/sys/kernel/iommu_groups")
+	t.Logf("Test prerequisites met: %d IOMMU groups found", len(groups))
+
+	// Setup test infrastructure
+	tmpDir := t.TempDir()
+	p := paths.New(tmpDir)
+
+	cfg := &config.Config{
+		DataDir:    tmpDir,
+		BridgeName: "vmbr0",
+		SubnetCIDR: "10.100.0.0/16",
+		DNSServer:  "1.1.1.1",
+	}
+
+	// Initialize managers
+	imageMgr, err := images.NewManager(p, 1, nil)
+	require.NoError(t, err)
+
+	systemMgr := system.NewManager(p)
+	networkMgr := network.NewManager(p, cfg, nil)
+	deviceMgr := devices.NewManager(p)
+	volumeMgr := volumes.NewManager(p, 10*1024*1024*1024, nil)
+	limits := instances.ResourceLimits{MaxOverlaySize: 10 * 1024 * 1024 * 1024}
+	instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil)
+
+	// Step 1: Find an NVIDIA GPU
+	t.Log("Step 1: Discovering available GPUs...")
+	availableDevices, err := deviceMgr.ListAvailableDevices(ctx)
+	require.NoError(t, err)
+
+	var targetGPU *devices.AvailableDevice
+	for _, d := range availableDevices {
+		if strings.Contains(strings.ToLower(d.VendorName), "nvidia") {
+			targetGPU = &d
+			break
+		}
+	}
+	require.NotNil(t, targetGPU, "No NVIDIA GPU found")
+
+	driverStr := "none"
+	if targetGPU.CurrentDriver != nil {
+		driverStr = *targetGPU.CurrentDriver
+	}
+	t.Logf("Found NVIDIA GPU: %s at %s (driver: %s)", targetGPU.DeviceName, targetGPU.PCIAddress, driverStr)
+
+	if targetGPU.CurrentDriver == nil || *targetGPU.CurrentDriver == "" {
+		t.Skip("GPU has no driver bound - may need reboot")
+	}
+
+	driverPath := filepath.Join("/sys/bus/pci/devices", targetGPU.PCIAddress, "driver")
+	if _, err := os.Stat(driverPath); os.IsNotExist(err) {
+		t.Skip("GPU driver symlink missing - GPU in broken state")
+	}
+
+	// Step 2: Register the GPU
+	t.Log("Step 2: Registering GPU...")
+	device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{
+		Name:       "module-test-gpu",
+		PCIAddress: targetGPU.PCIAddress,
+	})
+	require.NoError(t, err)
+	t.Logf("Registered device: %s (ID: %s)", device.Name, device.Id)
+
+	originalDriver := driverStr
+	t.Cleanup(func() {
+		t.Log("Cleanup: Deleting registered device...")
+		deviceMgr.DeleteDevice(ctx, device.Id)
+		if originalDriver != "" && originalDriver != "none" && originalDriver != "vfio-pci" {
+			probePath := "/sys/bus/pci/drivers_probe"
+			os.WriteFile(probePath, []byte(targetGPU.PCIAddress), 0200)
+		}
+	})
+
+	// Step 3: Ensure system files
+	t.Log("Step 3: Ensuring system files...")
+	require.NoError(t, systemMgr.EnsureSystemFiles(ctx))
+
+	// Step 4: Pull nginx:alpine (stays running, unlike plain alpine)
+	t.Log("Step 4: Pulling nginx:alpine image...")
+	createdImg, err := imageMgr.CreateImage(ctx, images.CreateImageRequest{
+		Name: "docker.io/library/nginx:alpine",
+	})
+	require.NoError(t, err)
+	t.Logf("CreateImage returned: name=%s, status=%s", createdImg.Name, createdImg.Status)
+
+	// Wait for image to be ready
+	var img *images.Image
+	for i := 0; i < 90; i++ {
+		img, _ = imageMgr.GetImage(ctx, createdImg.Name)
+		if img != nil && img.Status == images.StatusReady {
+			break
+		}
+		time.Sleep(time.Second)
+	}
+	require.NotNil(t, img, "Image should exist")
+	require.Equal(t, images.StatusReady, img.Status, "Image should be ready")
+	t.Log("Image ready")
+
+	// Step 5: Create instance with GPU
+	t.Log("Step 5: Creating instance with GPU...")
+
+	// Initialize network first
+	require.NoError(t, networkMgr.Initialize(ctx, []string{}))
+
+	createCtx, createCancel := context.WithTimeout(ctx, 60*time.Second)
+	defer createCancel()
+
+	inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{
+		Name:           "nvidia-module-test",
+		Image:          createdImg.Name,
+		Size:           512 * 1024 * 1024,
+		HotplugSize:    512 * 1024 * 1024,
+		OverlaySize:    10 * 1024 * 1024 * 1024,
+		Vcpus:          2,
+		NetworkEnabled: false,
+		Devices:        []string{"module-test-gpu"},
+		Env:            map[string]string{},
+	})
+	require.NoError(t, err)
+	t.Logf("Instance created: %s", inst.Id)
+
+	t.Cleanup(func() {
+		t.Log("Cleanup: Deleting instance...")
+		instanceMgr.DeleteInstance(ctx, inst.Id)
+	})
+
+	// Wait for instance to be running
+	err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 30*time.Second)
+	require.NoError(t, err)
+	t.Log("Instance is ready")
+
+	// Wait for init script to complete (module loading happens early in boot)
+	time.Sleep(5 * time.Second)
+
+	// Step 6: Check module loading via dmesg
+	t.Log("Step 6: Checking NVIDIA module loading in VM...")
+
+	actualInst, err := instanceMgr.GetInstance(ctx, inst.Id)
+	require.NoError(t, err)
+
+	execCtx, cancel := context.WithTimeout(ctx, 30*time.Second)
+	defer cancel()
+
+	// Check dmesg for NVIDIA messages
+	var stdout, stderr outputBuffer
+	dmesgCmd := "dmesg | grep -i nvidia | head -50"
+
+	for i := 0; i < 10; i++ {
+		stdout = outputBuffer{}
+		stderr = outputBuffer{}
+		_, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{
+			Command: []string{"/bin/sh", "-c", dmesgCmd},
+			Stdin:   nil,
+			Stdout:  &stdout,
+			Stderr:  &stderr,
+		})
+		if err == nil {
+			break
+		}
+		time.Sleep(time.Second)
+	}
+	require.NoError(t, err, "dmesg command should succeed")
+
+	dmesgOutput := stdout.String()
+	t.Logf("=== NVIDIA dmesg output ===\n%s", dmesgOutput)
+
+	// Check for key error indicators
+	firmwareMissing := strings.Contains(dmesgOutput, "No firmware image found")
+	initFailed := strings.Contains(dmesgOutput, "RmInitAdapter failed")
+
+	if firmwareMissing {
+		t.Errorf("✗ GSP firmware not found - firmware not included in initrd")
+	}
+	if initFailed {
+		t.Errorf("✗ NVIDIA driver RmInitAdapter failed - GPU initialization error")
+	}
+
+	// Check lsmod for nvidia modules
+	stdout = outputBuffer{}
+	stderr = outputBuffer{}
+	_, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{
+		Command: []string{"/bin/sh", "-c", "cat /proc/modules | grep nvidia || echo 'No nvidia modules loaded'"},
+		Stdin:   nil,
+		Stdout:  &stdout,
+		Stderr:  &stderr,
+	})
+	require.NoError(t, err)
+	modulesOutput := stdout.String()
+	t.Logf("=== Loaded nvidia modules ===\n%s", modulesOutput)
+
+	hasModules := !strings.Contains(modulesOutput, "No nvidia modules loaded")
+	if !hasModules {
+		t.Errorf("✗ NVIDIA modules not loaded in VM")
+	} else {
+		t.Log("✓ NVIDIA kernel modules are loaded")
+	}
+
+	// Check for /dev/nvidia* devices
+	stdout = outputBuffer{}
+	stderr = outputBuffer{}
+	_, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{
+		Command: []string{"/bin/sh", "-c", "ls -la /dev/nvidia* 2>&1 || echo 'No nvidia devices found'"},
+		Stdin:   nil,
+		Stdout:  &stdout,
+		Stderr:  &stderr,
+	})
+	require.NoError(t, err)
+	devicesOutput := stdout.String()
+	t.Logf("=== NVIDIA device nodes ===\n%s", devicesOutput)
+
+	hasDevices := !strings.Contains(devicesOutput, "No nvidia devices found") &&
+		!strings.Contains(devicesOutput, "No such file")
!strings.Contains(devicesOutput, "No such file") + if hasDevices { + t.Log("✓ /dev/nvidia* device nodes exist") + } else { + t.Log("✗ /dev/nvidia* device nodes not found (expected if init failed)") + } + + // Final verdict + if !firmwareMissing && !initFailed && hasModules { + t.Log("\n=== SUCCESS: NVIDIA kernel modules loaded correctly ===") + } else { + t.Errorf("\n=== FAILURE: NVIDIA module loading has issues ===") + } +} + +// TestNVMLDetection tests if NVML can detect the GPU from userspace. +// This uses the custom CUDA+Ollama image and runs a Python NVML test. +// +// To run manually: +// +// sudo env PATH=$PATH:/sbin:/usr/sbin go test -v -run TestNVMLDetection -timeout 10m ./lib/devices/... +func TestNVMLDetection(t *testing.T) { + ctx := context.Background() + + skipReason := checkGPUTestPrerequisites() + if skipReason != "" { + t.Skip(skipReason) + } + + groups, _ := os.ReadDir("/sys/kernel/iommu_groups") + t.Logf("Test prerequisites met: %d IOMMU groups found", len(groups)) + + // Use persistent test directory for image caching + const persistentTestDataDir = "/var/lib/hypeman-gpu-inference-test" + if err := os.MkdirAll(persistentTestDataDir, 0755); err != nil { + t.Fatalf("Failed to create persistent test dir: %v", err) + } + + p := paths.New(persistentTestDataDir) + cfg := &config.Config{ + DataDir: persistentTestDataDir, + BridgeName: "vmbr0", + SubnetCIDR: "10.100.0.0/16", + DNSServer: "1.1.1.1", + } + + imageMgr, err := images.NewManager(p, 1, nil) + require.NoError(t, err) + + systemMgr := system.NewManager(p) + networkMgr := network.NewManager(p, cfg, nil) + deviceMgr := devices.NewManager(p) + volumeMgr := volumes.NewManager(p, 10*1024*1024*1024, nil) + limits := instances.ResourceLimits{MaxOverlaySize: 10 * 1024 * 1024 * 1024} + instanceMgr := instances.NewManager(p, imageMgr, systemMgr, networkMgr, deviceMgr, volumeMgr, limits, nil, nil) + + // Step 1: Check if ollama-cuda:test image exists in Docker + t.Log("Step 1: Checking for ollama-cuda:test Docker image...") + checkCmd := osexec.Command("docker", "image", "inspect", "ollama-cuda:test") + if err := checkCmd.Run(); err != nil { + t.Fatal("Docker image ollama-cuda:test not found. Build it first with:\n" + + " cd lib/devices/testdata/ollama-cuda && docker build -t ollama-cuda:test .") + } + t.Log("Docker image ollama-cuda:test exists") + + // Step 2: Start registry and push image + t.Log("Step 2: Starting registry and pushing image...") + reg, err := registry.New(p, imageMgr) + require.NoError(t, err) + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + log.Printf("%s %s", r.Method, r.URL.Path) + reg.Handler().ServeHTTP(w, r) + })) + defer server.Close() + + serverHost := strings.TrimPrefix(server.URL, "http://") + pushLocalDockerImageForTest(t, "ollama-cuda:test", serverHost) + t.Log("Push complete") + + // Wait for image conversion + t.Log("Waiting for image conversion...") + var img *images.Image + var imageName string + for i := 0; i < 180; i++ { // 3 minutes max + allImages, listErr := imageMgr.ListImages(ctx) + if listErr == nil { + for _, candidate := range allImages { + if strings.Contains(candidate.Name, "ollama-cuda") { + img = &candidate + imageName = candidate.Name + break + } + } + } + if img != nil && img.Status == images.StatusReady { + break + } + if i%30 == 0 { + status := "not found" + if img != nil { + status = string(img.Status) + } + t.Logf("Waiting for image... 
(%d/180, status=%s)", i+1, status) + } + time.Sleep(time.Second) + } + require.NotNil(t, img, "Image should exist after 3 minutes") + require.Equal(t, images.StatusReady, img.Status, "Image should be ready") + t.Logf("Image ready: %s", imageName) + + // Step 3: Find and register GPU + t.Log("Step 3: Discovering GPUs...") + availableDevices, err := deviceMgr.ListAvailableDevices(ctx) + require.NoError(t, err) + + var targetGPU *devices.AvailableDevice + for _, d := range availableDevices { + if strings.Contains(strings.ToLower(d.VendorName), "nvidia") { + targetGPU = &d + break + } + } + require.NotNil(t, targetGPU, "No NVIDIA GPU found") + t.Logf("Found GPU: %s at %s", targetGPU.DeviceName, targetGPU.PCIAddress) + + device, err := deviceMgr.CreateDevice(ctx, devices.CreateDeviceRequest{ + Name: "nvml-test-gpu", + PCIAddress: targetGPU.PCIAddress, + }) + require.NoError(t, err) + t.Cleanup(func() { + deviceMgr.DeleteDevice(ctx, device.Id) + }) + + // Step 4: Initialize network and system + require.NoError(t, networkMgr.Initialize(ctx, []string{})) + require.NoError(t, systemMgr.EnsureSystemFiles(ctx)) + + // Step 5: Create instance + t.Log("Step 4: Creating instance with CUDA image...") + inst, err := instanceMgr.CreateInstance(ctx, instances.CreateInstanceRequest{ + Name: "nvml-test", + Image: imageName, + Size: 2 * 1024 * 1024 * 1024, + HotplugSize: 512 * 1024 * 1024, + OverlaySize: 10 * 1024 * 1024 * 1024, + Vcpus: 2, + NetworkEnabled: true, + Devices: []string{"nvml-test-gpu"}, + Env: map[string]string{}, + }) + require.NoError(t, err) + t.Logf("Instance created: %s", inst.Id) + + t.Cleanup(func() { + t.Log("Cleanup: Deleting instance...") + instanceMgr.DeleteInstance(ctx, inst.Id) + }) + + err = waitForInstanceReady(ctx, t, instanceMgr, inst.Id, 60*time.Second) + require.NoError(t, err) + time.Sleep(5 * time.Second) + + actualInst, err := instanceMgr.GetInstance(ctx, inst.Id) + require.NoError(t, err) + + // Step 5: Run NVML test + t.Log("Step 5: Running NVML detection test...") + execCtx, cancel := context.WithTimeout(ctx, 30*time.Second) + defer cancel() + + var stdout, stderr outputBuffer + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "python3 /usr/local/bin/test-nvml.py 2>&1"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + + t.Logf("NVML test output:\n%s", stdout.String()) + if stderr.String() != "" { + t.Logf("NVML test stderr:\n%s", stderr.String()) + } + + require.NoError(t, err, "NVML test command should succeed") + + output := stdout.String() + if strings.Contains(output, "GPU DETECTED") { + t.Log("✓ SUCCESS: NVML detected the GPU!") + } else if strings.Contains(output, "NVML_ERROR_LIB_RM_VERSION_MISMATCH") { + t.Log("✗ NVML version mismatch - container NVML library doesn't match kernel driver version") + t.Log(" Container has: 570.195.03") + t.Log(" Kernel driver: 570.86.16") + t.FailNow() + } else if strings.Contains(output, "NVML_ERROR_DRIVER_NOT_LOADED") { + t.Log("✗ NVML reports driver not loaded (but kernel modules are loaded)") + t.FailNow() + } else { + t.Errorf("✗ NVML test failed: %s", output) + } + + // Step 6: Run CUDA test + t.Log("Step 6: Running CUDA driver test...") + stdout = outputBuffer{} + stderr = outputBuffer{} + _, err = exec.ExecIntoInstance(execCtx, actualInst.VsockSocket, exec.ExecOptions{ + Command: []string{"/bin/sh", "-c", "python3 /usr/local/bin/test-cuda.py 2>&1"}, + Stdin: nil, + Stdout: &stdout, + Stderr: &stderr, + }) + + t.Logf("CUDA test output:\n%s", 
+	if strings.Contains(stdout.String(), "CUDA WORKS") {
+		t.Log("✓ SUCCESS: CUDA driver works!")
+	} else {
+		t.Logf("CUDA test may have issues: %s", stdout.String())
+	}
+}
+
+// pushLocalDockerImageForTest is a test helper that pushes a local Docker image to the registry
+func pushLocalDockerImageForTest(t *testing.T, dockerImage, serverHost string) {
+	t.Helper()
+
+	srcRef, err := name.ParseReference(dockerImage)
+	require.NoError(t, err)
+
+	img, err := daemon.Image(srcRef)
+	require.NoError(t, err)
+
+	targetRef := fmt.Sprintf("%s/test/ollama-cuda:latest", serverHost)
+	t.Logf("Pushing to %s", targetRef)
+
+	dstRef, err := name.ParseReference(targetRef, name.Insecure)
+	require.NoError(t, err)
+
+	err = remote.Write(dstRef, img)
+	require.NoError(t, err)
+}
diff --git a/lib/devices/scripts/gpu-reset.sh b/lib/devices/scripts/gpu-reset.sh
new file mode 100755
index 00000000..37006f7f
--- /dev/null
+++ b/lib/devices/scripts/gpu-reset.sh
@@ -0,0 +1,178 @@
+#!/bin/bash
+#
+# gpu-reset.sh - Reset GPU state after failed passthrough tests or hangs
+#
+# This script handles common GPU recovery scenarios:
+# 1. Killing any stuck cloud-hypervisor processes holding the GPU
+# 2. Unbinding from vfio-pci if still bound
+# 3. Clearing driver_override
+# 4. Triggering driver probe to rebind to nvidia driver
+# 5. Restarting nvidia-persistenced
+#
+# Usage:
+#   sudo ./gpu-reset.sh                # Reset all NVIDIA GPUs
+#   sudo ./gpu-reset.sh 0000:a2:00.0   # Reset specific GPU by PCI address
+#
+# Must be run as root.
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+log_info() {
+    echo -e "${GREEN}[INFO]${NC} $1"
+}
+
+log_warn() {
+    echo -e "${YELLOW}[WARN]${NC} $1"
+}
+
+log_error() {
+    echo -e "${RED}[ERROR]${NC} $1"
+}
+
+# Check if running as root
+if [[ $EUID -ne 0 ]]; then
+    log_error "This script must be run as root (sudo)"
+    exit 1
+fi
+
+# Get PCI address from argument or find all NVIDIA GPUs
+if [[ -n "$1" ]]; then
+    PCI_ADDRESSES=("$1")
+else
+    # Find all NVIDIA GPUs (vendor 10de)
+    PCI_ADDRESSES=()
+    for dev in /sys/bus/pci/devices/*; do
+        if [[ -f "$dev/vendor" ]]; then
+            vendor=$(cat "$dev/vendor" 2>/dev/null)
+            class=$(cat "$dev/class" 2>/dev/null)
+            # Check for NVIDIA vendor (0x10de) and display/3D controller class (0x03xxxx)
+            if [[ "$vendor" == "0x10de" && "$class" == 0x03* ]]; then
+                addr=$(basename "$dev")
+                PCI_ADDRESSES+=("$addr")
+            fi
+        fi
+    done
+fi
+
+if [[ ${#PCI_ADDRESSES[@]} -eq 0 ]]; then
+    log_error "No NVIDIA GPUs found"
+    exit 1
+fi
+
+log_info "Found ${#PCI_ADDRESSES[@]} GPU(s) to reset: ${PCI_ADDRESSES[*]}"
+
+# Step 1: Kill any cloud-hypervisor processes that might be holding GPUs
+log_info "Step 1: Checking for stuck cloud-hypervisor processes..."
+if pgrep -f "cloud-hypervisor" > /dev/null 2>&1; then
+    log_warn "Found cloud-hypervisor processes, killing them..."
+    pkill -9 -f "cloud-hypervisor" 2>/dev/null || true
+    sleep 2
+    if pgrep -f "cloud-hypervisor" > /dev/null 2>&1; then
+        log_error "Failed to kill cloud-hypervisor processes"
+        ps aux | grep cloud-hypervisor | grep -v grep
+    else
+        log_info "Killed cloud-hypervisor processes"
+    fi
+else
+    log_info "No cloud-hypervisor processes found"
+fi
+
+# Process each GPU
+for PCI_ADDR in "${PCI_ADDRESSES[@]}"; do
+    log_info "Processing GPU at $PCI_ADDR..."
+
+    DEVICE_PATH="/sys/bus/pci/devices/$PCI_ADDR"
+
-d "$DEVICE_PATH" ]]; then + log_error "Device $PCI_ADDR not found at $DEVICE_PATH" + continue + fi + + # Get current driver + CURRENT_DRIVER="" + if [[ -L "$DEVICE_PATH/driver" ]]; then + CURRENT_DRIVER=$(basename "$(readlink "$DEVICE_PATH/driver")") + fi + log_info " Current driver: ${CURRENT_DRIVER:-none}" + + # Step 2: If bound to vfio-pci, unbind + if [[ "$CURRENT_DRIVER" == "vfio-pci" ]]; then + log_info " Step 2: Unbinding from vfio-pci..." + echo "$PCI_ADDR" > /sys/bus/pci/drivers/vfio-pci/unbind 2>/dev/null || true + sleep 1 + else + log_info " Step 2: Not bound to vfio-pci, skipping unbind" + fi + + # Step 3: Clear driver_override + log_info " Step 3: Clearing driver_override..." + if [[ -f "$DEVICE_PATH/driver_override" ]]; then + OVERRIDE=$(cat "$DEVICE_PATH/driver_override" 2>/dev/null) + if [[ -n "$OVERRIDE" && "$OVERRIDE" != "(null)" ]]; then + log_info " Current override: $OVERRIDE" + echo > "$DEVICE_PATH/driver_override" 2>/dev/null || true + log_info " Cleared driver_override" + else + log_info " No driver_override set" + fi + fi + + # Step 4: Trigger driver probe to rebind to nvidia + log_info " Step 4: Triggering driver probe..." + echo "$PCI_ADDR" > /sys/bus/pci/drivers_probe 2>/dev/null || true + sleep 2 + + # Check new driver + NEW_DRIVER="" + if [[ -L "$DEVICE_PATH/driver" ]]; then + NEW_DRIVER=$(basename "$(readlink "$DEVICE_PATH/driver")") + fi + log_info " New driver: ${NEW_DRIVER:-none}" + + if [[ "$NEW_DRIVER" == "nvidia" ]]; then + log_info " ✓ GPU successfully rebound to nvidia driver" + elif [[ -z "$NEW_DRIVER" ]]; then + log_warn " GPU has no driver bound - may need manual intervention or reboot" + else + log_warn " GPU bound to $NEW_DRIVER (expected nvidia)" + fi +done + +# Step 5: Restart nvidia-persistenced +log_info "Step 5: Restarting nvidia-persistenced..." +if systemctl is-active nvidia-persistenced > /dev/null 2>&1; then + log_info " nvidia-persistenced is already running" +else + if systemctl start nvidia-persistenced 2>/dev/null; then + log_info " Started nvidia-persistenced" + else + log_warn " Failed to start nvidia-persistenced (may not be installed or GPU not ready)" + fi +fi + +# Final verification +log_info "" +log_info "=== Final GPU State ===" +for PCI_ADDR in "${PCI_ADDRESSES[@]}"; do + echo "" + lspci -nnks "$PCI_ADDR" 2>/dev/null || echo "Could not query $PCI_ADDR" +done + +echo "" +log_info "=== nvidia-smi ===" +if command -v nvidia-smi &> /dev/null; then + nvidia-smi 2>&1 | head -20 || log_warn "nvidia-smi failed (GPU may need more time or reboot)" +else + log_warn "nvidia-smi not found" +fi + +echo "" +log_info "GPU reset complete!" +log_info "If GPUs are still in a bad state, a system reboot may be required." + diff --git a/lib/devices/testdata/ollama-cuda/Dockerfile b/lib/devices/testdata/ollama-cuda/Dockerfile new file mode 100644 index 00000000..d31107ff --- /dev/null +++ b/lib/devices/testdata/ollama-cuda/Dockerfile @@ -0,0 +1,29 @@ +# Minimal CUDA image for GPU inference testing +# +# NO NVIDIA DRIVER INSTALLATION NEEDED! +# hypeman automatically injects the matching driver libraries at VM boot time. +# See lib/devices/GPU.md for documentation on driver injection. +# +# This image demonstrates that standard CUDA runtime images work out of the box +# with hypeman's GPU passthrough - no driver version matching required. 
+
+FROM nvidia/cuda:12.4.1-runtime-ubuntu22.04
+
+# Install dependencies and Ollama
+# Note: We use the runtime image (not devel) since we don't need CUDA compilation tools
+RUN apt-get update && \
+    apt-get install -y curl ca-certificates python3 && \
+    curl -fsSL https://ollama.com/install.sh | sh && \
+    rm -rf /var/lib/apt/lists/*
+
+# Add test scripts for verifying GPU access
+COPY test-nvml.py /usr/local/bin/test-nvml.py
+COPY test-cuda.py /usr/local/bin/test-cuda.py
+RUN chmod +x /usr/local/bin/test-nvml.py /usr/local/bin/test-cuda.py
+
+# Ensure libraries are in the path (hypeman injects to /usr/lib/x86_64-linux-gnu)
+ENV LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/lib/x86_64-linux-gnu:${LD_LIBRARY_PATH}
+ENV PATH=/usr/local/cuda/bin:/usr/bin:${PATH}
+
+EXPOSE 11434
+CMD ["ollama", "serve"]
diff --git a/lib/devices/testdata/ollama-cuda/test-cuda.py b/lib/devices/testdata/ollama-cuda/test-cuda.py
new file mode 100644
index 00000000..9e9c0ebe
--- /dev/null
+++ b/lib/devices/testdata/ollama-cuda/test-cuda.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+"""Test basic CUDA operations."""
+import ctypes
+import os
+import sys
+
+def test_cuda():
+    """Try to use the CUDA driver API."""
+    print("=== CUDA Driver Test ===")
+    print(f"LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH', 'not set')}")
+
+    # Try loading libcuda
+    try:
+        cuda = ctypes.CDLL("libcuda.so")
+        print("✓ Loaded libcuda.so")
+    except OSError as e:
+        print(f"✗ Failed to load libcuda.so: {e}")
+        return False
+
+    # Initialize CUDA
+    ret = cuda.cuInit(0)
+    if ret != 0:
+        print(f"✗ cuInit failed with code: {ret}")
+        return False
+    print("✓ cuInit succeeded")
+
+    # Get device count
+    count = ctypes.c_int()
+    ret = cuda.cuDeviceGetCount(ctypes.byref(count))
+    if ret != 0:
+        print(f"✗ cuDeviceGetCount failed with code: {ret}")
+        return False
+    print(f"✓ Found {count.value} CUDA device(s)")
+
+    if count.value == 0:
+        return False
+
+    # Get device name
+    device = ctypes.c_int()
+    ret = cuda.cuDeviceGet(ctypes.byref(device), 0)
+    if ret != 0:
+        print(f"✗ cuDeviceGet failed: {ret}")
+        return False
+
+    name = ctypes.create_string_buffer(256)
+    ret = cuda.cuDeviceGetName(name, 256, device)
+    if ret == 0:
+        print(f"✓ Device 0: {name.value.decode()}")
+
+    # Get total memory
+    total_mem = ctypes.c_size_t()
+    ret = cuda.cuDeviceTotalMem_v2(ctypes.byref(total_mem), device)
+    if ret == 0:
+        print(f"✓ Total memory: {total_mem.value / (1024**3):.1f} GB")
+
+    return True
+
+if __name__ == "__main__":
+    success = test_cuda()
+    print()
+    print("Result:", "CUDA WORKS" if success else "CUDA FAILED")
+    sys.exit(0 if success else 1)
diff --git a/lib/devices/testdata/ollama-cuda/test-nvml.py b/lib/devices/testdata/ollama-cuda/test-nvml.py
new file mode 100644
index 00000000..42e98826
--- /dev/null
+++ b/lib/devices/testdata/ollama-cuda/test-nvml.py
@@ -0,0 +1,83 @@
+#!/usr/bin/env python3
+"""Test NVML GPU detection - matches what Ollama does internally."""
+import ctypes
+import os
+import sys
+
+def test_nvml():
+    """Try to initialize NVML and detect GPUs."""
+    # Try different library paths
+    lib_paths = [
+        "libnvidia-ml.so.1",
+        "libnvidia-ml.so",
+        "/usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1",
+    ]
+
+    nvml = None
+    for path in lib_paths:
+        try:
+            nvml = ctypes.CDLL(path)
+            print(f"✓ Loaded NVML from: {path}")
+            break
+        except OSError as e:
+            print(f"✗ Failed to load {path}: {e}")
+
+    if nvml is None:
+        print("ERROR: Could not load NVML library")
+        return False
+
+    # Try to initialize
+    try:
+        ret = nvml.nvmlInit_v2()
+        if ret != 0:
failed with code: {ret}") + # Error codes: 1=uninitialized, 2=invalid argument, 3=not supported, + # 9=driver not loaded, 12=library not found + error_names = { + 1: "NVML_ERROR_UNINITIALIZED", + 2: "NVML_ERROR_INVALID_ARGUMENT", + 3: "NVML_ERROR_NOT_SUPPORTED", + 9: "NVML_ERROR_DRIVER_NOT_LOADED", + 12: "NVML_ERROR_LIB_RM_VERSION_MISMATCH", + 255: "NVML_ERROR_UNKNOWN", + } + print(f" Error name: {error_names.get(ret, 'UNKNOWN')}") + return False + print("✓ nvmlInit_v2 succeeded") + except Exception as e: + print(f"✗ nvmlInit_v2 exception: {e}") + return False + + # Get device count + try: + count = ctypes.c_uint() + ret = nvml.nvmlDeviceGetCount_v2(ctypes.byref(count)) + if ret != 0: + print(f"✗ nvmlDeviceGetCount failed with code: {ret}") + return False + print(f"✓ Found {count.value} GPU(s)") + except Exception as e: + print(f"✗ nvmlDeviceGetCount exception: {e}") + return False + + # Shutdown + nvml.nvmlShutdown() + return count.value > 0 + +if __name__ == "__main__": + print("=== NVML GPU Detection Test ===") + print(f"LD_LIBRARY_PATH: {os.environ.get('LD_LIBRARY_PATH', 'not set')}") + print() + + # Check device nodes + print("Device nodes:") + for dev in ["/dev/nvidia0", "/dev/nvidiactl", "/dev/nvidia-uvm"]: + exists = os.path.exists(dev) + print(f" {dev}: {'exists' if exists else 'MISSING'}") + print() + + success = test_nvml() + print() + print("Result:", "GPU DETECTED" if success else "NO GPU FOUND") + exit(0 if success else 1) + + diff --git a/lib/oapi/oapi.go b/lib/oapi/oapi.go index 4709a156..0401b78c 100644 --- a/lib/oapi/oapi.go +++ b/lib/oapi/oapi.go @@ -29,6 +29,12 @@ const ( BearerAuthScopes = "bearerAuth.Scopes" ) +// Defines values for DeviceType. +const ( + Gpu DeviceType = "gpu" + Pci DeviceType = "pci" +) + // Defines values for HealthStatus. const ( Ok HealthStatus = "ok" @@ -70,6 +76,39 @@ type AttachVolumeRequest struct { Readonly *bool `json:"readonly,omitempty"` } +// AvailableDevice defines model for AvailableDevice. +type AvailableDevice struct { + // CurrentDriver Currently bound driver (null if none) + CurrentDriver *string `json:"current_driver"` + + // DeviceId PCI device ID (hex) + DeviceId string `json:"device_id"` + + // DeviceName Human-readable device name + DeviceName *string `json:"device_name,omitempty"` + + // IommuGroup IOMMU group number + IommuGroup int `json:"iommu_group"` + + // PciAddress PCI address + PciAddress string `json:"pci_address"` + + // VendorId PCI vendor ID (hex) + VendorId string `json:"vendor_id"` + + // VendorName Human-readable vendor name + VendorName *string `json:"vendor_name,omitempty"` +} + +// CreateDeviceRequest defines model for CreateDeviceRequest. +type CreateDeviceRequest struct { + // Name Optional globally unique device name. If not provided, a name is auto-generated from the PCI address (e.g., "pci-0000-a2-00-0") + Name *string `json:"name,omitempty"` + + // PciAddress PCI address of the device (required, e.g., "0000:a2:00.0") + PciAddress string `json:"pci_address"` +} + // CreateImageRequest defines model for CreateImageRequest. type CreateImageRequest struct { // Name OCI image reference (e.g., docker.io/library/nginx:latest) @@ -87,6 +126,9 @@ type CreateIngressRequest struct { // CreateInstanceRequest defines model for CreateInstanceRequest. 
 type CreateInstanceRequest struct {
+	// Devices Device IDs or names to attach for GPU/PCI passthrough
+	Devices *[]string `json:"devices,omitempty"`
+
 	// Env Environment variables
 	Env *map[string]string `json:"env,omitempty"`
@@ -130,6 +172,44 @@ type CreateVolumeRequest struct {
 	SizeGb int `json:"size_gb"`
 }
 
+// Device defines model for Device.
+type Device struct {
+	// AttachedTo Instance ID if attached
+	AttachedTo *string `json:"attached_to"`
+
+	// BoundToVfio Whether the device is currently bound to the vfio-pci driver, which is required for VM passthrough.
+	// - true: Device is bound to vfio-pci and ready for (or currently in use by) a VM. The device's native driver has been unloaded.
+	// - false: Device is using its native driver (e.g., nvidia) or no driver. Hypeman will automatically bind to vfio-pci when attaching to an instance.
+	BoundToVfio bool `json:"bound_to_vfio"`
+
+	// CreatedAt Registration timestamp (RFC3339)
+	CreatedAt time.Time `json:"created_at"`
+
+	// DeviceId PCI device ID (hex)
+	DeviceId string `json:"device_id"`
+
+	// Id Auto-generated unique identifier (CUID2 format)
+	Id string `json:"id"`
+
+	// IommuGroup IOMMU group number
+	IommuGroup int `json:"iommu_group"`
+
+	// Name Device name (user-provided or auto-generated from PCI address)
+	Name *string `json:"name,omitempty"`
+
+	// PciAddress PCI address
+	PciAddress string `json:"pci_address"`
+
+	// Type Type of PCI device
+	Type DeviceType `json:"type"`
+
+	// VendorId PCI vendor ID (hex)
+	VendorId string `json:"vendor_id"`
+}
+
+// DeviceType Type of PCI device
+type DeviceType string
+
 // Error defines model for Error.
 type Error struct {
 	// Code Application-specific error code (machine-readable)
@@ -415,6 +495,9 @@ type CreateVolumeMultipartBody struct {
 	SizeGb int `json:"size_gb"`
 }
 
+// CreateDeviceJSONRequestBody defines body for CreateDevice for application/json ContentType.
+type CreateDeviceJSONRequestBody = CreateDeviceRequest
+
 // CreateImageJSONRequestBody defines body for CreateImage for application/json ContentType.
 type CreateImageJSONRequestBody = CreateImageRequest
 
@@ -506,6 +589,23 @@ func WithRequestEditorFn(fn RequestEditorFn) ClientOption {
 
 // The interface specification for the client above.
 type ClientInterface interface {
+	// ListDevices request
+	ListDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error)
+
+	// CreateDeviceWithBody request with any body
+	CreateDeviceWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error)
+
+	CreateDevice(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error)
+
+	// ListAvailableDevices request
+	ListAvailableDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error)
+
+	// DeleteDevice request
+	DeleteDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error)
+
+	// GetDevice request
+	GetDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error)
+
 	// GetHealth request
 	GetHealth(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error)
 
@@ -589,6 +689,78 @@ type ClientInterface interface {
 	GetVolume(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error)
 }
 
+func (c *Client) ListDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) {
+	req, err := NewListDevicesRequest(c.Server)
+	if err != nil {
+		return nil, err
+	}
+	req = req.WithContext(ctx)
+	if err := c.applyEditors(ctx, req, reqEditors); err != nil {
+		return nil, err
+	}
+	return c.Client.Do(req)
+}
+
+func (c *Client) CreateDeviceWithBody(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*http.Response, error) {
+	req, err := NewCreateDeviceRequestWithBody(c.Server, contentType, body)
+	if err != nil {
+		return nil, err
+	}
+	req = req.WithContext(ctx)
+	if err := c.applyEditors(ctx, req, reqEditors); err != nil {
+		return nil, err
+	}
+	return c.Client.Do(req)
+}
+
+func (c *Client) CreateDevice(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*http.Response, error) {
+	req, err := NewCreateDeviceRequest(c.Server, body)
+	if err != nil {
+		return nil, err
+	}
+	req = req.WithContext(ctx)
+	if err := c.applyEditors(ctx, req, reqEditors); err != nil {
+		return nil, err
+	}
+	return c.Client.Do(req)
+}
+
+func (c *Client) ListAvailableDevices(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) {
+	req, err := NewListAvailableDevicesRequest(c.Server)
+	if err != nil {
+		return nil, err
+	}
+	req = req.WithContext(ctx)
+	if err := c.applyEditors(ctx, req, reqEditors); err != nil {
+		return nil, err
+	}
+	return c.Client.Do(req)
+}
+
+func (c *Client) DeleteDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) {
+	req, err := NewDeleteDeviceRequest(c.Server, id)
+	if err != nil {
+		return nil, err
+	}
+	req = req.WithContext(ctx)
+	if err := c.applyEditors(ctx, req, reqEditors); err != nil {
+		return nil, err
+	}
+	return c.Client.Do(req)
+}
+
+func (c *Client) GetDevice(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) {
+	req, err := NewGetDeviceRequest(c.Server, id)
+	if err != nil {
+		return nil, err
+	}
+	req = req.WithContext(ctx)
+	if err := c.applyEditors(ctx, req, reqEditors); err != nil {
+		return nil, err
+	}
+	return c.Client.Do(req)
+}
+
 func (c *Client) GetHealth(ctx context.Context, reqEditors ...RequestEditorFn) (*http.Response, error) {
 	req, err := NewGetHealthRequest(c.Server)
 	if err != nil {
@@ -937,6 +1109,168 @@ func (c *Client) GetVolume(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*http.Response, error) {
 	return c.Client.Do(req)
 }
 
+// NewListDevicesRequest generates requests for ListDevices
+func NewListDevicesRequest(server string) (*http.Request, error) {
+	var err error
+
+	serverURL, err := url.Parse(server)
+	if err != nil {
+		return nil, err
+	}
+
+	operationPath := fmt.Sprintf("/devices")
+	if operationPath[0] == '/' {
+		operationPath = "." + operationPath
+	}
+
+	queryURL, err := serverURL.Parse(operationPath)
+	if err != nil {
+		return nil, err
+	}
+
+	req, err := http.NewRequest("GET", queryURL.String(), nil)
+	if err != nil {
+		return nil, err
+	}
+
+	return req, nil
+}
+
+// NewCreateDeviceRequest calls the generic CreateDevice builder with application/json body
+func NewCreateDeviceRequest(server string, body CreateDeviceJSONRequestBody) (*http.Request, error) {
+	var bodyReader io.Reader
+	buf, err := json.Marshal(body)
+	if err != nil {
+		return nil, err
+	}
+	bodyReader = bytes.NewReader(buf)
+	return NewCreateDeviceRequestWithBody(server, "application/json", bodyReader)
+}
+
+// NewCreateDeviceRequestWithBody generates requests for CreateDevice with any type of body
+func NewCreateDeviceRequestWithBody(server string, contentType string, body io.Reader) (*http.Request, error) {
+	var err error
+
+	serverURL, err := url.Parse(server)
+	if err != nil {
+		return nil, err
+	}
+
+	operationPath := fmt.Sprintf("/devices")
+	if operationPath[0] == '/' {
+		operationPath = "." + operationPath
+	}
+
+	queryURL, err := serverURL.Parse(operationPath)
+	if err != nil {
+		return nil, err
+	}
+
+	req, err := http.NewRequest("POST", queryURL.String(), body)
+	if err != nil {
+		return nil, err
+	}
+
+	req.Header.Add("Content-Type", contentType)
+
+	return req, nil
+}
+
+// NewListAvailableDevicesRequest generates requests for ListAvailableDevices
+func NewListAvailableDevicesRequest(server string) (*http.Request, error) {
+	var err error
+
+	serverURL, err := url.Parse(server)
+	if err != nil {
+		return nil, err
+	}
+
+	operationPath := fmt.Sprintf("/devices/available")
+	if operationPath[0] == '/' {
+		operationPath = "." + operationPath
+	}
+
+	queryURL, err := serverURL.Parse(operationPath)
+	if err != nil {
+		return nil, err
+	}
+
+	req, err := http.NewRequest("GET", queryURL.String(), nil)
+	if err != nil {
+		return nil, err
+	}
+
+	return req, nil
+}
+
+// NewDeleteDeviceRequest generates requests for DeleteDevice
+func NewDeleteDeviceRequest(server string, id string) (*http.Request, error) {
+	var err error
+
+	var pathParam0 string
+
+	pathParam0, err = runtime.StyleParamWithLocation("simple", false, "id", runtime.ParamLocationPath, id)
+	if err != nil {
+		return nil, err
+	}
+
+	serverURL, err := url.Parse(server)
+	if err != nil {
+		return nil, err
+	}
+
+	operationPath := fmt.Sprintf("/devices/%s", pathParam0)
+	if operationPath[0] == '/' {
+		operationPath = "." + operationPath
+	}
+ operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("DELETE", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + +// NewGetDeviceRequest generates requests for GetDevice +func NewGetDeviceRequest(server string, id string) (*http.Request, error) { + var err error + + var pathParam0 string + + pathParam0, err = runtime.StyleParamWithLocation("simple", false, "id", runtime.ParamLocationPath, id) + if err != nil { + return nil, err + } + + serverURL, err := url.Parse(server) + if err != nil { + return nil, err + } + + operationPath := fmt.Sprintf("/devices/%s", pathParam0) + if operationPath[0] == '/' { + operationPath = "." + operationPath + } + + queryURL, err := serverURL.Parse(operationPath) + if err != nil { + return nil, err + } + + req, err := http.NewRequest("GET", queryURL.String(), nil) + if err != nil { + return nil, err + } + + return req, nil +} + // NewGetHealthRequest generates requests for GetHealth func NewGetHealthRequest(server string) (*http.Request, error) { var err error @@ -1866,6 +2200,23 @@ func WithBaseURL(baseURL string) ClientOption { // ClientWithResponsesInterface is the interface specification for the client with responses above. type ClientWithResponsesInterface interface { + // ListDevicesWithResponse request + ListDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListDevicesResponse, error) + + // CreateDeviceWithBodyWithResponse request with any body + CreateDeviceWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) + + CreateDeviceWithResponse(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) + + // ListAvailableDevicesWithResponse request + ListAvailableDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListAvailableDevicesResponse, error) + + // DeleteDeviceWithResponse request + DeleteDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*DeleteDeviceResponse, error) + + // GetDeviceWithResponse request + GetDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetDeviceResponse, error) + // GetHealthWithResponse request GetHealthWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetHealthResponse, error) @@ -1949,14 +2300,16 @@ type ClientWithResponsesInterface interface { GetVolumeWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetVolumeResponse, error) } -type GetHealthResponse struct { +type ListDevicesResponse struct { Body []byte HTTPResponse *http.Response - JSON200 *Health + JSON200 *[]Device + JSON401 *Error + JSON500 *Error } // Status returns HTTPResponse.Status -func (r GetHealthResponse) Status() string { +func (r ListDevicesResponse) Status() string { if r.HTTPResponse != nil { return r.HTTPResponse.Status } @@ -1964,17 +2317,138 @@ func (r GetHealthResponse) Status() string { } // StatusCode returns HTTPResponse.StatusCode -func (r GetHealthResponse) StatusCode() int { +func (r ListDevicesResponse) StatusCode() int { if r.HTTPResponse != nil { return r.HTTPResponse.StatusCode } return 0 } -type ListImagesResponse struct { +type CreateDeviceResponse struct { Body []byte HTTPResponse *http.Response - JSON200 *[]Image + JSON201 *Device + JSON400 *Error + JSON401 *Error + JSON404 *Error + JSON409 *Error + JSON500 
*Error +} + +// Status returns HTTPResponse.Status +func (r CreateDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r CreateDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type ListAvailableDevicesResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *[]AvailableDevice + JSON401 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r ListAvailableDevicesResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r ListAvailableDevicesResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type DeleteDeviceResponse struct { + Body []byte + HTTPResponse *http.Response + JSON404 *Error + JSON409 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r DeleteDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r DeleteDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type GetDeviceResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *Device + JSON404 *Error + JSON500 *Error +} + +// Status returns HTTPResponse.Status +func (r GetDeviceResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r GetDeviceResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type GetHealthResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *Health +} + +// Status returns HTTPResponse.Status +func (r GetHealthResponse) Status() string { + if r.HTTPResponse != nil { + return r.HTTPResponse.Status + } + return http.StatusText(0) +} + +// StatusCode returns HTTPResponse.StatusCode +func (r GetHealthResponse) StatusCode() int { + if r.HTTPResponse != nil { + return r.HTTPResponse.StatusCode + } + return 0 +} + +type ListImagesResponse struct { + Body []byte + HTTPResponse *http.Response + JSON200 *[]Image JSON401 *Error JSON500 *Error } @@ -2533,6 +3007,59 @@ func (r GetVolumeResponse) StatusCode() int { return 0 } +// ListDevicesWithResponse request returning *ListDevicesResponse +func (c *ClientWithResponses) ListDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListDevicesResponse, error) { + rsp, err := c.ListDevices(ctx, reqEditors...) + if err != nil { + return nil, err + } + return ParseListDevicesResponse(rsp) +} + +// CreateDeviceWithBodyWithResponse request with arbitrary body returning *CreateDeviceResponse +func (c *ClientWithResponses) CreateDeviceWithBodyWithResponse(ctx context.Context, contentType string, body io.Reader, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) { + rsp, err := c.CreateDeviceWithBody(ctx, contentType, body, reqEditors...) 
+ if err != nil { + return nil, err + } + return ParseCreateDeviceResponse(rsp) +} + +func (c *ClientWithResponses) CreateDeviceWithResponse(ctx context.Context, body CreateDeviceJSONRequestBody, reqEditors ...RequestEditorFn) (*CreateDeviceResponse, error) { + rsp, err := c.CreateDevice(ctx, body, reqEditors...) + if err != nil { + return nil, err + } + return ParseCreateDeviceResponse(rsp) +} + +// ListAvailableDevicesWithResponse request returning *ListAvailableDevicesResponse +func (c *ClientWithResponses) ListAvailableDevicesWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*ListAvailableDevicesResponse, error) { + rsp, err := c.ListAvailableDevices(ctx, reqEditors...) + if err != nil { + return nil, err + } + return ParseListAvailableDevicesResponse(rsp) +} + +// DeleteDeviceWithResponse request returning *DeleteDeviceResponse +func (c *ClientWithResponses) DeleteDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*DeleteDeviceResponse, error) { + rsp, err := c.DeleteDevice(ctx, id, reqEditors...) + if err != nil { + return nil, err + } + return ParseDeleteDeviceResponse(rsp) +} + +// GetDeviceWithResponse request returning *GetDeviceResponse +func (c *ClientWithResponses) GetDeviceWithResponse(ctx context.Context, id string, reqEditors ...RequestEditorFn) (*GetDeviceResponse, error) { + rsp, err := c.GetDevice(ctx, id, reqEditors...) + if err != nil { + return nil, err + } + return ParseGetDeviceResponse(rsp) +} + // GetHealthWithResponse request returning *GetHealthResponse func (c *ClientWithResponses) GetHealthWithResponse(ctx context.Context, reqEditors ...RequestEditorFn) (*GetHealthResponse, error) { rsp, err := c.GetHealth(ctx, reqEditors...) @@ -2789,6 +3316,227 @@ func (c *ClientWithResponses) GetVolumeWithResponse(ctx context.Context, id stri return ParseGetVolumeResponse(rsp) } +// ParseListDevicesResponse parses an HTTP response from a ListDevicesWithResponse call +func ParseListDevicesResponse(rsp *http.Response) (*ListDevicesResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &ListDevicesResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest []Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseCreateDeviceResponse parses an HTTP response from a CreateDeviceWithResponse call +func ParseCreateDeviceResponse(rsp *http.Response) (*CreateDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &CreateDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 201: + var dest Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON201 
= &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 400: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON400 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseListAvailableDevicesResponse parses an HTTP response from a ListAvailableDevicesWithResponse call +func ParseListAvailableDevicesResponse(rsp *http.Response) (*ListAvailableDevicesResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &ListAvailableDevicesResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest []AvailableDevice + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 401: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON401 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseDeleteDeviceResponse parses an HTTP response from a DeleteDeviceWithResponse call +func ParseDeleteDeviceResponse(rsp *http.Response) (*DeleteDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &DeleteDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 409: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON409 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + +// ParseGetDeviceResponse parses an HTTP response from a GetDeviceWithResponse call +func ParseGetDeviceResponse(rsp *http.Response) (*GetDeviceResponse, error) { + bodyBytes, err := io.ReadAll(rsp.Body) + defer 
func() { _ = rsp.Body.Close() }() + if err != nil { + return nil, err + } + + response := &GetDeviceResponse{ + Body: bodyBytes, + HTTPResponse: rsp, + } + + switch { + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 200: + var dest Device + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON200 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 404: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON404 = &dest + + case strings.Contains(rsp.Header.Get("Content-Type"), "json") && rsp.StatusCode == 500: + var dest Error + if err := json.Unmarshal(bodyBytes, &dest); err != nil { + return nil, err + } + response.JSON500 = &dest + + } + + return response, nil +} + // ParseGetHealthResponse parses an HTTP response from a GetHealthWithResponse call func ParseGetHealthResponse(rsp *http.Response) (*GetHealthResponse, error) { bodyBytes, err := io.ReadAll(rsp.Body) @@ -3807,6 +4555,21 @@ func ParseGetVolumeResponse(rsp *http.Response) (*GetVolumeResponse, error) { // ServerInterface represents all server handlers. type ServerInterface interface { + // List registered devices + // (GET /devices) + ListDevices(w http.ResponseWriter, r *http.Request) + // Register a device for passthrough + // (POST /devices) + CreateDevice(w http.ResponseWriter, r *http.Request) + // Discover passthrough-capable devices on host + // (GET /devices/available) + ListAvailableDevices(w http.ResponseWriter, r *http.Request) + // Unregister device + // (DELETE /devices/{id}) + DeleteDevice(w http.ResponseWriter, r *http.Request, id string) + // Get device details + // (GET /devices/{id}) + GetDevice(w http.ResponseWriter, r *http.Request, id string) // Health check // (GET /health) GetHealth(w http.ResponseWriter, r *http.Request) @@ -3885,6 +4648,36 @@ type ServerInterface interface { type Unimplemented struct{} +// List registered devices +// (GET /devices) +func (_ Unimplemented) ListDevices(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Register a device for passthrough +// (POST /devices) +func (_ Unimplemented) CreateDevice(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Discover passthrough-capable devices on host +// (GET /devices/available) +func (_ Unimplemented) ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Unregister device +// (DELETE /devices/{id}) +func (_ Unimplemented) DeleteDevice(w http.ResponseWriter, r *http.Request, id string) { + w.WriteHeader(http.StatusNotImplemented) +} + +// Get device details +// (GET /devices/{id}) +func (_ Unimplemented) GetDevice(w http.ResponseWriter, r *http.Request, id string) { + w.WriteHeader(http.StatusNotImplemented) +} + // Health check // (GET /health) func (_ Unimplemented) GetHealth(w http.ResponseWriter, r *http.Request) { @@ -4038,6 +4831,128 @@ type ServerInterfaceWrapper struct { type MiddlewareFunc func(http.Handler) http.Handler +// ListDevices operation middleware +func (siw *ServerInterfaceWrapper) ListDevices(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.ListDevices(w, r) + })) + + for _, middleware := 
range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// CreateDevice operation middleware +func (siw *ServerInterfaceWrapper) CreateDevice(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.CreateDevice(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// ListAvailableDevices operation middleware +func (siw *ServerInterfaceWrapper) ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.ListAvailableDevices(w, r) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// DeleteDevice operation middleware +func (siw *ServerInterfaceWrapper) DeleteDevice(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "id" ------------- + var id string + + err = runtime.BindStyledParameterWithOptions("simple", "id", chi.URLParam(r, "id"), &id, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "id", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.DeleteDevice(w, r, id) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + +// GetDevice operation middleware +func (siw *ServerInterfaceWrapper) GetDevice(w http.ResponseWriter, r *http.Request) { + + var err error + + // ------------- Path parameter "id" ------------- + var id string + + err = runtime.BindStyledParameterWithOptions("simple", "id", chi.URLParam(r, "id"), &id, runtime.BindStyledParameterOptions{ParamLocation: runtime.ParamLocationPath, Explode: false, Required: true}) + if err != nil { + siw.ErrorHandlerFunc(w, r, &InvalidParamFormatError{ParamName: "id", Err: err}) + return + } + + ctx := r.Context() + + ctx = context.WithValue(ctx, BearerAuthScopes, []string{}) + + r = r.WithContext(ctx) + + handler := http.Handler(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + siw.Handler.GetDevice(w, r, id) + })) + + for _, middleware := range siw.HandlerMiddlewares { + handler = middleware(handler) + } + + handler.ServeHTTP(w, r) +} + // GetHealth operation middleware func (siw *ServerInterfaceWrapper) GetHealth(w http.ResponseWriter, r *http.Request) { @@ -4835,6 +5750,21 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl ErrorHandlerFunc: options.ErrorHandlerFunc, } + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices", wrapper.ListDevices) + }) + r.Group(func(r chi.Router) { + r.Post(options.BaseURL+"/devices", wrapper.CreateDevice) + }) + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices/available", wrapper.ListAvailableDevices) + }) + r.Group(func(r chi.Router) { + 
r.Delete(options.BaseURL+"/devices/{id}", wrapper.DeleteDevice) + }) + r.Group(func(r chi.Router) { + r.Get(options.BaseURL+"/devices/{id}", wrapper.GetDevice) + }) r.Group(func(r chi.Router) { r.Get(options.BaseURL+"/health", wrapper.GetHealth) }) @@ -4911,6 +5841,214 @@ func HandlerWithOptions(si ServerInterface, options ChiServerOptions) http.Handl return r } +type ListDevicesRequestObject struct { +} + +type ListDevicesResponseObject interface { + VisitListDevicesResponse(w http.ResponseWriter) error +} + +type ListDevices200JSONResponse []Device + +func (response ListDevices200JSONResponse) VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type ListDevices401JSONResponse Error + +func (response ListDevices401JSONResponse) VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type ListDevices500JSONResponse Error + +func (response ListDevices500JSONResponse) VisitListDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDeviceRequestObject struct { + Body *CreateDeviceJSONRequestBody +} + +type CreateDeviceResponseObject interface { + VisitCreateDeviceResponse(w http.ResponseWriter) error +} + +type CreateDevice201JSONResponse Device + +func (response CreateDevice201JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(201) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice400JSONResponse Error + +func (response CreateDevice400JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(400) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice401JSONResponse Error + +func (response CreateDevice401JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice404JSONResponse Error + +func (response CreateDevice404JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice409JSONResponse Error + +func (response CreateDevice409JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + +type CreateDevice500JSONResponse Error + +func (response CreateDevice500JSONResponse) VisitCreateDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type ListAvailableDevicesRequestObject struct { +} + +type ListAvailableDevicesResponseObject interface { + VisitListAvailableDevicesResponse(w http.ResponseWriter) error +} + +type ListAvailableDevices200JSONResponse []AvailableDevice + +func (response ListAvailableDevices200JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + 
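+	// Note: WriteHeader commits the response headers, so the Content-Type
+	// header above must be set before this call.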
w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type ListAvailableDevices401JSONResponse Error + +func (response ListAvailableDevices401JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(401) + + return json.NewEncoder(w).Encode(response) +} + +type ListAvailableDevices500JSONResponse Error + +func (response ListAvailableDevices500JSONResponse) VisitListAvailableDevicesResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDeviceRequestObject struct { + Id string `json:"id"` +} + +type DeleteDeviceResponseObject interface { + VisitDeleteDeviceResponse(w http.ResponseWriter) error +} + +type DeleteDevice204Response struct { +} + +func (response DeleteDevice204Response) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.WriteHeader(204) + return nil +} + +type DeleteDevice404JSONResponse Error + +func (response DeleteDevice404JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDevice409JSONResponse Error + +func (response DeleteDevice409JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(409) + + return json.NewEncoder(w).Encode(response) +} + +type DeleteDevice500JSONResponse Error + +func (response DeleteDevice500JSONResponse) VisitDeleteDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + +type GetDeviceRequestObject struct { + Id string `json:"id"` +} + +type GetDeviceResponseObject interface { + VisitGetDeviceResponse(w http.ResponseWriter) error +} + +type GetDevice200JSONResponse Device + +func (response GetDevice200JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(200) + + return json.NewEncoder(w).Encode(response) +} + +type GetDevice404JSONResponse Error + +func (response GetDevice404JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(404) + + return json.NewEncoder(w).Encode(response) +} + +type GetDevice500JSONResponse Error + +func (response GetDevice500JSONResponse) VisitGetDeviceResponse(w http.ResponseWriter) error { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(500) + + return json.NewEncoder(w).Encode(response) +} + type GetHealthRequestObject struct { } @@ -5876,6 +7014,21 @@ func (response GetVolume500JSONResponse) VisitGetVolumeResponse(w http.ResponseW // StrictServerInterface represents all server handlers. 
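+//
+// A minimal wiring sketch (illustrative only: myServer and the listen
+// address are assumptions, and the nil middleware slice relies on the
+// generated NewStrictHandler constructor):
+//
+//	var ssi StrictServerInterface = myServer{}
+//	h := HandlerWithOptions(NewStrictHandler(ssi, nil), ChiServerOptions{})
+//	log.Fatal(http.ListenAndServe(":8080", h))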
type StrictServerInterface interface { + // List registered devices + // (GET /devices) + ListDevices(ctx context.Context, request ListDevicesRequestObject) (ListDevicesResponseObject, error) + // Register a device for passthrough + // (POST /devices) + CreateDevice(ctx context.Context, request CreateDeviceRequestObject) (CreateDeviceResponseObject, error) + // Discover passthrough-capable devices on host + // (GET /devices/available) + ListAvailableDevices(ctx context.Context, request ListAvailableDevicesRequestObject) (ListAvailableDevicesResponseObject, error) + // Unregister device + // (DELETE /devices/{id}) + DeleteDevice(ctx context.Context, request DeleteDeviceRequestObject) (DeleteDeviceResponseObject, error) + // Get device details + // (GET /devices/{id}) + GetDevice(ctx context.Context, request GetDeviceRequestObject) (GetDeviceResponseObject, error) // Health check // (GET /health) GetHealth(ctx context.Context, request GetHealthRequestObject) (GetHealthResponseObject, error) @@ -5979,6 +7132,137 @@ type strictHandler struct { options StrictHTTPServerOptions } +// ListDevices operation middleware +func (sh *strictHandler) ListDevices(w http.ResponseWriter, r *http.Request) { + var request ListDevicesRequestObject + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.ListDevices(ctx, request.(ListDevicesRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "ListDevices") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(ListDevicesResponseObject); ok { + if err := validResponse.VisitListDevicesResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// CreateDevice operation middleware +func (sh *strictHandler) CreateDevice(w http.ResponseWriter, r *http.Request) { + var request CreateDeviceRequestObject + + var body CreateDeviceJSONRequestBody + if err := json.NewDecoder(r.Body).Decode(&body); err != nil { + sh.options.RequestErrorHandlerFunc(w, r, fmt.Errorf("can't decode JSON body: %w", err)) + return + } + request.Body = &body + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.CreateDevice(ctx, request.(CreateDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "CreateDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(CreateDeviceResponseObject); ok { + if err := validResponse.VisitCreateDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// ListAvailableDevices operation middleware +func (sh *strictHandler) ListAvailableDevices(w http.ResponseWriter, r *http.Request) { + var request ListAvailableDevicesRequestObject + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.ListAvailableDevices(ctx, request.(ListAvailableDevicesRequestObject)) + } + 
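+	// Middlewares wrap in slice order: the last entry in sh.middlewares
+	// becomes the outermost layer and observes each request first.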
for _, middleware := range sh.middlewares { + handler = middleware(handler, "ListAvailableDevices") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(ListAvailableDevicesResponseObject); ok { + if err := validResponse.VisitListAvailableDevicesResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// DeleteDevice operation middleware +func (sh *strictHandler) DeleteDevice(w http.ResponseWriter, r *http.Request, id string) { + var request DeleteDeviceRequestObject + + request.Id = id + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.DeleteDevice(ctx, request.(DeleteDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "DeleteDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(DeleteDeviceResponseObject); ok { + if err := validResponse.VisitDeleteDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + +// GetDevice operation middleware +func (sh *strictHandler) GetDevice(w http.ResponseWriter, r *http.Request, id string) { + var request GetDeviceRequestObject + + request.Id = id + + handler := func(ctx context.Context, w http.ResponseWriter, r *http.Request, request interface{}) (interface{}, error) { + return sh.ssi.GetDevice(ctx, request.(GetDeviceRequestObject)) + } + for _, middleware := range sh.middlewares { + handler = middleware(handler, "GetDevice") + } + + response, err := handler(r.Context(), w, r, request) + + if err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } else if validResponse, ok := response.(GetDeviceResponseObject); ok { + if err := validResponse.VisitGetDeviceResponse(w); err != nil { + sh.options.ResponseErrorHandlerFunc(w, r, err) + } + } else if response != nil { + sh.options.ResponseErrorHandlerFunc(w, r, fmt.Errorf("unexpected response type: %T", response)) + } +} + // GetHealth operation middleware func (sh *strictHandler) GetHealth(w http.ResponseWriter, r *http.Request) { var request GetHealthRequestObject @@ -6637,90 +7921,104 @@ func (sh *strictHandler) GetVolume(w http.ResponseWriter, r *http.Request, id st // Base64 encoded, gzipped, json marshaled Swagger object var swaggerSpec = []string{ - "H4sIAAAAAAAC/+xdC3PTyJb+K6e0d2qdXfmRBLjgW1tbmQSYTBFIEcjsXcyGtnRs99DqFt0tJ4bKf9/q", - "h2TJkh8ZEkMuVFFFbPXrvL8+fVr+EkQiSQVHrlXQ/xKoaIIJsX8eaE2iyblgWYKv8VOGSpuvUylSlJqi", - "bZSIjOuLlOiJ+RSjiiRNNRU86AenRE/gcoISYWpHATURGYthiGD7YRyEAV6RJGUY9INuwnU3JpoEYaBn", - "qflKaUn5OLgOA4kkFpzN3DQjkjEd9EeEKQwXpj0xQwNRYLq0bZ9ivKEQDAkPru2InzIqMQ7678pkvC8a", - "i+GfGGkz+aFEovE4IePlnOAkwToPXh0eAzX9QOIIJfIIoYWdcSeEWEQfUXao6DI6lETOunxM+VWfEY1K", - "71RYs7ptnV8L5Nm1rSCMjyUqdUPSfssSwtuGyWTIEEwjaDFxiTIiCoGh1ihVCDEdU61CIDyGmKgJKjBC", - "+QdEhHOhQWkiNQgJyGO4pHoCxLarciCZtUlK29QtNQiDhFy9QD42ivdoPwxSYqYz6/q/d6T9udd+8r7l", - "/2i//4/8q53//lujcmXMUVql8LXINOVjsI9hJCToCVUwXwPVmNh+f5M4CvrBv3Xn1tT1ptTNuZsxNHMl", - 
"lB+7brvFSoiUZNYstXxxq6SnNOHRcs1EPjX/kTimhjDCTiuPa9yoMuEpn1IpeIJcw5RIaoStyqL5Erx8", - "dfT04unL86BvZo6zyHYNg9NXr98E/WC/1+uZcWvrnwidsmx8oehnrNh1sP/812BxIQfF+iHBRMiZlYgf", - "A1qTqjqOhEyIBkY/IgzMeIMghEGw+3wQVBVrz05VY4I12o3seY2hEpZSjkstNfxerOtSyI9MkLi9e8vG", - "xVGbseskvnQPIBJ8RMeZJOZ7b2YI1Kt1ENbU2XAkriiMllktDvwxQT1BCVoAsaGsGNJ8Zabw3SFfYYkj", - "bsCGqFFTYjFFycisQYl3ew1a/Iek2krU94OYqo9gOq9RYTOa0+GHvboS95q1uGFRDWv61WiUt6lNVlIs", - "ZHfvxP+5t6ldTaM0U5Ul7S0u52WWDFGCGMGUSp0RBoenbysuZ68YmHKNY5R2ZIsxGty4gzCqpAhe/oU+", - "EA2R8aVG/zS1Xncj1+5GtoCj5OBWenPnV5Z78zV4i8YNPin1bjHKlBYJ0Bi5piOKElok06I9Ro6SaIyB", - "jsA4hVSKKY0xrkpsKljbwC/rATZ0U2654ImrOBQ7lBPKMtW8GA/rQ54ZDaQcxnRMhjNdDTa7vbromxmd", - "j9/E6qdSCllnbiTiBhIP0pTRyCpHW6UY0RGNAM0IYDpAKyHRhHIszKXK1SGJL6QXZ9gUbDWhrEFrS+HO", - "TeZbQst4yCRjmqYM3TO1s6nGWsqP7Eh1jQ0DyjnKC8zZc4ORElSqMWIuBLKclqKJdfgxDrPx2LCkzLoT", - "qpTFX166MKLI4r4LwGtBr5XmfGFL9cDTsKE2vDAhuM1wiqysBM6izGITIREKPXFCq1BF+ZQwGl9QnmaN", - "KrGUlc8yaSOaGxTIUGTaOjInsPIkdq9ibX0kMh43MqvGjt+QMLeRq3JCaaIzH3uzxPBWfDT8nE8nPq4V", - "hx+kSQzHOdZaEEDS4OwOT45gJEViUIMmlKOEBDXx28ZiRe8Cu0EKwqBtdCommAgOYjT6h1lBYSp1L5cx", - "ZvR0AQEUBmLDBMYXRDcsrRxClCZJCq3Xzw739/efLEbrvYft3m579+Gb3V6/Z/79bxAGLsoaEEk0tn0c", - "qjsMOvaRYWGzgkqwKcaQEE5HqDT4luWZ1YTsPXzUJ8Nod28/xtGDh486nU7TNMi1nKWC8oapnhbPNhNF", - "16Hi9nzMjpp8nRzuYE+zCS1fgtODN78F/aCbKdllIiKsq4aU90ufi4/zB/YP93FIeeNeqPC5Cyu1LsZ7", - "BBO+nRkBVTAilC1kUNKMMf9931DCMSoUUlhns4Sv68L8S6OajH7GGBozGpqMzR7DadzXpS7C4FOGGV6k", - "QlE3ey2v5J8YkDDMKIvB9oCWIS6HOParKsDZW0p+CUVa2OBgR23iowKqm5lNGz9nxjVlNt80q8z4cP/R", - "47/3nuzulYybcv3oQbDRUgq3uwDXLc3+aVj45BR57CKoUQP3VyT41FiF/WDXZ/yMU5yKA8+f1YRhNkaU", - "jy9i2qCdf7iHEFOJkbZb8vU2FHRJmq5XxWZUV/i0gvySR26MLT5ZU48u39yTN2H5gypezzj9lGEJ0Vdn", - "fzX+/dP/qNO//7n76cX5+T+nz38/ekn/ec5OX31VomF14u2bZs9WbrGo8YaVrNmm6nFCdNQAfCZC6SVc", - "80/MVjIxnTtwSDgMsT/gbXhBNUrC+jAISEo7npmdSCSDAFp4RSLteoHgYIaCCZIY5Y7pfOqSLabzl3yH", - "er04RjzjJKERSM/kIVEYm9FUNoxFQijfGfAB92NBToiCTDkZxxCRVGcSjUQgyiSbwVCSyPg0l5+eTx7C", - "F5Km1zsDridEA15paShIidTKbNMNBM1nsIL2q+rAGwNOXXOMYUpYhgoiy6gBL+JHbJZgBtFEjlF3in25", - "xfudAa8oZDNTmhQzFVJXUg2Pe2GDHMG0M4JkVGnkUCShqLLKC608UfS4VzH/x73H67ejhQ6tUD+r3fWz", - "lVwpN7APp8B2aueMLyZap+sPS6y/cTYCv715c2rYYP4/g3ygOS8KEbcEZzMgZl+MCi4nyEEzi0l8bm6n", - "4cQlDJx0NyTojWtsujG1no6ndmJ48+IMNMqEcue/W5Fh58hs3xFsLoQqlRlVpAQODk+e7nQ2OByyvC3W", - "v0KObwoKF7I2eS6z5kdcj3kmyvA3hOOj0MApb6FzoNUx/uGZkMCcg5nbdR/eKqwmtayohITjIx+f2Sw3", - "7YH36oNgJx8xXfQUfXhd4DtSLMXG94q9F0PO7dIOO+B/GMXI7A6+NnpYXauxtHz/4l0bYUbHNPjciQ3F", - "y13BavNv4Li1ecEXU803s+1yjtpM1qwac9nfOQLZvxkCuZuTofo5D1EXipNUTUQDqXmenkDeBvCKKl3B", - "DHUBeTdQdzP1U6Wqw3fnRSvS3ZudD/0F8Aatw7fHR3s+nV6dRn9+QJ48vroi+skjeqmefE6GcvznPrkn", - "Z1MrT5O+9kjIQ4zNToSaVKvsZ9wRAMZ/+RAoDGjaIHul6JhjDMenQOLYRIPyhjQfvir03Sd7nd1Hjzu7", - "vV5nt7fJ9jwh0Yq5Tw4ON5+8t+c2LH0y7EdxH0dfkR7wYnMBgbBLMlMwyEP2IHAYoQQOSkrpw/pGCcr6", - "WdtfO1pbkMLaw7ObHJZt5D3sqewS139mT2xv7vcfLvX7a6VqNtO4Hpg5IzqzjfNeFzdJXCFEImMx/3cN", - "Q2N5Dqph7BGlQu00xbWlCt7yj1xc8irpLn9h7PdThnIG5ycnlWyXxFGmbGZsA8JFmi6Vg0hvJIa9NeF3", - "7WpKZ6PbOA9d9IQl/3rrp5/lrXl+fOO0boMtelnvatTkj53S2O23O0mN+0YzwI8Ow0xDURBhVO6QiSyG", - "32YpyilVQoLZM0zRIuLXGeeUj80INmZE5gmbgXTfr+58Soz65X1T+2l1j7NJpmNxyW0fNck0mE92yYYE", - "D4dWD+E0uQ8vhe3jVxoa97+Aq1xzwuPhrN58EYO13G7dwHMtJMZ2Mm+WfXhWmGJhzN54Wwr9n85D+GM+", - "e4S542C8z1t6aQVh4LkehIFjYRAGOWfMn45C+5ddfBAGfiElvZlbk1PPOux2ap/kFY8Lx3tUaWNpUSal", - "wbWlxtDCJNWzPMGcW8/OzczloBiw6fz1trcEvSe3kZR8uzIL+S9SNlD2UPkka31TTaZLt/4XTYw9PlrE", - "tm6/48tlq2h14RBZ6bY7xms8Ql5RluvqY82zPO02zhbPCW9QittYcjXBkuW43f28FnfdFm7JJvvCSqhE", - 
"WWkly2XjwtNX1i1TlRcs/0WWeUS6Po/lnCGkKNuFSuRw1njQS0ntsZNnkGOsYcF/GUzRnHdbjZpPyFUx", - "g8WzRMFC+ZejY57psQVgOx14nddl0FE+hF1GpwqvmyHw5gXduVbVhbGqwjsHQI2G5/3PCo+2zLYWlHM+", - "R7i6iNy4LowySfXszAQEp4ZDJBLlQebU0EYKS4T9ej65zeVeX9sCnZGok/McOUoawcHpsdWShHAyNiI7", - "PwFGRxjNIoY+FVcDEbYo9NXhcdudIeQ7d5vJodoyxLROCDfjB2EwRancvL3OXseW9ooUOUlp0A/2O7sd", - "s5M1bLAkdidFVYnPjRo7tJHsOLZr177uxHBWpYIrx5u9Xs+V4XDtPSuZV2J1/1TuaNhF13Wx189gWbgQ", - "NgwbXGbALdRhV5UlCZEzQ7v9FqIJRh/to67Fr2opQQZCHLsmX0nRZgdkFk3X8XeN0hza+OVfh8GD3u6t", - "cdiV1DVM+5aTTE+EpJ8xNpM+vEWxLp30mGuUnDBQKKcofYFU2QiD/ruq+b17f/2+LHfLrjmvUqEaZF26", - "GBI4x4BK/yri2a2R2HD15LrqhIzHva5p2t6trcArWAOTba5xmFcjuF0VUTMe7Tjt2oKgfyVxfkLwzTT6", - "Qe/BFjR6oaDvHlnSacaYvZ/gq1HmJURlf9r9YsD3tQtuDN1Wv2ptR/b73NpSIklidp3KrmBBRq9ftJFH", - "IjboxLHOJx/MUw8f3d6kqBioWFRYYtwiBHhfs7YHDdjezupI+akmG6iJk26uGOFStPAV8ncQdn7z7pe9", - "Z/4o5Je9Z+4w5Jf9g/kFvLtRlt62XHNeW/5T+dYq33P0wX7ONOua3Ln+OrRXtNoK4PMFVDeBfMUCf6K+", - "TVBfmV0rgV9Ry3aH0K96OXcj8Hd7Ai6UrYnb9lGeYP/BIN+Tu5/0UPARo5GGdq6Rbq9uU4Q2nBFm63bz", - "XL29DuvLciiHTOF9Mj2f+qKFxpX9b/cLjTfBhoVBrkQHueoeH4W+4srVSaUSR/SqOfDbFM9tY0S/jq2j", - "RD9vJVRvRacPkiEdZyJT5bIYW2CHan6preKA7xt+nYfnpQj2O9bS3jZDx9YB6k+9vyPovChQ57zdAcY6", - "8Jy32g549jUON0LP+Qp/oueN0HOJXavRc1Fvcpfwufp2lK3j51zfmhjuD/h+RAR9z1Ap4T7HPa+Sqvq4", - "jQHqvIZ1dez3unF8BLbQZ1ny8m5gqZ98+7g0v7twH3NItmDQvocnR4LzWLMcCn5v+tDbru/bPgS8zyr2", - "vHxXqBlsWUfUZWJchl2Lla0SSTK/VgGmNRAFZ3Zh7TPkGp5ODVWdAc8vFn1QIpMRfoBCUUELUMgw0nA5", - "odHEjGO/s+PbUswPJE0/FBfodvrw3F7mKXHXTd5SKClhEAmuBHMljR+mSfKhXy9YOD85sZ1sm4krTfjQ", - "h7xIobAxZVoN+IC/Rp1JriwVjCgNL4FRjgpaRuBSMIYxDGfwwfCzRN+OvaVkRnS3Y9hswE0PyjNUnkrK", - "x8Dx0g9IR/BhJBgTl7YQ5oO7sLTU6l8YKX0jyw+Xlxk7WrQAaRnnLn2hfYuIndeWXc8n9m84mU9VlPPs", - "9hrr7r7UE12Wp40sJSNtb3BQbfRDZNq9NaVpIY7zzUtZWmtWf8HLGJymL6gySdNN1dcv02rxNElW6DC0", - "JvMvlY5Fpv9T6RiluwvstXuZckOLRO6DJh/dzdXKfSlXaNvEKkdhM6sCdz8/r891n6ZJEoSBX09Dve0G", - "kUTjle6icSttx9aqT10csL4fM5KxHaF1dvZ052fM2BCWWJZVnb1nYEPk8IXetmSycfP22jX44ZFLXhH/", - "jdVw+0cRpVVQe1eHx8OZle38qsF9MhCv0HPKbLzzdDXaSP5sqY34Gwo/vI3M9eMHt5JISPsyBZVfnrs/", - "xVulHUfJ3Fv2XtP8vlCY73rPT052lhmNu4+/1GTkz+2wr6P84WOKvep1/6zF3d0lBQGrkoVd02iVPYj0", - "pzn4O38/g8e9DB42I1pQ0xpLEuEoY/aKa2zvdTfZhb+43P3i/jhel1ef/0zFd5NL8feL1k2TE3gvjNLT", - "FKO7kbh9mxTFFbB7Wt9s3z3uSbB7jPIJQXMUKP8Iy4+j3bd/GNz0YzYbHQVv1bby277fjW1tO/L5NeR1", - "jWV+3Bczd5qWU6LFAgYsvZVjaUmMf0HHVgpivGu5QTlMTsHPyoENimFKzModfNOFbwXEHnm45h04y9JU", - "SK1AXwpIRIzKHkH8fvbqJQxFPOtD0Y+Dey2FVzj/PgH/Anqzh6Kf0fQ9sUVmZnsyEjIpDZD3TCW2U5Fm", - "zL4uxVYaex67YEVAE9kZfwYiowmdYsPRVvknLO60qmfRkYdBkpPXNeTZt0hUB118uX+xlqo8qjTCiDLM", - "32dM+djy1vMrH6L0Zo0h5UTONn2txuLvdkyLsHoff7bjhFzRJEuK92M//xVa/tWv9lce7G9X0FGhU3gV", - "IcbKHljtfN1PfISFOBvu3W+13Cv3pksj/Dcs9YKW/+UJMCI2ET9Xci0EMCLHuPPDXKjwtja/T3F8tHCb", - "4h4WqU1z7ZvjjA3L0jbbYGyI+++iJK3YfG63IO38+8HEpZcB3cNbEdMCZi6rhPu+VLC3vZCw7Qq483uc", - "Q3mOOaQuVb/ZAcyITQrzQkSEQYxTZCK1b6NybYMwyCTz79bpd91vt0yE0vbNz8H1++v/DwAA///8wcL2", - "G3cAAA==", + "H4sIAAAAAAAC/+x9C3MTO7L/V+ma/26t81+/kgAL3rp1KycBjk8RSBHIubsn3CDPtG0dZqRB0jgxVL77", + "LT3mafkRIIYsqaIKx6ORulu/bnW3WvLnIORJyhkyJYPB50CGU0yI+XigFAmnZzzOEnyNHzOUSn+dCp6i", + "UBRNo4RnTF2kRE31XxHKUNBUUc6CQXBC1BQupygQZqYXkFOexRGMEMx7GAXtAK9IksYYDIJewlQvIooE", + "7UDNU/2VVIKySXDdDgSSiLN4bocZkyxWwWBMYontxrDHumsgEvQrHfNO0d+I8xgJC65Njx8zKjAKBn9U", + "2XhXNOajPzFUevCDGaExGcV4hDMa4qIYwkwIZOoiEnSGYlEUh/Z5PIcRz1gEth20WBbHQMfAOMOdmjDY", + "jEZUS0I30UMHAyUy9EgmMjRd0MgzA4dDsI9heAStKV7VB9n7x+hxsLxLRhJc7PTXLCGso4Wrycr7N22r", + "fb944OuZ8iTJLiaCZ+liz8NXx8dvwTwEliUjFNUeH+8V/VGmcIJCd5iG9IJEkUAp/fznD6u09fv9/oDs", + 
"Dfr9bt9H5QxZxMVSkdrHfpHu9iNc0eVGInX9L4j05dnwaHgAh1ykXBDz7sJIDWBXxVPlqwqb+qz48H8o", + "kCgH/qWmwM/aK/OBxDCJ+YjE8RwyRj9mNdx0YahVQEEq+IxGGLWBmAdAJZBM8c4EGQqiMIKx4AmoKUJl", + "bqGF3Um3Deea3Y6e3A7Z6/T7nf55UJ+d+EFnkmZBO0iJUig0gf/7B+l8Ouj8u9958q78eNHtvPv7X3wT", + "uSnggI8NnY7PVj4rbciJraKwSehqhK6Y5OXTN0zI5MazdzgEqt8DgWMUyDQnlv6Ihx9QdCnvxXQkiJj3", + "2ISyq0FMFEpV52Z127X8GdpWMMYmmvUbstbQOQO3VswvUYREIsSoASLbENEJVbINRJttIqcoQa8p/4SQ", + "MI1ZqYhQwAUgi+CSqikQ064ugWTeISntUEtq0A4ScvUC2USvm4/2F/CowdhyHzrv/n/+1c5/eyEpshg9", + "YHzNM0XZBMxjGHMBakollDRQhYl57y8Cx8Eg+H+90hnoOU+gl0s3i1GPlVA2tK/tFpQQIcjcP2s5catm", + "TyrCVtgVq0Ae/o7ylU2Cs5YSFAdi/BbD7/OTtz2tkimRUk0FzybT6qz8kduDdxVZLEi3zmQ7QDbT7UgU", + "UWvaTmrkehbTKtFP2YwKzhJkCmZEUA2+2uL0OXj56ujpxdOXZ8FASyLKQmfpT169fhMMgv1+v1+hq5Tn", + "lKs0ziYXkn7CmpsU7D//JWgSclDQDwkmXMyNxFwf0JrW1WPMRUIUxPQDwrnu7zzQJmz3edNw7ZmhFtd9", + "bUQ2si9rDAeJU8pwqeVo/yjafsnFh5iTqLP7jZWdodJ9L7L40j6AkLMxnWTWQXBqj0Cdmum1r4ZXZFoi", + "UQ0w1tOsd//7FNUURUXD8i71V3alM69DTmFFIjXXteqEL4CYz1DEZO4B8W7fg+LfBVVmRt17EFH5AfTL", + "ayCse7MYfthfBHHfj2IPUR6aftGIcjq1CSUFIbt7x+7j3qZ6NQvTTNZI2muS89J40todmVGhMhLD4cnb", + "msnxOtY2ZPOYXRsRVk2tm/8CD0RBqG27xp+iZhXYaKmxPZv4bdHw+lcXa1eWry5rwlefh194rGEmFU+A", + "RsgUHVMdrzWcUVp3W+szNuNxR0ezxgJsaKYsuYuefzK3XdlJWQbNi8losctTjUDKYEInZDRX9cVmt784", + "9X5B5/37RL0sKrbwwOhCcU+wl6NleKTlmLfdJOI1MfSF4hezMfX0XFiq0vumEsJGCO5Aq7vopCF1IXkb", + "LqdU2zYJuRCMCT07rjoR3XPWAU3cAI6KAYpuiy71IqKV3i6tLS4qRFAGmUQYzXeAwNlxF94U1P5NAiOK", + "zjBPE0yJhBEig4zpJQUjM75JflQJyKT29qhqvu4cdptR2DG+EnfPuvDrPMWEMLikcWxirYQoGppAbUQb", + "/FxOkbmJ0iNpA8AKre+esyqyXGqmafLbgbEMGF0Q5fFYcUKlEqXlkIokKbRePzvc399/0jTSew87/d3O", + "7sM3u/1BX//7d9AOrHHVvgNR2HHmZxtJE19fB3V74ULfqkU5fDs82nMrQn0c9ekBefL46oqoJ4/opXzy", + "KRmJyZ/7ZCtpFb95OipjdmhlEkUnN30aVb5IvRIQL4nEvzjAvlFGx36xevmx3L3RLW8jB9SwqybxYpq0", + "vyBL0zSCNb1abqPfODHU+dHfav+gRL5mh2WJptPlS0Ja6baU61MhuPDkQ3nkGecgTWMaGu3uyBRDOqYh", + "oO4B9AvQSoxlwcJTqot1RKIL4VZyr0orQmMPZiqRjh3MtYSWNstJFiuaxmifGZRu5KwYzo9MT74okTKG", + "4gJz8dygpwSl9AZLjRgm56VoYlaZCEfZZKJFUhXdMZVmcSjXNIpxNLCx11qomtksCfPBq8rDhmh4oaOv", + "TowzjKsgsBZFE5twgVDgxE5ajSvKZiSm0QVlaeaFxFJRPsuEcRFsp0BGPFPGHbATVh3EpM2MmzfWGucV", + "1oI4fkUS2y2RuiSkIipzYZdVL/5By7Mcjn9YOx2uE980DPMwuzEBiceKHR4fWRsdcqYIZSggQUXcBkwl", + "SWJydUE76GhMRQQTzoCPx/9cnTZZ4sUVCrLKDzisRg+35wPQiQsKml6I5PEMI0gIo2OUClzL6shySvYe", + "PhqQUbi7tx/h+MHDR91u1zcMMiXmKafMM9TT4tlmU9GzCZFO2WdXTr9uHm4hnbUJL5+Dk4M3vwaDoJdJ", + "0Yt5SOKeHFE2qPxd/Fk+MB/snyPKvGmwwuY2KDUmxlkEHXFYNdKO85jQuLEXmWZx7L4faE4YhgUguTE2", + "a6MUvwv1UkMzpp8wAm9yXZGJ9qUs4r4ui94OPmaY4UXKJbWjLzgy7omORkYZjSMwb1T3JZX9qh7b7i1l", + "v+JCmojRRpyLjmSRpdEj6zZuzIwpGtugqTbiw/1Hj//Rf7K7V1FuytSjB8FGpBRmt5GpMTy7p6XLkyKL", + "7AqqYWA/hZzNtFaYPwx92s5Y4NQMeP5sYTIuufhA2eQioh50/m4fQkQFhspkY9frUNAjaboeiv6AvrBp", + "BftrPEi3EeBZXb67Jf+S0Ks++qvJbx//R57848/djy/Ozv41e/7b0Uv6r7P45NVX5ZhX7wF9142cldk1", + "E2/UNnA2hccxUaHH8ZlyqZZIzT0BxSHRL3fhkDAY4eCcdeAFVShIPIDzgKS064TZDXlyHkALr0io7FvA", + "GeiuYIokQrGjXz6xeXb98uc8TXHd7COaM5LQEIQT8ohIHc4ykNko4gmhbOecnTPXF+SMSJO+0Z8iCEmq", + "MoF6RiDMRDyHkSAhFvvS5eBt+EzS9HrnnKkpUYBXSmgOUiJUsWGcj2Am2lFl00OuOUYwI3GGEkIjqHNW", + "rB+RJkF3ooiYoOoWKVnj7zdSNEuE4o3JuVC1LPPjftszj6Db6YmMqVTIoNh/oNKAF1r5HsHjfk39H/cf", + "r89EFhhaAT+D7sUqpRyUG+iHBbAZ2hrji6lS6fqyI2NvrI7Ar2/enGgx6P9PIe+olEUxxS3O4jkQHRej", + "tPk1FRufxG3L7AS+HJqd3Q0ZemMb69diuZ6Pp2ZgePPiFBSKhDJrv1uhFudYh+9oMz1UykxDkRI4ODx+", + "utPdoMzKyLagf8U8vik4bCTs822sxSSGeaPchNDybcPwqK3dKaehpaNlMqjPuIDYGphSrwfwVmJ9P8NM", + "lU322JmM52XJibXq58FO3mPatBQDeF34d6QgpShkKcGQd1nqpen2nP2ugWHTuwu9t+u0msS1i1+caTPJ", + 
"XKLA5U7MUrzcFKxWf4/Ejc5z1txlvJluV7cn9WB+aJRzf+seyP7NPJDbKQpY3OIn8kIyksopV8s3Pgjk", + "bQCvqFQ1n2Fxgpam6hcLCuoG35YKrNjp3Kw04HvmzX+8soSVhQRfWw3gXIzNigF80KramXzL7ov3/9sB", + "9WxXHEhJJwwjGJ6UBX5lQJp330i5P9nr7j563N3t97u7/U3C84SEK8Y+PjjcfPD+ng1YBmQ0CKMBjr8i", + "PeCmzS4IJL4kcwnn+ZJ9HlgfoeIcVEDplvWNEpSLZRZfVlXR3PhYVzdxkzqJjayHKchZYvpPTbHOze3+", + "w6V2f+2s6mAa1ztmVolOTeP8rYubJK4QQp7FEfubgpHWPOuqYeQ8SonKIsW2pRLesg+MX7I66zZ/ofX3", + "Y4ZiDmfHx7Vsl8BxJjfbk5eKp+nSeeDpjaZhb83yu5aaSlnMNkphmpawYl+/eeFLNTTPt28s6jYI0au4", + "W16RYboz4bctookGGhngeodRpqCohdOQO4x5FplKAjGj0tRiKjpD4xG/zhijbKJ7MGtGqJ/EcxD2+9Uv", + "nxANv/zd1Py1+o3TaaYifsnMO3KaKdB/GZI1C84dWt2FRfIAXnLzjqO0rc1/w6+yzQmLRvPF5k0frGWj", + "de2eKy4wMoM5tRzAs0IVC2V2ytuS6D5aC+G2+cwW5o51413e0s1W0A6c1IN2YEUYtINcMvqj5dB8MsQH", + "7cAR4t3ktfBcVt+T5GeHGtt7VCqtaa7kBSqNoYVJquZ5gjnXnp2bqctB0aFv//VbhwT9J98iKfl2ZRby", + "P6RirGqh8kHW2qaFOV0a+ntLMoZHTd/Wxjvu4FndW21sIkvVsdt43i3kFQfc7Ekz/SxPu02y5j7hDQ61", + "LathKzXHRvflqbZ1IdySINuWk1Q4q1CyfG7s8vSVJwCpzI/+faHInEe6Po9ljSGkKDoFJHJ3VlvQS0HN", + "tpMTkBWsFsF/aZ/Cn3db7TUfk6tiBOPPEgmNyl/LR5npMbW/O114nddl0HHehSGjW3ev/S7w5kcjc1Qt", + "Tsaqs5K5A+RVPGd/Vli0ZbrVAGc5Rnv1cUxtujDMBFXzU70gWBiOkAgUB5mFoVkpDBPm63Jwk8u9vjYF", + "OmNP7ehzZChoCAcnQ4OShDAy0VN2dgwxHWM4D2N0qbgFJ8KcB3h1OOzYPYQ8cjeZHKqMQPJyy4OToan0", + "EtKO2+/udc2pDp4iIykNBsF+d9fUsmkxGBZ7lZMwLjmqFdEsZcPILblHro0Wrkw5k7b9Xr9vK3GYcsaV", + "lMVYvT+l3R22C6wxtpusw67ud9FjXUhw5c6AMAWeqJGeM3PdDh70d29E3NpiKh8JbxnJ1JQL+gkjPejD", + "G0rkiwYdMoWCkRgkihkKV15UhXAw+KMO3j/eXb9rBzJLEiLmuej8cku59KCgelgzsDqGUv3Co/k349d3", + "HvS6rtDael0vgPDbzXOOvUWZuyLVUmQWYluY7V9IVCTZW644rdg8qFXCfi/QP+g/uP1BKwXURdkccLtl", + "YYl4cvtEHHI2jmmooJPT4s4IAoltTX4dIHfFHLx2VAPJ+RqbPafygKPuLl8qeiS/uWDlotG432A7q0fz", + "UoUbLCMFV5WS5fuVZB10jqgMtXNZRUsnJGnlGgdZ6mkVRZ9pdG19pRht5qiOoSPzfbHkpESQBBUKaWha", + "cmoXyisOqH7gIhEb5togsr6ctCsybPqS7xYQ+2Dp8YWMNdeGLRjFo4ZB/I6GsLF1UznDc5fQ/LaYxfzM", + "wnXbb+Geo/qxoNnfnheUH4r4njC/K4h6jipXkUJs2gpOi2L+ZfBy5f63ONFuBA/jpzr6tFptCbVbBiVb", + "9lUIpxh+sAyZbYPVYeTQNtmGH2DPLNxg9Xfk3y/3GwSOpaxWBYtDt490e7Fi7fKZjULFvW9GgQOYR8im", + "xGOUF4HbzSwi5yzc+R4x4392VNg8R3WHNOkki2NzmNsdAihPblTtae+z9g828JNzbVvpi7x9/aKDLOQR", + "Rq4UablDkhdqf1tv2U6YZeUeJpvEV0ZUOTCWO6NfMf9256C8e+uve89cBdpf957ZGrS/7h+UV3DdDlj6", + "2zLN2/Ze7zD4tPNK60IzpsmWU6/z9opWW3H43LmVm7h8BYH3Xt8mXl9VXCsdv+II0S26fvXr+ba8T1CA", + "zSdt8yiva/rJXL7tpp4cIu0WqanMqOXiXYmUuYDOnYawF+TcJdVzFQe0QFzV/m6YQy0VcqV3kEN3eNR2", + "B13s8ZRU4JhebS+jmtOxdS/Rjbv9dOpBMqKTjGeyehrBnGtCWd4lUjPAd81/LZfnpR7sD4zS/jaXjq07", + "qPe4vyXXuTmh1njbbZF1znPeajvOc7lVs7n3nFN47z1v5D1XxLXaey7K/G/Tfa7fj7x1/znHm0/grq7y", + "Z/Sg75hXSpjLcVc2e2s2bmMHtTw6uHrtL6/33PpGfzH49v3S/Mj4XcwhmXNa5ubr3BMs15rlruCPhof+", + "dm3f9l3Auwyx59UrGvzOljFEvZhPqm5X80ChQJKUp9lBtwYi4dQQ1jlFpuDpTHPVPWf5fQ7vJc9EiO+h", + "ACooDhJjDJW79jfm5lpbafo3J+DekzR9X9xbsjOA56a8syJdO3hLoqAkhpAzyWN7kuz9LEneDxbrxM+O", + "j81Lps3UVoS/HxRX8RY6JnWrc3bOXqPKBJOGi5hIBS8hpgwltPSECx7HGMFoDu+1PCv87ZjLIXSP9lKC", + "eH7O9BuUZSgdl5RNgOGl65CO4f2YxzG/NOcP3tt7IpZq/Qs9S99J89vLT3daXhQHYQRn79pAc3mjGdec", + "di0HdhdLlkMVpyh2+97jTp8XE11Gpl6RkrEyB+ep0vjgmbKXVfoIsZL3k7L0iM/ivZoTsEhvQJmk6abw", + "dWQaFM+SZAWGoTUtv5Qq4pn6u1QRCnsFk0P3MnBDi4T2D0U+2AuDatdU2PONPlFZDv2iCuy1aPmxSPvX", + "LEmCduDo8Rxz3GAlUXileqjNSseKtW5Tmx0uxmN6ZsyL0Do9fbpzv2Zs6JYYkdWNvROgZ+Vw52vNSTVv", + "8PbaNvjpPZf8IPJ3huH2tyIqVFBzRQKLRnN3L35xu86dOhNgJrLkzKx3ji+vjuTPluqIOxj+0+tIiY+f", + "XEtCLswddjK/s+TuFG9VIo6KurfMdRLlNQ3tPOo9Oz7eWaY09hq0pSoj7sNhV0f5068p5oaNu6ct9sok", + "UjCwKlnY041W6QNP79XBXbVyv3jcycXDZEQLbloTQUIcZ7G5WSgy12n59MLdF9X7bD8M1+XVy9/Z/mFy", + 
"Ke5ah3XD5AzeCaV0PEXofmls6zrJi5s37mh9s/m1P8eCiTGqOwT+VaD6K/I/D7q//Waw79f4N9oK3qpu", + "Fb/i96Po1rZXPkdDXtdYlcddUXOLtJwTxRs+YOUyxKUlMe5exK0UxDjTcoNymJyD+8qBDYphKsLKDbzv", + "ni0JxGx52OZdOM3SlAslQV1ySHiE0mxB/Hb66iWMeDQfQPEeA3sboAOcu8bN/e6XjqHoJ9TvHpsiMx2e", + "jLlIKh3kb6YCOylPs9jcUmkqjZ2M7WJFQBHRnXwCIsIpnaFna6v6o7G3WtXTNOTtIMnZ62n2zOV99U6b", + "v6lW0FKfjzqPMKYx5j8jY360c1rcxZZ3UbnQcEQZEfNNbzNs/lLurFhW7+IP5R6TK5pkSfGzRM9/gZb7", + "xQ3z43rmJwPpuMAUXoWIkTQbVjtf96O67WI6PdedbbXcK7emS1f471jqVd6ppKfY/OKoA7niHGIiJrjz", + "0xyocLpWnqcYHjVOU9zBIrVZjr7Sz9iwLG2zAGNDv/82StKK4HO7BWlnP45PXLl25g6eipgVbuaySrgf", + "C4L97S0J266AO7vDOZTnmLvUleo304Hu0QeYFzwkMUQ4w5in5hJg2zZoB5mI3ZWmg579ycwpl8r84E5w", + "/e76/wIAAP//cHRGgNyPAAA=", } // GetSwagger returns the content of the embedded swagger specification file diff --git a/openapi.yaml b/openapi.yaml index 19e39ee1..a096083c 100644 --- a/openapi.yaml +++ b/openapi.yaml @@ -149,6 +149,12 @@ components: description: Whether to attach instance to the default network default: true example: true + devices: + type: array + items: + type: string + description: Device IDs or names to attach for GPU/PCI passthrough + example: ["l4-gpu"] volumes: type: array description: Volumes to attach to the instance at creation time @@ -502,6 +508,107 @@ components: description: Creation timestamp (RFC3339) example: "2025-01-15T10:00:00Z" + DeviceType: + type: string + enum: [gpu, pci] + description: Type of PCI device + + CreateDeviceRequest: + type: object + required: [pci_address] + properties: + name: + type: string + description: Optional globally unique device name. If not provided, a name is auto-generated from the PCI address (e.g., "pci-0000-a2-00-0") + pattern: ^[a-zA-Z0-9][a-zA-Z0-9_.-]+$ + example: l4-gpu + pci_address: + type: string + description: PCI address of the device (required, e.g., "0000:a2:00.0") + example: "0000:a2:00.0" + + Device: + type: object + required: [id, type, pci_address, vendor_id, device_id, iommu_group, bound_to_vfio, created_at] + properties: + id: + type: string + description: Auto-generated unique identifier (CUID2 format) + example: tz4a98xxat96iws9zmbrgj3a + name: + type: string + description: Device name (user-provided or auto-generated from PCI address) + example: l4-gpu + type: + $ref: "#/components/schemas/DeviceType" + pci_address: + type: string + description: PCI address + example: "0000:a2:00.0" + vendor_id: + type: string + description: PCI vendor ID (hex) + example: "10de" + device_id: + type: string + description: PCI device ID (hex) + example: "27b8" + iommu_group: + type: integer + description: IOMMU group number + example: 82 + bound_to_vfio: + type: boolean + description: | + Whether the device is currently bound to the vfio-pci driver, which is required for VM passthrough. + - true: Device is bound to vfio-pci and ready for (or currently in use by) a VM. The device's native driver has been unloaded. + - false: Device is using its native driver (e.g., nvidia) or no driver. Hypeman will automatically bind to vfio-pci when attaching to an instance. 
+ example: false + attached_to: + type: string + description: Instance ID if attached + nullable: true + example: null + created_at: + type: string + format: date-time + description: Registration timestamp (RFC3339) + example: "2025-01-15T10:00:00Z" + + AvailableDevice: + type: object + required: [pci_address, vendor_id, device_id, iommu_group] + properties: + pci_address: + type: string + description: PCI address + example: "0000:a2:00.0" + vendor_id: + type: string + description: PCI vendor ID (hex) + example: "10de" + device_id: + type: string + description: PCI device ID (hex) + example: "27b8" + vendor_name: + type: string + description: Human-readable vendor name + example: "NVIDIA Corporation" + device_name: + type: string + description: Human-readable device name + example: "L4" + iommu_group: + type: integer + description: IOMMU group number + example: 82 + current_driver: + type: string + description: Currently bound driver (null if none) + nullable: true + example: "nvidia" + paths: /health: get: @@ -1246,6 +1353,176 @@ paths: schema: $ref: "#/components/schemas/Error" + /devices: + get: + summary: List registered devices + operationId: listDevices + security: + - bearerAuth: [] + responses: + 200: + description: List of registered devices + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/Device" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + post: + summary: Register a device for passthrough + operationId: createDevice + security: + - bearerAuth: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/CreateDeviceRequest" + responses: + 201: + description: Device registered + content: + application/json: + schema: + $ref: "#/components/schemas/Device" + 400: + description: Bad request (invalid name or PCI address) + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 404: + description: PCI device not found on host + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 409: + description: Conflict - device or name already registered + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /devices/available: + get: + summary: Discover passthrough-capable devices on host + operationId: listAvailableDevices + security: + - bearerAuth: [] + responses: + 200: + description: List of available PCI devices + content: + application/json: + schema: + type: array + items: + $ref: "#/components/schemas/AvailableDevice" + 401: + description: Unauthorized + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + + /devices/{id}: + get: + summary: Get device details + operationId: getDevice + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + description: Device ID or name + responses: + 200: + description: Device details + content: + application/json: + schema: + $ref: "#/components/schemas/Device" + 404: + 
description: Device not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + delete: + summary: Unregister device + operationId: deleteDevice + security: + - bearerAuth: [] + parameters: + - name: id + in: path + required: true + schema: + type: string + description: Device ID or name + responses: + 204: + description: Device unregistered + 404: + description: Device not found + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 409: + description: Conflict - device is attached to an instance + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + 500: + description: Internal server error + content: + application/json: + schema: + $ref: "#/components/schemas/Error" + /ingresses: get: summary: List ingresses @@ -1388,3 +1665,4 @@ paths: schema: $ref: "#/components/schemas/Error" + diff --git a/stainless.yaml b/stainless.yaml index 6aac3c92..4e271485 100644 --- a/stainless.yaml +++ b/stainless.yaml @@ -103,6 +103,18 @@ resources: get: get /volumes/{id} delete: delete /volumes/{id} + devices: + models: + device: '#/components/schemas/Device' + available_device: '#/components/schemas/AvailableDevice' + device_type: '#/components/schemas/DeviceType' + methods: + list: get /devices + create: post /devices + retrieve: get /devices/{id} + delete: delete /devices/{id} + list_available: get /devices/available + ingresses: models: ingress: '#/components/schemas/Ingress' From befef360e1435aa91be472fc5b56fb4fa7cc6d4a Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Sun, 14 Dec 2025 04:08:36 +0000 Subject: [PATCH 08/17] test: increase VM memory to 2GB to accommodate large initrd The initrd now includes NVIDIA kernel modules, firmware, and driver libraries (~238MB total). With 512MB VMs, the kernel couldn't unpack the initrd into tmpfs without running out of space. Increase test VM memory from 512MB to 2GB to provide sufficient room for the initrd contents plus normal VM operation. 
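
A rough sketch of the sizing behind the 2GB figure (the ~238MB number is
the measurement above; the transient-unpack overhead is an estimate,
since the compressed archive and the extracted contents briefly coexist
in memory during early boot):

    ~238MB  initrd contents unpacked into the rootfs tmpfs
  + ~238MB  transient copy while the kernel extracts the archive
  + ~512MB  headroom for the kernel and the guest workload
  -------
    ~1GB   practical floor; 2GB leaves margin for larger driver bundles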
--- lib/devices/gpu_e2e_test.go | 2 +- lib/devices/gpu_module_test.go | 2 +- lib/instances/exec_test.go | 2 +- lib/instances/manager_test.go | 4 ++-- lib/instances/network_test.go | 2 +- lib/instances/resource_limits_test.go | 16 ++++++++-------- lib/instances/volumes_test.go | 10 +++++----- 7 files changed, 19 insertions(+), 19 deletions(-) diff --git a/lib/devices/gpu_e2e_test.go b/lib/devices/gpu_e2e_test.go index 94941b11..4348ebdb 100644 --- a/lib/devices/gpu_e2e_test.go +++ b/lib/devices/gpu_e2e_test.go @@ -189,7 +189,7 @@ func TestGPUPassthrough(t *testing.T) { inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ Name: "gpu-test", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/devices/gpu_module_test.go b/lib/devices/gpu_module_test.go index fad9bc9a..841faedd 100644 --- a/lib/devices/gpu_module_test.go +++ b/lib/devices/gpu_module_test.go @@ -164,7 +164,7 @@ func TestNVIDIAModuleLoading(t *testing.T) { inst, err := instanceMgr.CreateInstance(createCtx, instances.CreateInstanceRequest{ Name: "nvidia-module-test", Image: createdImg.Name, - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 2, diff --git a/lib/instances/exec_test.go b/lib/instances/exec_test.go index f0e69663..d3dbfde9 100644 --- a/lib/instances/exec_test.go +++ b/lib/instances/exec_test.go @@ -75,7 +75,7 @@ func TestExecConcurrent(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "exec-test", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 2, // More vCPUs for concurrency diff --git a/lib/instances/manager_test.go b/lib/instances/manager_test.go index 19241a3a..95b99174 100644 --- a/lib/instances/manager_test.go +++ b/lib/instances/manager_test.go @@ -248,7 +248,7 @@ func TestBasicEndToEnd(t *testing.T) { req := CreateInstanceRequest{ Name: "test-nginx", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, // 512MB + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 10 * 1024 * 1024 * 1024, // 10GB Vcpus: 1, @@ -862,7 +862,7 @@ func TestStandbyAndRestore(t *testing.T) { req := CreateInstanceRequest{ Name: "test-standby", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 10 * 1024 * 1024 * 1024, Vcpus: 1, diff --git a/lib/instances/network_test.go b/lib/instances/network_test.go index 579b4b8c..419115e8 100644 --- a/lib/instances/network_test.go +++ b/lib/instances/network_test.go @@ -63,7 +63,7 @@ func TestCreateInstanceWithNetwork(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "test-net-instance", Image: "docker.io/library/nginx:alpine", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 5 * 1024 * 1024 * 1024, Vcpus: 1, diff --git 
a/lib/instances/resource_limits_test.go b/lib/instances/resource_limits_test.go index f003f3a8..91dc4a46 100644 --- a/lib/instances/resource_limits_test.go +++ b/lib/instances/resource_limits_test.go @@ -258,13 +258,13 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { // Set small aggregate limits: // - MaxTotalVcpus: 2 (first VM gets 1, second wants 2 -> denied) - // - MaxTotalMemory: 2GB (first VM gets 1GB, second wants 1.5GB -> denied) + // - MaxTotalMemory: 6GB (first VM gets 2.5GB, second wants 4GB -> denied) limits := ResourceLimits{ MaxOverlaySize: 100 * 1024 * 1024 * 1024, // 100GB MaxVcpusPerInstance: 4, // per-instance limit (high) - MaxMemoryPerInstance: 4 * 1024 * 1024 * 1024, // 4GB per-instance (high) + MaxMemoryPerInstance: 8 * 1024 * 1024 * 1024, // 8GB per-instance (high) MaxTotalVcpus: 2, // aggregate: only 2 total - MaxTotalMemory: 2 * 1024 * 1024 * 1024, // aggregate: only 2GB total + MaxTotalMemory: 6 * 1024 * 1024 * 1024, // aggregate: only 6GB total (allows first 2.5GB VM) } mgr := NewManager(p, imageManager, systemManager, networkManager, deviceManager, volumeManager, limits, nil, nil).(*manager) @@ -306,14 +306,14 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { assert.Equal(t, 0, usage.TotalVcpus, "Initial vCPUs should be 0") assert.Equal(t, int64(0), usage.TotalMemory, "Initial memory should be 0") - // Create first VM: 1 vCPU, 512MB + 512MB = 1GB memory - t.Log("Creating first instance (1 vCPU, 1GB memory)...") + // Create first VM: 1 vCPU, 2GB + 512MB = 2.5GB memory + t.Log("Creating first instance (1 vCPU, 2.5GB memory)...") inst1, err := mgr.CreateInstance(ctx, CreateInstanceRequest{ Name: "small-vm-1", Image: "docker.io/library/alpine:latest", Vcpus: 1, - Size: 512 * 1024 * 1024, // 512MB - HotplugSize: 512 * 1024 * 1024, // 512MB (total 1GB) + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) + HotplugSize: 512 * 1024 * 1024, // 512MB OverlaySize: 1 * 1024 * 1024 * 1024, NetworkEnabled: false, }) @@ -325,7 +325,7 @@ func TestAggregateLimits_EnforcedAtRuntime(t *testing.T) { usage, err = mgr.calculateAggregateUsage(ctx) require.NoError(t, err) assert.Equal(t, 1, usage.TotalVcpus, "Should have 1 vCPU in use") - assert.Equal(t, int64(1024*1024*1024), usage.TotalMemory, "Should have 1GB memory in use") + assert.Equal(t, int64(2*1024*1024*1024+512*1024*1024), usage.TotalMemory, "Should have 2.5GB memory in use") t.Logf("Aggregate usage after first VM: %d vCPUs, %d bytes memory", usage.TotalVcpus, usage.TotalMemory) // Try to create second VM: 2 vCPUs (would exceed MaxTotalVcpus=2) diff --git a/lib/instances/volumes_test.go b/lib/instances/volumes_test.go index abb760f1..3237db3b 100644 --- a/lib/instances/volumes_test.go +++ b/lib/instances/volumes_test.go @@ -93,7 +93,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { writerInst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "writer", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -135,7 +135,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { reader1, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "reader-1", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, 
Vcpus: 1, @@ -151,7 +151,7 @@ func TestVolumeMultiAttachReadOnly(t *testing.T) { reader2, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "reader-2-overlay", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -270,7 +270,7 @@ func TestOverlayDiskCleanupOnDelete(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "overlay-cleanup-test", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, @@ -394,7 +394,7 @@ func TestVolumeFromArchive(t *testing.T) { inst, err := manager.CreateInstance(ctx, CreateInstanceRequest{ Name: "archive-reader", Image: "docker.io/library/alpine:latest", - Size: 512 * 1024 * 1024, + Size: 2 * 1024 * 1024 * 1024, // 2GB (needs extra room for initrd with NVIDIA libs) HotplugSize: 512 * 1024 * 1024, OverlaySize: 1024 * 1024 * 1024, Vcpus: 1, From 975fdd9bddf64ccfc8e28de0bc3ccd342207f33b Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Mon, 15 Dec 2025 19:22:53 +0000 Subject: [PATCH 09/17] remove slop test --- lib/instances/liveness_test.go | 42 ---------------------------------- 1 file changed, 42 deletions(-) delete mode 100644 lib/instances/liveness_test.go diff --git a/lib/instances/liveness_test.go b/lib/instances/liveness_test.go deleted file mode 100644 index 6fa7c8b5..00000000 --- a/lib/instances/liveness_test.go +++ /dev/null @@ -1,42 +0,0 @@ -package instances - -import ( - "context" - "testing" - - "github.com/stretchr/testify/assert" -) - -func TestNewLivenessChecker_ReturnsNilForNonManagerType(t *testing.T) { - // Test that passing a non-*manager type returns nil - // This would only happen if someone wraps the Manager interface - // We can't easily test this without a mock, but we can test the happy path - - // For now, just verify the interface is implemented correctly - var _ = (*instanceLivenessAdapter)(nil) -} - -func TestInstanceLivenessAdapter_Interface(t *testing.T) { - // Verify the adapter implements the expected interface - // This is a compile-time check via the var _ assignment in liveness.go - // but we can also verify the method signatures exist - adapter := &instanceLivenessAdapter{} - - ctx := context.Background() - - // These should not panic even with nil manager - // (they'll fail, but that's expected) - running := adapter.IsInstanceRunning(ctx, "test-id") - assert.False(t, running, "Should return false for nil manager") - - devices := adapter.GetInstanceDevices(ctx, "test-id") - assert.Nil(t, devices, "Should return nil for nil manager") - - allDevices := adapter.ListAllInstanceDevices(ctx) - assert.Nil(t, allDevices, "Should return nil for nil manager") -} - - - - - From 361998f5b6d4d6af161656a7dba9b52179bad8d7 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Mon, 15 Dec 2025 19:39:23 +0000 Subject: [PATCH 10/17] remove outdated comment --- lib/system/initrd.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/lib/system/initrd.go b/lib/system/initrd.go index 027b4372..3d2dc2e9 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -66,8 +66,6 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) } // Write generated init script - // Note: The init script is generated at instance creation time 
with hasGPU flag, - // so we write a placeholder here that will be replaced per-instance initScript := GenerateInitScript() initPath := filepath.Join(rootfsDir, "init") if err := os.WriteFile(initPath, []byte(initScript), 0755); err != nil { From cc0efea8324828308418484c480813fb6b1e7e12 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Mon, 15 Dec 2025 20:12:06 +0000 Subject: [PATCH 11/17] markattached bug --- lib/devices/manager.go | 2 -- lib/instances/create.go | 51 +++++++++++++++++++++++++++++------------ 2 files changed, 36 insertions(+), 17 deletions(-) diff --git a/lib/devices/manager.go b/lib/devices/manager.go index 79e9dc32..195264d0 100644 --- a/lib/devices/manager.go +++ b/lib/devices/manager.go @@ -800,5 +800,3 @@ func (m *manager) findByPCIAddress(pciAddress string) (*Device, error) { return nil, ErrNotFound } - - diff --git a/lib/instances/create.go b/lib/instances/create.go index 8d14efa8..45ec0979 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -202,7 +202,29 @@ func (m *manager) createInstance( kernelVer := m.systemManager.GetDefaultKernelVersion() // 9. Validate, resolve, and auto-bind devices (GPU passthrough) + // Track devices we've marked as attached for cleanup on error. + // The cleanup closure captures this slice by reference, so it will see + // whatever devices have been attached when cleanup runs. + var attachedDeviceIDs []string var resolvedDeviceIDs []string + + // Setup cleanup stack early so device attachment errors trigger cleanup + cu := cleanup.Make(func() { + log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id) + m.deleteInstanceData(id) + }) + defer cu.Clean() + + // Add device detachment cleanup - closure captures attachedDeviceIDs by reference + if m.deviceManager != nil { + cu.Add(func() { + for _, deviceID := range attachedDeviceIDs { + log.DebugContext(ctx, "detaching device on cleanup", "instance_id", id, "device", deviceID) + m.deviceManager.MarkDetached(ctx, deviceID) + } + }) + } + if len(req.Devices) > 0 && m.deviceManager != nil { for _, deviceRef := range req.Devices { device, err := m.deviceManager.GetDevice(ctx, deviceRef) @@ -222,6 +244,12 @@ func (m *manager) createInstance( return nil, fmt.Errorf("bind device %s to VFIO: %w", deviceRef, err) } } + // Mark device as attached to this instance + if err := m.deviceManager.MarkAttached(ctx, device.Id, id); err != nil { + log.ErrorContext(ctx, "failed to mark device as attached", "device", deviceRef, "error", err) + return nil, fmt.Errorf("mark device %s as attached: %w", deviceRef, err) + } + attachedDeviceIDs = append(attachedDeviceIDs, device.Id) resolvedDeviceIDs = append(resolvedDeviceIDs, device.Id) } log.DebugContext(ctx, "validated devices for passthrough", "id", id, "devices", resolvedDeviceIDs) @@ -250,28 +278,21 @@ func (m *manager) createInstance( Devices: resolvedDeviceIDs, } - // Setup cleanup stack for automatic rollback on errors - cu := cleanup.Make(func() { - log.DebugContext(ctx, "cleaning up instance on error", "instance_id", id) - m.deleteInstanceData(id) - }) - defer cu.Clean() - - // 8. Ensure directories + // 11. Ensure directories log.DebugContext(ctx, "creating instance directories", "instance_id", id) if err := m.ensureDirectories(id); err != nil { log.ErrorContext(ctx, "failed to create directories", "instance_id", id, "error", err) return nil, fmt.Errorf("ensure directories: %w", err) } - // 9. Create overlay disk with specified size + // 12. 
Create overlay disk with specified size log.DebugContext(ctx, "creating overlay disk", "instance_id", id, "size_bytes", stored.OverlaySize) if err := m.createOverlayDisk(id, stored.OverlaySize); err != nil { log.ErrorContext(ctx, "failed to create overlay disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create overlay disk: %w", err) } - // 14. Allocate network (if network enabled) + // 13. Allocate network (if network enabled) var netConfig *network.NetworkConfig if networkName != "" { log.DebugContext(ctx, "allocating network", "instance_id", id, "network", networkName) @@ -296,7 +317,7 @@ func (m *manager) createInstance( }) } - // 15. Validate and attach volumes + // 14. Validate and attach volumes if len(req.Volumes) > 0 { log.DebugContext(ctx, "validating volumes", "instance_id", id, "count", len(req.Volumes)) for _, volAttach := range req.Volumes { @@ -336,7 +357,7 @@ func (m *manager) createInstance( stored.Volumes = req.Volumes } - // 16. Create config disk (needs Instance for buildVMConfig) + // 15. Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "creating config disk", "instance_id", id) if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil { @@ -344,7 +365,7 @@ func (m *manager) createInstance( return nil, fmt.Errorf("create config disk: %w", err) } - // 12. Save metadata + // 16. Save metadata log.DebugContext(ctx, "saving instance metadata", "instance_id", id) meta := &metadata{StoredMetadata: *stored} if err := m.saveMetadata(meta); err != nil { @@ -352,14 +373,14 @@ func (m *manager) createInstance( return nil, fmt.Errorf("save metadata: %w", err) } - // 13. Start VMM and boot VM + // 17. Start VMM and boot VM log.InfoContext(ctx, "starting VMM and booting VM", "instance_id", id) if err := m.startAndBootVM(ctx, stored, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to start and boot VM", "instance_id", id, "error", err) return nil, err } - // 14. Update timestamp after VM is running + // 18. Update timestamp after VM is running now := time.Now() stored.StartedAt = &now From 4fa81c480a32c79c6c49a21e535460d7004c3c09 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Mon, 15 Dec 2025 20:13:27 +0000 Subject: [PATCH 12/17] remove preview script --- Makefile | 6 ------ 1 file changed, 6 deletions(-) diff --git a/Makefile b/Makefile index d0aa43a3..58e67156 100644 --- a/Makefile +++ b/Makefile @@ -168,12 +168,6 @@ build: ensure-ch-binaries ensure-caddy-binaries lib/system/exec_agent/exec-agent # Build all binaries build-all: build -# Build preview CLI from stainless-sdks/hypeman-cli -# Usage: make build-preview-cli - uses preview/ -# make build-preview-cli CLI_BRANCH=preview/xyz - uses specific branch -build-preview-cli: - @./scripts/build-preview-cli.sh $(CLI_BRANCH) - # Run in development mode with hot reload dev: ensure-ch-binaries ensure-caddy-binaries lib/system/exec_agent/exec-agent $(AIR) @rm -f ./tmp/main From 712e328c184d03f1e864ede5ae2cb82b30da0ca7 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Mon, 15 Dec 2025 20:53:08 +0000 Subject: [PATCH 13/17] fix(configdisk): only set HAS_GPU=1 for actual GPU devices The HAS_GPU flag was being set unconditionally when any device was attached, regardless of device type. This would trigger NVIDIA module loading in the VM init script even for non-GPU PCI devices. Now iterates through attached devices and checks each device's type, only setting HAS_GPU=1 if at least one device is DeviceTypeGPU. 
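
In sketch form, the new decision is the predicate below — an illustrative,
self-contained restatement of the configdisk.go diff that follows. The helper
name hasGPUDevice is invented for the sketch; GetDevice errors are skipped,
matching the diff:

    // hasGPUDevice reports whether at least one attached device is a GPU.
    func hasGPUDevice(ctx context.Context, mgr devices.Manager, deviceIDs []string) bool {
        for _, id := range deviceIDs {
            d, err := mgr.GetDevice(ctx, id)
            if err == nil && d.Type == devices.DeviceTypeGPU {
                return true // a real GPU: setting HAS_GPU=1 is warranted
            }
        }
        return false // generic PCI devices no longer trigger NVIDIA module loading
    }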
--- lib/instances/configdisk.go | 18 ++++++++++++------ lib/instances/create.go | 2 +- lib/instances/start.go | 2 +- 3 files changed, 14 insertions(+), 8 deletions(-) diff --git a/lib/instances/configdisk.go b/lib/instances/configdisk.go index 2ea6c85a..fb9305ac 100644 --- a/lib/instances/configdisk.go +++ b/lib/instances/configdisk.go @@ -1,6 +1,7 @@ package instances import ( + "context" "encoding/json" "fmt" "os" @@ -8,6 +9,7 @@ import ( "strconv" "strings" + "github.com/onkernel/hypeman/lib/devices" "github.com/onkernel/hypeman/lib/images" "github.com/onkernel/hypeman/lib/network" ) @@ -16,7 +18,7 @@ import ( // The disk contains: // - /config.sh - Shell script sourced by init // - /metadata.json - JSON metadata for programmatic access -func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error { +func (m *manager) createConfigDisk(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) error { // Create temporary directory for config files tmpDir, err := os.MkdirTemp("", "hypeman-config-*") if err != nil { @@ -25,7 +27,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC defer os.RemoveAll(tmpDir) // Generate config.sh - configScript := m.generateConfigScript(inst, imageInfo, netConfig) + configScript := m.generateConfigScript(ctx, inst, imageInfo, netConfig) configPath := filepath.Join(tmpDir, "config.sh") if err := os.WriteFile(configPath, []byte(configScript), 0644); err != nil { return fmt.Errorf("write config.sh: %w", err) @@ -64,7 +66,7 @@ func (m *manager) createConfigDisk(inst *Instance, imageInfo *images.Image, netC } // generateConfigScript creates the shell script that will be sourced by init -func (m *manager) generateConfigScript(inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string { +func (m *manager) generateConfigScript(ctx context.Context, inst *Instance, imageInfo *images.Image, netConfig *network.NetworkConfig) string { // Prepare entrypoint value entrypoint := "" if len(imageInfo.Entrypoint) > 0 { @@ -106,10 +108,14 @@ GUEST_DNS="%s" } // GPU passthrough configuration - // When devices are attached, set HAS_GPU=1 to trigger NVIDIA module loading in init + // Only set HAS_GPU=1 if at least one attached device is actually a GPU gpuSection := "" - if len(inst.Devices) > 0 { - gpuSection = "\n# GPU passthrough\nHAS_GPU=1\n" + for _, deviceID := range inst.Devices { + device, err := m.deviceManager.GetDevice(ctx, deviceID) + if err == nil && device.Type == devices.DeviceTypeGPU { + gpuSection = "\n# GPU passthrough\nHAS_GPU=1\n" + break + } } // Build volume mounts section diff --git a/lib/instances/create.go b/lib/instances/create.go index 45ec0979..f133c0f4 100644 --- a/lib/instances/create.go +++ b/lib/instances/create.go @@ -360,7 +360,7 @@ func (m *manager) createInstance( // 15. 
Create config disk (needs Instance for buildVMConfig) inst := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "creating config disk", "instance_id", id) - if err := m.createConfigDisk(inst, imageInfo, netConfig); err != nil { + if err := m.createConfigDisk(ctx, inst, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create config disk: %w", err) } diff --git a/lib/instances/start.go b/lib/instances/start.go index a29c3ad0..1687d597 100644 --- a/lib/instances/start.go +++ b/lib/instances/start.go @@ -84,7 +84,7 @@ func (m *manager) startInstance( // 5. Regenerate config disk with new network configuration instForConfig := &Instance{StoredMetadata: *stored} log.DebugContext(ctx, "regenerating config disk", "instance_id", id) - if err := m.createConfigDisk(instForConfig, imageInfo, netConfig); err != nil { + if err := m.createConfigDisk(ctx, instForConfig, imageInfo, netConfig); err != nil { log.ErrorContext(ctx, "failed to create config disk", "instance_id", id, "error", err) return nil, fmt.Errorf("create config disk: %w", err) } From 28a6bc3696c97e7671310cf8081e171f81fb8575 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Mon, 15 Dec 2025 21:12:44 +0000 Subject: [PATCH 14/17] fix(devices): prevent false positive warnings for instances without GPU devices detectSuspiciousVMMProcesses was using ListAllInstanceDevices to build the set of known running instances, but that method only returns instances with devices attached. This caused legitimate cloud-hypervisor processes for instances without GPU passthrough to be incorrectly flagged as 'untracked' with misleading advice to run gpu-reset.sh. Fix: Call IsInstanceRunning directly for each discovered process instead of pre-building a map from ListAllInstanceDevices. This correctly identifies all running instances regardless of device attachment. --- lib/devices/manager.go | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) diff --git a/lib/devices/manager.go b/lib/devices/manager.go index 195264d0..14873189 100644 --- a/lib/devices/manager.go +++ b/lib/devices/manager.go @@ -669,18 +669,6 @@ func (m *manager) detectSuspiciousVMMProcesses(ctx context.Context, stats *recon return } - // Get list of running instance sockets if we have liveness checker - var runningInstances map[string]bool - if m.livenessChecker != nil { - instanceDevices := m.livenessChecker.ListAllInstanceDevices(ctx) - runningInstances = make(map[string]bool) - for instanceID := range instanceDevices { - if m.livenessChecker.IsInstanceRunning(ctx, instanceID) { - runningInstances[instanceID] = true - } - } - } - for _, line := range lines { if line == "" { continue @@ -697,9 +685,12 @@ func (m *manager) detectSuspiciousVMMProcesses(ctx context.Context, stats *recon } } - // Check if this socket path matches any instance directory + // Check if this socket path matches any running instance + // We use IsInstanceRunning directly rather than ListAllInstanceDevices because + // the latter only returns instances with devices attached, which would cause + // false positives for instances without GPU passthrough. 
matched := false - if socketPath != "" { + if socketPath != "" && m.livenessChecker != nil { // Socket path is typically like /var/lib/hypeman/guests//ch.sock // Try to extract instance ID if strings.Contains(socketPath, "/guests/") { @@ -707,7 +698,7 @@ func (m *manager) detectSuspiciousVMMProcesses(ctx context.Context, stats *recon if len(pathParts) > 1 { instancePath := pathParts[1] instanceID := strings.Split(instancePath, "/")[0] - if runningInstances != nil && runningInstances[instanceID] { + if m.livenessChecker.IsInstanceRunning(ctx, instanceID) { matched = true } } From a26009141a7dee763fcc18c94aa29e94eb4975c1 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Tue, 16 Dec 2025 16:15:35 +0000 Subject: [PATCH 15/17] devices: add startup validation warnings for GPU prerequisites Check and warn on startup if: - IOMMU is not enabled (no groups in /sys/kernel/iommu_groups) - VFIO modules not loaded (vfio_pci, vfio_iommu_type1) - Huge pages not configured (info hint when devices exist) --- lib/devices/manager.go | 51 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/lib/devices/manager.go b/lib/devices/manager.go index 14873189..80024286 100644 --- a/lib/devices/manager.go +++ b/lib/devices/manager.go @@ -396,6 +396,9 @@ func (m *manager) ReconcileDevices(ctx context.Context) error { log := logger.FromContext(ctx) log.InfoContext(ctx, "reconciling device state") + // Validate GPU prerequisites and log warnings + m.validatePrerequisites(ctx) + m.mu.Lock() defer m.mu.Unlock() @@ -539,6 +542,54 @@ func (m *manager) ReconcileDevices(ctx context.Context) error { return nil } +// validatePrerequisites checks GPU passthrough prerequisites and logs warnings. +// This helps operators debug configuration issues. +func (m *manager) validatePrerequisites(ctx context.Context) { + log := logger.FromContext(ctx) + + // Check IOMMU availability + iommuGroupsDir := "/sys/kernel/iommu_groups" + entries, err := os.ReadDir(iommuGroupsDir) + if err != nil { + log.WarnContext(ctx, "IOMMU not available - GPU passthrough will not work", + "error", err, + "hint", "enable IOMMU in BIOS and kernel (intel_iommu=on or amd_iommu=on)", + ) + } else if len(entries) == 0 { + log.WarnContext(ctx, "no IOMMU groups found - GPU passthrough will not work", + "hint", "enable IOMMU in BIOS and kernel (intel_iommu=on or amd_iommu=on)", + ) + } + + // Check VFIO modules + vfioModules := []string{"vfio_pci", "vfio_iommu_type1"} + for _, module := range vfioModules { + modulePath := "/sys/module/" + module + if _, err := os.Stat(modulePath); os.IsNotExist(err) { + log.WarnContext(ctx, "VFIO module not loaded - GPU passthrough will not work", + "module", module, + "hint", "run: modprobe "+module, + ) + } + } + + // Check huge pages (info-level hint if devices exist but no huge pages) + hugePagesPath := "/sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages" + if data, err := os.ReadFile(hugePagesPath); err == nil { + count := strings.TrimSpace(string(data)) + if count == "0" || count == "" { + // Only warn if we have registered devices + if devicesDir := m.paths.DevicesDir(); devicesDir != "" { + if entries, err := os.ReadDir(devicesDir); err == nil && len(entries) > 0 { + log.InfoContext(ctx, "huge pages not configured - GPU performance may be reduced", + "hint", "run: echo 1024 > /proc/sys/vm/nr_hugepages", + ) + } + } + } + } +} + // reconcileStats tracks reconciliation metrics type reconcileStats struct { orphanedCleared int From 8d610e982ae2f51c3b74b5b8e5c6444d94802f31 Mon Sep 17 
00:00:00 2001 From: Rafael Garcia Date: Tue, 16 Dec 2025 16:21:43 +0000 Subject: [PATCH 16/17] instances: move detectSuspiciousVMMProcesses to liveness.go This function is about instance lifecycle, not device management. Moving it to the instances module where it belongs. The implementation uses IsInstanceRunning (which queries all instances) rather than ListAllInstanceDevices (which only returns instances with devices) to avoid false positives for non-GPU VMs. --- lib/devices/manager.go | 74 ++++----------------------------------- lib/instances/liveness.go | 74 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 81 insertions(+), 67 deletions(-) diff --git a/lib/devices/manager.go b/lib/devices/manager.go index 80024286..cc004cdc 100644 --- a/lib/devices/manager.go +++ b/lib/devices/manager.go @@ -5,7 +5,6 @@ import ( "encoding/json" "fmt" "os" - "os/exec" "strings" "sync" "time" @@ -29,6 +28,10 @@ type InstanceLivenessChecker interface { // ListAllInstanceDevices returns a map of instanceID -> []deviceIDs for all instances. ListAllInstanceDevices(ctx context.Context) map[string][]string + + // DetectSuspiciousVMMProcesses finds cloud-hypervisor processes that don't match + // known instances and logs warnings. Returns the count of suspicious processes found. + DetectSuspiciousVMMProcesses(ctx context.Context) int } // Manager provides device management operations @@ -526,7 +529,9 @@ func (m *manager) ReconcileDevices(ctx context.Context) error { } // Phase 3: Detect suspicious cloud-hypervisor processes (log-only) - m.detectSuspiciousVMMProcesses(ctx, &stats) + if m.livenessChecker != nil { + stats.suspiciousVMM = m.livenessChecker.DetectSuspiciousVMMProcesses(ctx) + } // Log summary log.InfoContext(ctx, "device reconciliation complete", @@ -702,71 +707,6 @@ func (m *manager) resetOrphanedDevice(ctx context.Context, device *Device, stats } } -// detectSuspiciousVMMProcesses logs warnings about cloud-hypervisor processes -// that don't match known instances. This is log-only (no killing). -func (m *manager) detectSuspiciousVMMProcesses(ctx context.Context, stats *reconcileStats) { - log := logger.FromContext(ctx) - - // Find all cloud-hypervisor processes - cmd := exec.Command("pgrep", "-a", "cloud-hypervisor") - output, err := cmd.Output() - if err != nil { - // pgrep returns exit code 1 if no processes found - that's fine - return - } - - lines := strings.Split(strings.TrimSpace(string(output)), "\n") - if len(lines) == 0 || (len(lines) == 1 && lines[0] == "") { - return - } - - for _, line := range lines { - if line == "" { - continue - } - - // Try to extract socket path from command line to match against known instances - // cloud-hypervisor command typically includes --api-socket - socketPath := "" - parts := strings.Fields(line) - for i, part := range parts { - if part == "--api-socket" && i+1 < len(parts) { - socketPath = parts[i+1] - break - } - } - - // Check if this socket path matches any running instance - // We use IsInstanceRunning directly rather than ListAllInstanceDevices because - // the latter only returns instances with devices attached, which would cause - // false positives for instances without GPU passthrough. 
- matched := false - if socketPath != "" && m.livenessChecker != nil { - // Socket path is typically like /var/lib/hypeman/guests//ch.sock - // Try to extract instance ID - if strings.Contains(socketPath, "/guests/") { - pathParts := strings.Split(socketPath, "/guests/") - if len(pathParts) > 1 { - instancePath := pathParts[1] - instanceID := strings.Split(instancePath, "/")[0] - if m.livenessChecker.IsInstanceRunning(ctx, instanceID) { - matched = true - } - } - } - } - - if !matched { - log.WarnContext(ctx, "detected untracked cloud-hypervisor process", - "process_info", line, - "socket_path", socketPath, - "remediation", "Run lib/devices/scripts/gpu-reset.sh for manual recovery if needed", - ) - stats.suspiciousVMM++ - } - } -} - // Helper methods func (m *manager) loadDevice(id string) (*Device, error) { diff --git a/lib/instances/liveness.go b/lib/instances/liveness.go index 3440bbbb..19d3d20b 100644 --- a/lib/instances/liveness.go +++ b/lib/instances/liveness.go @@ -2,8 +2,11 @@ package instances import ( "context" + "os/exec" + "strings" "github.com/onkernel/hypeman/lib/devices" + "github.com/onkernel/hypeman/lib/logger" ) // Ensure instanceLivenessAdapter implements the interface @@ -79,3 +82,74 @@ func (a *instanceLivenessAdapter) ListAllInstanceDevices(ctx context.Context) ma return result } +// DetectSuspiciousVMMProcesses finds cloud-hypervisor processes that don't match +// known instances and logs warnings. Returns the count of suspicious processes found. +// This uses ListInstances (all instances) rather than ListAllInstanceDevices to avoid +// false positives for instances without GPU devices attached. +func (a *instanceLivenessAdapter) DetectSuspiciousVMMProcesses(ctx context.Context) int { + log := logger.FromContext(ctx) + + if a.manager == nil { + return 0 + } + + // Find all cloud-hypervisor processes + cmd := exec.Command("pgrep", "-a", "cloud-hypervisor") + output, err := cmd.Output() + if err != nil { + // pgrep returns exit code 1 if no processes found - that's fine + return 0 + } + + lines := strings.Split(strings.TrimSpace(string(output)), "\n") + if len(lines) == 0 || (len(lines) == 1 && lines[0] == "") { + return 0 + } + + suspiciousCount := 0 + for _, line := range lines { + if line == "" { + continue + } + + // Try to extract socket path from command line to match against known instances + // cloud-hypervisor command typically includes --api-socket + socketPath := "" + parts := strings.Fields(line) + for i, part := range parts { + if part == "--api-socket" && i+1 < len(parts) { + socketPath = parts[i+1] + break + } + } + + // Check if this socket path matches any known instance + matched := false + if socketPath != "" { + // Socket path is typically like /var/lib/hypeman/guests//ch.sock + // Try to extract instance ID + if strings.Contains(socketPath, "/guests/") { + pathParts := strings.Split(socketPath, "/guests/") + if len(pathParts) > 1 { + instancePath := pathParts[1] + instanceID := strings.Split(instancePath, "/")[0] + if a.IsInstanceRunning(ctx, instanceID) { + matched = true + } + } + } + } + + if !matched { + log.WarnContext(ctx, "detected untracked cloud-hypervisor process", + "process_info", line, + "socket_path", socketPath, + "remediation", "Run lib/devices/scripts/gpu-reset.sh for manual recovery if needed", + ) + suspiciousCount++ + } + } + + return suspiciousCount +} + From 4d23e7344f35255a0aa7a3d8cf381ff4e2633373 Mon Sep 17 00:00:00 2001 From: Rafael Garcia Date: Tue, 16 Dec 2025 16:23:32 +0000 Subject: [PATCH 17/17] system: use context 
loggers in initrd building Replace fmt.Printf calls with proper context loggers so messages appear in structured logs with consistent formatting. --- lib/devices/reconcile_test.go | 19 ++++--------------- lib/system/initrd.go | 10 +++++++--- 2 files changed, 11 insertions(+), 18 deletions(-) diff --git a/lib/devices/reconcile_test.go b/lib/devices/reconcile_test.go index f6aa422e..7c2bdaea 100644 --- a/lib/devices/reconcile_test.go +++ b/lib/devices/reconcile_test.go @@ -38,6 +38,10 @@ func (m *mockLivenessChecker) ListAllInstanceDevices(ctx context.Context) map[st return m.instanceDevices } +func (m *mockLivenessChecker) DetectSuspiciousVMMProcesses(ctx context.Context) int { + return 0 // Mock returns no suspicious processes +} + func (m *mockLivenessChecker) setRunning(instanceID string, running bool) { m.runningInstances[instanceID] = running } @@ -563,21 +567,6 @@ func TestResetOrphanedDevice_NonExistentPCIAddress(t *testing.T) { // The key is it doesn't panic } -// TestDetectSuspiciousVMMProcesses_NoPgrep tests that detection handles -// missing pgrep gracefully (e.g., in minimal containers) -func TestDetectSuspiciousVMMProcesses_NoPgrep(t *testing.T) { - mgr, _, _ := setupTestManager(t) - ctx := context.Background() - - stats := &reconcileStats{} - - // This test just verifies no panic when pgrep isn't available - // or returns no results - mgr.detectSuspiciousVMMProcesses(ctx, stats) - - // No assertions needed - we just want to ensure no panic -} - // Helper function for testing: verify device directory structure func verifyDeviceDir(t *testing.T, p *paths.Paths, deviceID string) bool { t.Helper() diff --git a/lib/system/initrd.go b/lib/system/initrd.go index 3d2dc2e9..09f286c3 100644 --- a/lib/system/initrd.go +++ b/lib/system/initrd.go @@ -15,6 +15,7 @@ import ( "time" "github.com/onkernel/hypeman/lib/images" + "github.com/onkernel/hypeman/lib/logger" ) const alpineBaseImage = "alpine:3.22" @@ -62,7 +63,8 @@ func (m *manager) buildInitrd(ctx context.Context, arch string) (string, error) // Add NVIDIA kernel modules (for GPU passthrough support) if err := m.addNvidiaModules(ctx, rootfsDir, arch); err != nil { // Log but don't fail - NVIDIA modules are optional (not available on all architectures) - fmt.Printf("initrd: skipping NVIDIA modules: %v\n", err) + log := logger.FromContext(ctx) + log.InfoContext(ctx, "skipping NVIDIA modules", "error", err) } // Write generated init script @@ -205,7 +207,8 @@ func (m *manager) addNvidiaModules(ctx context.Context, rootfsDir, arch string) // Add userspace driver libraries (libcuda.so, libnvidia-ml.so, nvidia-smi, etc.) // These are injected into containers at boot time - see lib/devices/GPU.md if err := m.addNvidiaDriverLibs(ctx, rootfsDir, arch); err != nil { - fmt.Printf("initrd: warning: could not add nvidia driver libs: %v\n", err) + log := logger.FromContext(ctx) + log.WarnContext(ctx, "could not add nvidia driver libs", "error", err) // Don't fail - kernel modules can still work, but containers won't have driver libs } @@ -252,7 +255,8 @@ func (m *manager) addNvidiaDriverLibs(ctx context.Context, rootfsDir, arch strin return fmt.Errorf("extract nvidia driver libs: %w", err) } - fmt.Printf("initrd: added NVIDIA driver libraries from %s\n", url) + log := logger.FromContext(ctx) + log.InfoContext(ctx, "added NVIDIA driver libraries", "url", url) return nil }