From 0db8ca98930d4aff1a6e9ddfc6435096ddb2e26a Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 16 Jun 2025 19:21:09 +0200 Subject: [PATCH] Fix discovery of nvidia-fs devices in non-privileged containers The /dev/nvidia-fs* device nodes for GDS are not created at the driver root when running a containerized driver and are always created in /. This change updates the search path for these device nodes so that non-privileged containers also have the device nodes injected. Signed-off-by: Evan Lezar --- internal/discover/gds.go | 5 +++-- internal/modifier/gated.go | 2 +- pkg/nvcdi/gds.go | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/discover/gds.go b/internal/discover/gds.go index cf762cd8..d1f4b00b 100644 --- a/internal/discover/gds.go +++ b/internal/discover/gds.go @@ -29,10 +29,11 @@ type gdsDeviceDiscoverer struct { } // NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts. -func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string) (Discover, error) { +func NewGDSDiscoverer(logger logger.Interface, driverRoot string) (Discover, error) { devices := NewCharDeviceDiscoverer( logger, - devRoot, + // The /dev/nvidia-fs* devices are always created at / + "/", []string{"/dev/nvidia-fs*"}, ) diff --git a/internal/modifier/gated.go b/internal/modifier/gated.go index a0239df8..b88916da 100644 --- a/internal/modifier/gated.go +++ b/internal/modifier/gated.go @@ -48,7 +48,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image devRoot := cfg.NVIDIAContainerCLIConfig.Root if image.Getenv("NVIDIA_GDS") == "enabled" { - d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot) + d, err := discover.NewGDSDiscoverer(logger, driverRoot) if err != nil { return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err) } diff --git a/pkg/nvcdi/gds.go b/pkg/nvcdi/gds.go index 73892f96..16607f11 100644 --- a/pkg/nvcdi/gds.go +++ b/pkg/nvcdi/gds.go @@ 
-34,7 +34,7 @@ var _ Interface = (*gdslib)(nil) // GetAllDeviceSpecs returns the device specs for all available devices. func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) { - discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot, l.devRoot) + discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot) if err != nil { return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err) }