Fix discovery of nvidia-fs devices in non-privileged containers

The /dev/nvidia-fs* device nodes for GDS are not created at the
driver root when running a containerized driver and are always
created in /.

This change updates the search path for these device nodes so that
non-privileged containers also have the device nodes injected.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2025-06-16 19:21:09 +02:00
parent 8149be09ac
commit 0db8ca9893
No known key found for this signature in database
3 changed files with 5 additions and 4 deletions

View File

@ -29,10 +29,11 @@ type gdsDeviceDiscoverer struct {
} }
// NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts. // NewGDSDiscoverer creates a discoverer for GPUDirect Storage devices and mounts.
func NewGDSDiscoverer(logger logger.Interface, driverRoot string, devRoot string) (Discover, error) { func NewGDSDiscoverer(logger logger.Interface, driverRoot string) (Discover, error) {
devices := NewCharDeviceDiscoverer( devices := NewCharDeviceDiscoverer(
logger, logger,
devRoot, // The /dev/nvidia-fs* devices are always created at /
"/",
[]string{"/dev/nvidia-fs*"}, []string{"/dev/nvidia-fs*"},
) )

View File

@ -48,7 +48,7 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image
devRoot := cfg.NVIDIAContainerCLIConfig.Root devRoot := cfg.NVIDIAContainerCLIConfig.Root
if image.Getenv("NVIDIA_GDS") == "enabled" { if image.Getenv("NVIDIA_GDS") == "enabled" {
d, err := discover.NewGDSDiscoverer(logger, driverRoot, devRoot) d, err := discover.NewGDSDiscoverer(logger, driverRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err) return nil, fmt.Errorf("failed to construct discoverer for GDS devices: %w", err)
} }

View File

@ -34,7 +34,7 @@ var _ Interface = (*gdslib)(nil)
// GetAllDeviceSpecs returns the device specs for all available devices. // GetAllDeviceSpecs returns the device specs for all available devices.
func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) { func (l *gdslib) GetAllDeviceSpecs() ([]specs.Device, error) {
discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot, l.devRoot) discoverer, err := discover.NewGDSDiscoverer(l.logger, l.driverRoot)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err) return nil, fmt.Errorf("failed to create GPUDirect Storage discoverer: %v", err)
} }