From a4bfccc3fec84d629c9794824e0c6cfc3d90cab7 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 16 Sep 2024 11:19:02 +0200 Subject: [PATCH] Use include-persistenced-socket feature for CDI mode This change ensures that the internal CDI representation includes the persistenced socket if the include-persistenced-socket feature flag is enabled. Signed-off-by: Evan Lezar --- internal/discover/ipc.go | 13 ++++++++----- internal/modifier/cdi.go | 1 + pkg/nvcdi/common-nvml.go | 2 +- pkg/nvcdi/driver-nvml.go | 22 +++++++++++----------- pkg/nvcdi/lib.go | 2 ++ pkg/nvcdi/management.go | 2 +- pkg/nvcdi/options.go | 11 +++++++++++ 7 files changed, 35 insertions(+), 18 deletions(-) diff --git a/internal/discover/ipc.go b/internal/discover/ipc.go index f636290f..f3d106b5 100644 --- a/internal/discover/ipc.go +++ b/internal/discover/ipc.go @@ -24,7 +24,13 @@ import ( type ipcMounts mounts // NewIPCDiscoverer creats a discoverer for NVIDIA IPC sockets. -func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, error) { +func NewIPCDiscoverer(logger logger.Interface, driverRoot string, includePersistencedSocket bool) (Discover, error) { + var requiredSockets []string + if includePersistencedSocket { + requiredSockets = append(requiredSockets, "/nvidia-persistenced/socket") + } + requiredSockets = append(requiredSockets, "/nvidia-fabricmanager/socket") + sockets := newMounts( logger, lookup.NewFileLocator( @@ -34,10 +40,7 @@ func NewIPCDiscoverer(logger logger.Interface, driverRoot string) (Discover, err lookup.WithCount(1), ), driverRoot, - []string{ - "/nvidia-persistenced/socket", - "/nvidia-fabricmanager/socket", - }, + requiredSockets, ) mps := newMounts( diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index c5af4f88..d3e4e935 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -189,6 +189,7 @@ func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devic 
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root), nvcdi.WithVendor("runtime.nvidia.com"), nvcdi.WithClass("gpu"), + nvcdi.WithOptInFeature("include-persistenced-socket", cfg.Features.IncludePersistencedSocket.IsEnabled()), ) if err != nil { return nil, fmt.Errorf("failed to construct CDI library: %w", err) diff --git a/pkg/nvcdi/common-nvml.go b/pkg/nvcdi/common-nvml.go index 4dd1bc35..6e9661cb 100644 --- a/pkg/nvcdi/common-nvml.go +++ b/pkg/nvcdi/common-nvml.go @@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) { l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err) } - driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib) + driverFiles, err := l.NewDriverDiscoverer() if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err) } diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 8fb39888..59ff91c2 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -34,41 +34,41 @@ import ( // NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. // The supplied NVML Library is used to query the expected driver version. 
-func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) { - if r := nvmllib.Init(); r != nvml.SUCCESS { +func (l *nvmllib) NewDriverDiscoverer() (discover.Discover, error) { + if r := l.nvmllib.Init(); r != nvml.SUCCESS { return nil, fmt.Errorf("failed to initialize NVML: %v", r) } defer func() { - if r := nvmllib.Shutdown(); r != nvml.SUCCESS { - logger.Warningf("failed to shutdown NVML: %v", r) + if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS { + l.logger.Warningf("failed to shutdown NVML: %v", r) } }() - version, r := nvmllib.SystemGetDriverVersion() + version, r := l.nvmllib.SystemGetDriverVersion() if r != nvml.SUCCESS { return nil, fmt.Errorf("failed to determine driver version: %v", r) } - return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version) + return (*nvcdilib)(l).newDriverVersionDiscoverer(version) } -func newDriverVersionDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath, ldconfigPath, version string) (discover.Discover, error) { - libraries, err := NewDriverLibraryDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version) +func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover, error) { + libraries, err := NewDriverLibraryDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, version) if err != nil { return nil, fmt.Errorf("failed to create discoverer for driver libraries: %v", err) } - ipcs, err := discover.NewIPCDiscoverer(logger, driver.Root) + ipcs, err := discover.NewIPCDiscoverer(l.logger, l.driver.Root, l.optInFeatures["include-persistenced-socket"]) if err != nil { return nil, fmt.Errorf("failed to create discoverer for IPC sockets: %v", err) } - firmwares, err := NewDriverFirmwareDiscoverer(logger, driver.Root, version) + firmwares, err := NewDriverFirmwareDiscoverer(l.logger, l.driver.Root, version) if err != nil { return 
nil, fmt.Errorf("failed to create discoverer for GSP firmware: %v", err) } - binaries := NewDriverBinariesDiscoverer(logger, driver.Root) + binaries := NewDriverBinariesDiscoverer(l.logger, l.driver.Root) d := discover.Merge( libraries, diff --git a/pkg/nvcdi/lib.go b/pkg/nvcdi/lib.go index d2db3b6c..e14b2971 100644 --- a/pkg/nvcdi/lib.go +++ b/pkg/nvcdi/lib.go @@ -63,6 +63,8 @@ type nvcdilib struct { infolib info.Interface mergedDeviceOptions []transform.MergedDeviceOption + + optInFeatures map[string]bool } // New creates a new nvcdi library diff --git a/pkg/nvcdi/management.go b/pkg/nvcdi/management.go index 4648e5bb..8f7709af 100644 --- a/pkg/nvcdi/management.go +++ b/pkg/nvcdi/management.go @@ -66,7 +66,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) { return nil, fmt.Errorf("failed to get CUDA version: %v", err) } - driver, err := newDriverVersionDiscoverer(m.logger, m.driver, m.nvidiaCDIHookPath, m.ldconfigPath, version) + driver, err := (*nvcdilib)(m).newDriverVersionDiscoverer(version) if err != nil { return nil, fmt.Errorf("failed to create driver library discoverer: %v", err) } diff --git a/pkg/nvcdi/options.go b/pkg/nvcdi/options.go index 417687b9..762b5ac6 100644 --- a/pkg/nvcdi/options.go +++ b/pkg/nvcdi/options.go @@ -155,3 +155,14 @@ func WithLibrarySearchPaths(paths []string) Option { o.librarySearchPaths = paths } } + +// WithOptInFeature sets a specific opt-in feature. +// Note that opt-in features set by earlier options are kept, not removed. +func WithOptInFeature(feature string, enabled bool) Option { + return func(n *nvcdilib) { + if n.optInFeatures == nil { + n.optInFeatures = make(map[string]bool) + } + n.optInFeatures[feature] = enabled + } +}