diff --git a/CHANGELOG.md b/CHANGELOG.md index 060bbf05..bb6e3321 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## v1.13.0-rc.3 * Prefer /run over /var/run when locating nvidia-persistenced and nvidia-fabricmanager sockets. +* Only initialize NVML for modes that require it when running `nvidia-ctk cdi generate` ## v1.13.0-rc.2 diff --git a/cmd/nvidia-ctk/cdi/generate/generate.go b/cmd/nvidia-ctk/cdi/generate/generate.go index 535a6641..12caca6a 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate.go +++ b/cmd/nvidia-ctk/cdi/generate/generate.go @@ -30,8 +30,6 @@ import ( specs "github.com/container-orchestrated-devices/container-device-interface/specs-go" "github.com/sirupsen/logrus" "github.com/urfave/cli/v2" - "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device" - "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" ) const ( @@ -190,21 +188,11 @@ func (m command) generateSpec(cfg *config) (spec.Interface, error) { return nil, fmt.Errorf("failed to create device namer: %v", err) } - nvmllib := nvml.New() - if r := nvmllib.Init(); r != nvml.SUCCESS { - return nil, r - } - defer nvmllib.Shutdown() - - devicelib := device.New(device.WithNvml(nvmllib)) - cdilib := nvcdi.New( nvcdi.WithLogger(m.logger), nvcdi.WithDriverRoot(cfg.driverRoot), nvcdi.WithNVIDIACTKPath(cfg.nvidiaCTKPath), nvcdi.WithDeviceNamer(deviceNamer), - nvcdi.WithDeviceLib(devicelib), - nvcdi.WithNvmlLib(nvmllib), nvcdi.WithMode(string(cfg.mode)), ) diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index eee3ffd8..a9901590 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -31,6 +31,11 @@ import ( // NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation. // The supplied NVML Library is used to query the expected driver version. 
func NewDriverDiscoverer(logger *logrus.Logger, driverRoot string, nvidiaCTKPath string, nvmllib nvml.Interface) (discover.Discover, error) { + if r := nvmllib.Init(); r != nvml.SUCCESS { + return nil, fmt.Errorf("failed to initalize NVML: %v", r) + } + defer nvmllib.Shutdown() + version, r := nvmllib.SystemGetDriverVersion() if r != nvml.SUCCESS { return nil, fmt.Errorf("failed to determine driver version: %v", r) diff --git a/pkg/nvcdi/lib-nvml.go b/pkg/nvcdi/lib-nvml.go index 8fa29c11..95ccf66b 100644 --- a/pkg/nvcdi/lib-nvml.go +++ b/pkg/nvcdi/lib-nvml.go @@ -24,6 +24,7 @@ import ( "github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" "github.com/container-orchestrated-devices/container-device-interface/specs-go" "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device" + "gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" ) type nvmllib nvcdilib @@ -39,6 +40,11 @@ func (l *nvmllib) GetSpec() (spec.Interface, error) { func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) { var deviceSpecs []specs.Device + if r := l.nvmllib.Init(); r != nvml.SUCCESS { + return nil, fmt.Errorf("failed to initalize NVML: %v", r) + } + defer l.nvmllib.Shutdown() + gpuDeviceSpecs, err := l.getGPUDeviceSpecs() if err != nil { return nil, err