mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-12-04 18:15:00 +00:00
Fix bug when using just-in-time CDI spec generation
This change fixes a bug when using just-in-time CDI spec generation for the NVIDIA Container Runtime for specific devices (i.e. not 'all'). Instead of unconditionally using the default nvsandboxutils library -- leading to errors due to undefined symbols -- we check whether the library can be properly initialised before continuing.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
d78868cd31
commit
75376d3df2
@ -103,7 +103,7 @@ func (l *nvmllib) GetDeviceSpecsByID(ids ...string) ([]specs.Device, error) {
|
|||||||
return l.GetDeviceSpecsBy(identifiers...)
|
return l.GetDeviceSpecsBy(identifiers...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetDeviceSpecsBy is not supported for the gdslib specs.
|
// GetDeviceSpecsBy returns the device specs for devices with the specified identifiers.
|
||||||
func (l *nvmllib) GetDeviceSpecsBy(identifiers ...device.Identifier) ([]specs.Device, error) {
|
func (l *nvmllib) GetDeviceSpecsBy(identifiers ...device.Identifier) ([]specs.Device, error) {
|
||||||
for _, id := range identifiers {
|
for _, id := range identifiers {
|
||||||
if id == "all" {
|
if id == "all" {
|
||||||
@ -118,10 +118,23 @@ func (l *nvmllib) GetDeviceSpecsBy(identifiers ...device.Identifier) ([]specs.De
|
|||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS {
|
if r := l.nvmllib.Shutdown(); r != nvml.SUCCESS {
|
||||||
l.logger.Warningf("failed to shutdown NVML: %w", r)
|
l.logger.Warningf("failed to shutdown NVML: %v", r)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
if l.nvsandboxutilslib != nil {
|
||||||
|
if r := l.nvsandboxutilslib.Init(l.driverRoot); r != nvsandboxutils.SUCCESS {
|
||||||
|
l.logger.Warningf("Failed to init nvsandboxutils: %v; ignoring", r)
|
||||||
|
l.nvsandboxutilslib = nil
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
if l.nvsandboxutilslib == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = l.nvsandboxutilslib.Shutdown()
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
nvmlDevices, err := l.getNVMLDevicesByID(identifiers...)
|
nvmlDevices, err := l.getNVMLDevicesByID(identifiers...)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get NVML device handles: %w", err)
|
return nil, fmt.Errorf("failed to get NVML device handles: %w", err)
|
||||||
|
Loading…
Reference in New Issue
Block a user