mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-21 15:57:49 +00:00
nvsandboxutils: Add usage of GetDriverVersion API
This change includes the usage of Sandboxutils GetDriverVersion API to retrieve the CUDA driver version. If the library is not available on the system or the API call fails for some other reason, it will fallback to the NVML API to return the driver version. Signed-off-by: Evan Lezar <elezar@nvidia.com> Signed-off-by: Huy Nguyen <huyn@nvidia.com> Signed-off-by: Sananya Majumder <sananyam@nvidia.com>
This commit is contained in:
parent
dcbf5bc81f
commit
7b770f63c3
@ -27,6 +27,7 @@ import (
|
||||
"tags.cncf.io/container-device-interface/specs-go"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
)
|
||||
|
||||
@ -52,6 +53,19 @@ func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
}
|
||||
}()
|
||||
|
||||
if l.nvsandboxutilslib != nil {
|
||||
if r := l.nvsandboxutilslib.Init(l.driverRoot); r != nvsandboxutils.SUCCESS {
|
||||
l.logger.Warningf("Failed to init nvsandboxutils: %v; ignoring", r)
|
||||
l.nvsandboxutilslib = nil
|
||||
}
|
||||
defer func() {
|
||||
if l.nvsandboxutilslib == nil {
|
||||
return
|
||||
}
|
||||
_ = l.nvsandboxutilslib.Shutdown()
|
||||
}()
|
||||
}
|
||||
|
||||
gpuDeviceSpecs, err := l.getGPUDeviceSpecs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
|
@ -26,6 +26,7 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/transform"
|
||||
@ -43,6 +44,7 @@ type wrapper struct {
|
||||
type nvcdilib struct {
|
||||
logger logger.Interface
|
||||
nvmllib nvml.Interface
|
||||
nvsandboxutilslib nvsandboxutils.Interface
|
||||
mode string
|
||||
devicelib device.Interface
|
||||
deviceNamers DeviceNamers
|
||||
@ -107,6 +109,19 @@ func New(opts ...Option) (Interface, error) {
|
||||
}
|
||||
l.nvmllib = nvml.New(nvmlOpts...)
|
||||
}
|
||||
if l.nvsandboxutilslib == nil {
|
||||
var nvsandboxutilsOpts []nvsandboxutils.LibraryOption
|
||||
// Set the library path for libnvidia-sandboxutils
|
||||
candidates, err := l.driver.Libraries().Locate("libnvidia-sandboxutils.so.1")
|
||||
if err != nil {
|
||||
l.logger.Warningf("Ignoring error in locating libnvidia-sandboxutils.so.1: %v", err)
|
||||
} else {
|
||||
libNvidiaSandboxutilsPath := candidates[0]
|
||||
l.logger.Infof("Using %v", libNvidiaSandboxutilsPath)
|
||||
nvsandboxutilsOpts = append(nvsandboxutilsOpts, nvsandboxutils.WithLibraryPath(libNvidiaSandboxutilsPath))
|
||||
}
|
||||
l.nvsandboxutilslib = nvsandboxutils.New(nvsandboxutilsOpts...)
|
||||
}
|
||||
if l.devicelib == nil {
|
||||
l.devicelib = device.New(l.nvmllib)
|
||||
}
|
||||
@ -214,6 +229,16 @@ func (l *nvcdilib) resolveMode() (rmode string) {
|
||||
|
||||
// getCudaVersion returns the CUDA version of the current system.
|
||||
func (l *nvcdilib) getCudaVersion() (string, error) {
|
||||
version, err := l.getCudaVersionNvsandboxutils()
|
||||
if err == nil {
|
||||
return version, err
|
||||
}
|
||||
|
||||
// Fallback to NVML
|
||||
return l.getCudaVersionNvml()
|
||||
}
|
||||
|
||||
func (l *nvcdilib) getCudaVersionNvml() (string, error) {
|
||||
if hasNVML, reason := l.infolib.HasNvml(); !hasNVML {
|
||||
return "", fmt.Errorf("nvml not detected: %v", reason)
|
||||
}
|
||||
@ -236,3 +261,12 @@ func (l *nvcdilib) getCudaVersion() (string, error) {
|
||||
}
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func (l *nvcdilib) getCudaVersionNvsandboxutils() (string, error) {
|
||||
// Sandboxutils initialization should happen before this function is called
|
||||
version, ret := l.nvsandboxutilslib.GetDriverVersion()
|
||||
if ret != nvsandboxutils.SUCCESS {
|
||||
return "", fmt.Errorf("%v", ret)
|
||||
}
|
||||
return version, nil
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user