mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-16 21:41:17 +00:00
[no-relnote] Refactor CDI version extraction
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
6746a412af
commit
de230a7e60
@ -24,7 +24,7 @@ import (
|
||||
|
||||
// newCommonNVMLDiscoverer returns a discoverer for entities that are not associated with a specific CDI device.
|
||||
// This includes driver libraries and meta devices, for example.
|
||||
func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
|
||||
func (l *nvmllib) newCommonNVMLDiscoverer(version string) (discover.Discover, error) {
|
||||
metaDevices := discover.NewCharDeviceDiscoverer(
|
||||
l.logger,
|
||||
l.devRoot,
|
||||
@ -41,7 +41,7 @@ func (l *nvmllib) newCommonNVMLDiscoverer() (discover.Discover, error) {
|
||||
l.logger.Warningf("failed to create discoverer for graphics mounts: %v", err)
|
||||
}
|
||||
|
||||
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, l.nvmllib)
|
||||
driverFiles, err := NewDriverDiscoverer(l.logger, l.driver, l.nvidiaCDIHookPath, l.ldconfigPath, version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for driver files: %v", err)
|
||||
}
|
||||
|
@ -22,7 +22,6 @@ import (
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
"golang.org/x/sys/unix"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
@ -34,21 +33,7 @@ import (
|
||||
|
||||
// NewDriverDiscoverer creates a discoverer for the libraries and binaries associated with a driver installation.
|
||||
// The expected driver version is supplied by the caller and passed through to the version-specific discoverer.
|
||||
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, nvmllib nvml.Interface) (discover.Discover, error) {
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to initialize NVML: %v", r)
|
||||
}
|
||||
defer func() {
|
||||
if r := nvmllib.Shutdown(); r != nvml.SUCCESS {
|
||||
logger.Warningf("failed to shutdown NVML: %v", r)
|
||||
}
|
||||
}()
|
||||
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||
}
|
||||
|
||||
func NewDriverDiscoverer(logger logger.Interface, driver *root.Driver, nvidiaCDIHookPath string, ldconfigPath string, version string) (discover.Discover, error) {
|
||||
return newDriverVersionDiscoverer(logger, driver, nvidiaCDIHookPath, ldconfigPath, version)
|
||||
}
|
||||
|
||||
|
@ -83,7 +83,25 @@ func (l *nvmllib) GetAllDeviceSpecs() ([]specs.Device, error) {
|
||||
|
||||
// GetCommonEdits generates the CDI container edits that can be used for any device.
|
||||
func (l *nvmllib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
common, err := l.newCommonNVMLDiscoverer()
|
||||
if l.nvsandboxutilslib != nil {
|
||||
if r := l.nvsandboxutilslib.Init(l.driverRoot); r != nvsandboxutils.SUCCESS {
|
||||
l.logger.Warningf("Failed to init nvsandboxutils: %v; ignoring", r)
|
||||
l.nvsandboxutilslib = nil
|
||||
}
|
||||
defer func() {
|
||||
if l.nvsandboxutilslib == nil {
|
||||
return
|
||||
}
|
||||
_ = l.nvsandboxutilslib.Shutdown()
|
||||
}()
|
||||
}
|
||||
|
||||
version, err := (*nvcdilib)(l).getDriverVersion()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get driver version: %v", err)
|
||||
}
|
||||
|
||||
common, err := l.newCommonNVMLDiscoverer(version)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create discoverer for common entities: %v", err)
|
||||
}
|
||||
|
@ -18,12 +18,15 @@ package nvcdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/device"
|
||||
"github.com/NVIDIA/go-nvlib/pkg/nvlib/info"
|
||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/platform-support/tegra/csv"
|
||||
@ -168,18 +171,36 @@ func New(opts ...Option) (Interface, error) {
|
||||
return &w, nil
|
||||
}
|
||||
|
||||
// getCudaVersion returns the CUDA version of the current system.
|
||||
func (l *nvcdilib) getCudaVersion() (string, error) {
|
||||
version, err := l.getCudaVersionNvsandboxutils()
|
||||
if err == nil {
|
||||
// getDriverVersion returns the driver version of the current system.
|
||||
func (l *nvcdilib) getDriverVersion() (string, error) {
|
||||
if version, err := l.getDriverVersionNvsandboxutils(); err == nil && version != "" {
|
||||
return version, err
|
||||
}
|
||||
|
||||
// Fallback to NVML
|
||||
return l.getCudaVersionNvml()
|
||||
if version, err := l.getDriverVersionNvml(); err == nil && version != "" {
|
||||
return version, err
|
||||
}
|
||||
|
||||
// Fallback to getting the version from the libcuda.so suffix.
|
||||
return l.getDriverVersionLibcudaSo()
|
||||
}
|
||||
|
||||
func (l *nvcdilib) getCudaVersionNvml() (string, error) {
|
||||
func (l *nvcdilib) getDriverVersionLibcudaSo() (string, error) {
|
||||
libCudaPaths, err := cuda.New(
|
||||
l.driver.Libraries(),
|
||||
).Locate(".*.*")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
|
||||
}
|
||||
libCudaPath := libCudaPaths[0]
|
||||
|
||||
version := strings.TrimPrefix(filepath.Base(libCudaPath), "libcuda.so.")
|
||||
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func (l *nvcdilib) getDriverVersionNvml() (string, error) {
|
||||
if hasNVML, reason := l.infolib.HasNvml(); !hasNVML {
|
||||
return "", fmt.Errorf("nvml not detected: %v", reason)
|
||||
}
|
||||
@ -203,7 +224,7 @@ func (l *nvcdilib) getCudaVersionNvml() (string, error) {
|
||||
return version, nil
|
||||
}
|
||||
|
||||
func (l *nvcdilib) getCudaVersionNvsandboxutils() (string, error) {
|
||||
func (l *nvcdilib) getDriverVersionNvsandboxutils() (string, error) {
|
||||
if l.nvsandboxutilslib == nil {
|
||||
return "", fmt.Errorf("libnvsandboxutils is not available")
|
||||
}
|
||||
|
@ -27,7 +27,6 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/cuda"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||
)
|
||||
@ -75,7 +74,7 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
}()
|
||||
}
|
||||
|
||||
version, err := m.getCudaVersion()
|
||||
version, err := (*nvcdilib)(m).getDriverVersion()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CUDA version: %v", err)
|
||||
}
|
||||
@ -93,27 +92,6 @@ func (m *managementlib) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
||||
return edits, nil
|
||||
}
|
||||
|
||||
// getCudaVersion returns the CUDA version for use in managementlib containers.
|
||||
func (m *managementlib) getCudaVersion() (string, error) {
|
||||
version, err := (*nvcdilib)(m).getCudaVersion()
|
||||
if err == nil {
|
||||
return version, nil
|
||||
}
|
||||
|
||||
libCudaPaths, err := cuda.New(
|
||||
m.driver.Libraries(),
|
||||
).Locate(".*.*")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to locate libcuda.so: %v", err)
|
||||
}
|
||||
|
||||
libCudaPath := libCudaPaths[0]
|
||||
|
||||
version = strings.TrimPrefix(filepath.Base(libCudaPath), "libcuda.so.")
|
||||
|
||||
return version, nil
|
||||
}
|
||||
|
||||
type managementDiscoverer struct {
|
||||
discover.Discover
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user