From 4bab94baa60187a83beb6c0a273e2f7bc845f1ba Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 9 Apr 2025 13:48:43 +0200 Subject: [PATCH] Add envvar for libcuda.so parent dir to CDI spec This change adds an NVIDIA_CTK_LIBCUDA_DIR envvar to a generated CDI specification. This reports where the `libcuda.so.*` libraries will be injected into the container. Signed-off-by: Evan Lezar --- .../toolkit/toolkit_test.go | 1 + cmd/nvidia-ctk/cdi/generate/generate_test.go | 4 +++ pkg/nvcdi/driver-nvml.go | 26 ++++++++++++------- 3 files changed, 22 insertions(+), 9 deletions(-) diff --git a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go index d7246330..fd8a4bf8 100644 --- a/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go +++ b/cmd/nvidia-ctk-installer/toolkit/toolkit_test.go @@ -86,6 +86,7 @@ devices: hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel2047 containerEdits: env: + - NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu - NVIDIA_VISIBLE_DEVICES=void hooks: - hookName: createContainer diff --git a/cmd/nvidia-ctk/cdi/generate/generate_test.go b/cmd/nvidia-ctk/cdi/generate/generate_test.go index 6f762761..b8059b42 100644 --- a/cmd/nvidia-ctk/cdi/generate/generate_test.go +++ b/cmd/nvidia-ctk/cdi/generate/generate_test.go @@ -80,6 +80,7 @@ devices: hostPath: {{ .driverRoot }}/dev/nvidia0 containerEdits: env: + - NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu - NVIDIA_VISIBLE_DEVICES=void deviceNodes: - path: /dev/nvidiactl @@ -164,6 +165,7 @@ devices: hostPath: {{ .driverRoot }}/dev/nvidia0 containerEdits: env: + - NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu - NVIDIA_VISIBLE_DEVICES=void deviceNodes: - path: /dev/nvidiactl @@ -240,6 +242,7 @@ devices: hostPath: {{ .driverRoot }}/dev/nvidia0 containerEdits: env: + - NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu - NVIDIA_VISIBLE_DEVICES=void deviceNodes: - path: /dev/nvidiactl @@ -307,6 +310,7 @@ devices: hostPath: {{ .driverRoot }}/dev/nvidia0 containerEdits: env: + - NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu - NVIDIA_VISIBLE_DEVICES=void deviceNodes: - path: /dev/nvidiactl diff --git a/pkg/nvcdi/driver-nvml.go b/pkg/nvcdi/driver-nvml.go index 764b648a..51592ff5 100644 --- a/pkg/nvcdi/driver-nvml.go +++ b/pkg/nvcdi/driver-nvml.go @@ -82,7 +82,7 @@ func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover // NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version. func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover, error) { - libraryPaths, err := getVersionLibs(l.logger, l.driver, version) + libraryPaths, libCudaDirectoryPath, err := getVersionLibs(l.logger, l.driver, version) if err != nil { return nil, fmt.Errorf("failed to get libraries for driver version: %v", err) } @@ -116,6 +116,12 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover disableDeviceNodeModification := l.hookCreator.Create(DisableDeviceNodeModificationHook) discoverers = append(discoverers, disableDeviceNodeModification) + environmentVariable := &discover.EnvVar{ + Name: "NVIDIA_CTK_LIBCUDA_DIR", + Value: libCudaDirectoryPath, + } + discoverers = append(discoverers, environmentVariable) + d := discover.Merge(discoverers...) return d, nil @@ -203,39 +209,41 @@ func NewDriverBinariesDiscoverer(logger logger.Interface, driverRoot string) dis // getVersionLibs checks the LDCache for libraries ending in the specified driver version. // Although the ldcache at the specified driverRoot is queried, the paths are returned relative to this driverRoot. // This allows the standard mount location logic to be used for resolving the mounts. -func getVersionLibs(logger logger.Interface, driver *root.Driver, version string) ([]string, error) { +func getVersionLibs(logger logger.Interface, driver *root.Driver, version string) ([]string, string, error) { logger.Infof("Using driver version %v", version) libCudaPaths, err := cuda.New( driver.Libraries(), ).Locate("." + version) if err != nil { - return nil, fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err) + return nil, "", fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err) } - libRoot := filepath.Dir(libCudaPaths[0]) + libCudaDirectoryPath := filepath.Dir(libCudaPaths[0]) libraries := lookup.NewFileLocator( lookup.WithLogger(logger), lookup.WithSearchPaths( - libRoot, - filepath.Join(libRoot, "vdpau"), + libCudaDirectoryPath, + filepath.Join(libCudaDirectoryPath, "vdpau"), ), lookup.WithOptional(true), ) libs, err := libraries.Locate("*.so." + version) if err != nil { - return nil, fmt.Errorf("failed to locate libraries for driver version %v: %v", version, err) + return nil, "", fmt.Errorf("failed to locate libraries for driver version %v: %v", version, err) } if driver.Root == "/" || driver.Root == "" { - return libs, nil + return libs, libCudaDirectoryPath, nil } + libCudaDirectoryPath = driver.RelativeToRoot(libCudaDirectoryPath) + var relative []string for _, l := range libs { relative = append(relative, strings.TrimPrefix(l, driver.Root)) } - return relative, nil + return relative, libCudaDirectoryPath, nil }