From e1ae57eef95a75bd85f74029db97f97e045172e5 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 6 Feb 2025 17:58:10 +0100 Subject: [PATCH] Add enable-cuda-compat hook if required This change adds the enable-cuda-compat hook to the incoming OCI runtime spec if the allow-cuda-compat-libs-from-container feature flag is not enabled. An update-ldcache hook is also injected to ensure that the required folders are processed. Signed-off-by: Evan Lezar --- .../cudacompat/cudacompat_test.go | 9 +++++++ internal/discover/compat_libs.go | 24 +++++++++++++++++++ internal/modifier/gated.go | 8 ++++++- internal/runtime/runtime_factory.go | 2 +- 4 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 internal/discover/compat_libs.go diff --git a/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go b/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go index ad8d5695..0422fe76 100644 --- a/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go +++ b/cmd/nvidia-cdi-hook/cudacompat/cudacompat_test.go @@ -73,6 +73,15 @@ func TestCompatLibs(t *testing.T) { hostDriverVersion: "222.55.66", expectedContainerForwardCompatDir: "", }, + { + description: "numeric comparison is used; ldcache", + contents: map[string]string{ + "/etc/ld.so.cache": "", + "/usr/local/cuda/compat/libcuda.so.222.88.99": "", + }, + hostDriverVersion: "99.55.66", + expectedContainerForwardCompatDir: "/usr/local/cuda/compat", + }, { description: "driver version empty; ldcache", contents: map[string]string{ diff --git a/internal/discover/compat_libs.go b/internal/discover/compat_libs.go new file mode 100644 index 00000000..027ca2ed --- /dev/null +++ b/internal/discover/compat_libs.go @@ -0,0 +1,24 @@ +package discover + +import ( + "strings" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root" +) + +// NewCUDACompatHookDiscoverer creates a discoverer for an enable-cuda-compat hook. 
+// This hook is responsible for setting up CUDA compatibility in the container and depends on the host driver version. +func NewCUDACompatHookDiscoverer(logger logger.Interface, nvidiaCDIHookPath string, driver *root.Driver) Discover { + _, cudaVersionPattern := getCUDALibRootAndVersionPattern(logger, driver) + var args []string + if !strings.Contains(cudaVersionPattern, "*") { + args = append(args, "--host-driver-version="+cudaVersionPattern) + } + + return CreateNvidiaCDIHook( + nvidiaCDIHookPath, + "enable-cuda-compat", + args..., + ) +} diff --git a/internal/modifier/gated.go b/internal/modifier/gated.go index 2c39d207..409f793b 100644 --- a/internal/modifier/gated.go +++ b/internal/modifier/gated.go @@ -23,6 +23,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" + "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" ) @@ -35,7 +36,7 @@ import ( // NVIDIA_GDRCOPY=enabled // // If not devices are selected, no changes are made. 
-func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { +func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver) (oci.SpecModifier, error) { if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 { logger.Infof("No modification required; no devices requested") return nil, nil @@ -78,5 +79,10 @@ func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image discoverers = append(discoverers, d) } + if !cfg.Features.AllowCUDACompatLibsFromContainer.IsEnabled() { + compatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(logger, cfg.NVIDIACTKConfig.Path, driver) + discoverers = append(discoverers, compatLibHookDiscoverer) + } + return NewModifierFromDiscoverer(logger, discover.Merge(discoverers...)) } diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index 2b5cd9c6..537b1727 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -94,7 +94,7 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp } modifiers = append(modifiers, graphicsModifier) case "feature-gated": - featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image) + featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image, driver) if err != nil { return nil, err }