Add feature gate to require NVIDIA kernel modules

This change adds an opt-in feature to the NVIDIA Container Runtime that only uses the NVIDIA runtime if the NVIDIA kernel modules are loaded. Signed-off-by: Evan Lezar <elezar@nvidia.com>
2025-06-26 18:18:24 +00:00 · 2024-10-30 15:05:28 +01:00 · 2024-10-30 15:05:28 +01:00 · 7263d26817
commit 7263d26817
parent efb18a72ad
2 changed files with 22 additions and 0 deletions
--- a/internal/config/features.go
+++ b/internal/config/features.go
@ -21,6 +21,9 @@ type features struct {
 	// DisableImexChannelCreation ensures that the implicit creation of
 	// requested IMEX channels is skipped when invoking the nvidia-container-cli.
 	DisableImexChannelCreation *feature `toml:"disable-imex-channel-creation,omitempty"`
 	// RequireNvidiaKernelModules indicates that the NVIDIA kernel module must be
 	// loaded for the NVIDIA Container Runtime to perform any OCI spec modifications.
 	RequireNvidiaKernelModules *feature `toml:"require-nvidia-kernel-module,omitempty"`
 }
 //nolint:unused
--- a/internal/runtime/runtime_factory.go
+++ b/internal/runtime/runtime_factory.go
@ -18,6 +18,7 @@ package runtime
 import (
 	"fmt"
 	"os"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
@ -41,6 +42,11 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
 		return lowLevelRuntime, nil
 	}
 	if cfg.Features.RequireNvidiaKernelModules.IsEnabled() && !isNvidiaModuleLoaded() {
 		logger.Tracef("NVIDIA driver modules are not yet loaded; skipping modifer")
 		return lowLevelRuntime, nil
 	}
 	ociSpec, err := oci.NewSpec(logger, argv)
 	if err != nil {
 		return nil, fmt.Errorf("error constructing OCI specification: %v", err)
@ -62,6 +68,19 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
 	return r, nil
 }
 // isNvidiaModuleLoaded reports whether the NVIDIA driver appears to be loaded.
 // It returns true when a stat of /proc/driver/nvidia/version succeeds and
 // false when the stat fails for any reason.
 func isNvidiaModuleLoaded() bool {
 	// TODO: The earlier shell-based check inspected /proc/modules instead:
 	//	cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1
 	//	if [ "${?}" != "0" ]; then
 	//		echo "nvidia driver modules are not yet loaded, invoking runc directly"
 	//		exec runc "$@"
 	//	fi
 	if _, statErr := os.Stat("/proc/driver/nvidia/version"); statErr != nil {
 		return false
 	}
 	return true
 }
 // newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
 func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
 	rawSpec, err := ociSpec.Load()