This commit is contained in:
Evan Lezar 2024-11-21 14:48:46 +00:00 committed by GitHub
commit 963a7a3a84
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 27 additions and 44 deletions

View File

@ -21,6 +21,9 @@ type features struct {
// DisableImexChannelCreation ensures that the implicit creation of
// requested IMEX channels is skipped when invoking the nvidia-container-cli.
DisableImexChannelCreation *feature `toml:"disable-imex-channel-creation,omitempty"`
// RequireNvidiaKernelModules indicates that the NVIDIA kernel modules must be
// loaded for the NVIDIA Container Runtime to perform any OCI spec modifications.
// The TOML key uses the plural form so that it matches the
// "features.require-nvidia-kernel-modules" value set by installToolkitConfig;
// with a singular key that setting would not map onto this field.
RequireNvidiaKernelModules *feature `toml:"require-nvidia-kernel-modules,omitempty"`
}
//nolint:unused

View File

@ -18,6 +18,7 @@ package runtime
import (
"fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
@ -41,6 +42,11 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
return lowLevelRuntime, nil
}
if cfg.Features.RequireNvidiaKernelModules.IsEnabled() && !isNvidiaModuleLoaded() {
logger.Tracef("NVIDIA driver modules are not yet loaded; skipping modifer")
return lowLevelRuntime, nil
}
ociSpec, err := oci.NewSpec(logger, argv)
if err != nil {
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
@ -62,6 +68,19 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
return r, nil
}
// isNvidiaModuleLoaded reports whether the NVIDIA driver appears to be loaded.
//
// The check succeeds when /proc/driver/nvidia/version can be stat'ed; any
// error from os.Stat (not only "does not exist") is treated as "not loaded".
//
// Historical note: the generated shell wrapper used to perform this check
// itself before handing over to runc:
//
//	cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1
//	if [ "${?}" != "0" ]; then
//		echo "nvidia driver modules are not yet loaded, invoking runc directly"
//		exec runc "$@"
//	fi
//
// That variant inspected /proc/modules, whereas this function looks at the
// driver's version file instead.
func isNvidiaModuleLoaded() bool {
	if _, statErr := os.Stat("/proc/driver/nvidia/version"); statErr != nil {
		return false
	}
	return true
}
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
rawSpec, err := ociSpec.Load()

View File

@ -36,7 +36,6 @@ type executable struct {
source string
target executableTarget
env map[string]string
preLines []string
argLines []string
}
@ -96,11 +95,6 @@ func (e executable) writeWrapperTo(wrapper io.Writer, destFolder string, dotfile
// Add the shebang
fmt.Fprintln(wrapper, "#! /bin/sh")
// Add the preceding lines if any
for _, line := range e.preLines {
fmt.Fprintf(wrapper, "%s\n", r.apply(line))
}
// Update the path to include the destination folder
var env map[string]string
if e.env == nil {

View File

@ -59,23 +59,6 @@ func TestWrapper(t *testing.T) {
"",
},
},
{
e: executable{
preLines: []string{
"preline1",
"preline2",
},
},
expectedLines: []string{
shebang,
"preline1",
"preline2",
"PATH=/dest/folder:$PATH \\",
"source.real \\",
"\t\"$@\"",
"",
},
},
{
e: executable{
argLines: []string{

View File

@ -57,16 +57,6 @@ func newNvidiaContainerRuntimeInstaller(source string) *executable {
}
func newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable {
preLines := []string{
"",
"cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1",
"if [ \"${?}\" != \"0\" ]; then",
" echo \"nvidia driver modules are not yet loaded, invoking runc directly\"",
" exec runc \"$@\"",
"fi",
"",
}
runtimeEnv := make(map[string]string)
runtimeEnv["XDG_CONFIG_HOME"] = filepath.Join(destDirPattern, ".config")
for k, v := range env {
@ -74,10 +64,9 @@ func newRuntimeInstaller(source string, target executableTarget, env map[string]
}
r := executable{
source: source,
target: target,
env: runtimeEnv,
preLines: preLines,
source: source,
target: target,
env: runtimeEnv,
}
return &r

View File

@ -38,13 +38,6 @@ func TestNvidiaContainerRuntimeInstallerWrapper(t *testing.T) {
expectedLines := []string{
shebang,
"",
"cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1",
"if [ \"${?}\" != \"0\" ]; then",
" echo \"nvidia driver modules are not yet loaded, invoking runc directly\"",
" exec runc \"$@\"",
"fi",
"",
"PATH=/dest/folder:$PATH \\",
"XDG_CONFIG_HOME=/dest/folder/.config \\",
"source.real \\",

View File

@ -466,6 +466,8 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai
configValues["nvidia-container-runtime.runtimes"] = toolkitRuntimeList
}
// We require the NVIDIA kernel modules to be loaded.
configValues["features.require-nvidia-kernel-modules"] = true
for _, optInFeature := range opts.optInFeatures.Value() {
configValues["features."+optInFeature] = true
}