nvidia-container-toolkit/internal/runtime/runtime_factory.go
Evan Lezar 7263d26817
Add feature gate to require NVIDIA kernel modules
This change adds an opt-in feature to the NVIDIA Container Runtime that
only uses the NVIDIA runtime if the NVIDIA kernel modules are loaded.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2024-10-30 15:15:18 +01:00

136 lines
4.4 KiB
Go

/*
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package runtime
import (
"fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
// newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger
func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv []string, driver *root.Driver) (oci.Runtime, error) {
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, cfg.NVIDIAContainerRuntimeConfig.Runtimes)
if err != nil {
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
}
logger.Tracef("Using low-level runtime %v", lowLevelRuntime.String())
if !oci.HasCreateSubcommand(argv) {
logger.Tracef("Skipping modifier for non-create subcommand")
return lowLevelRuntime, nil
}
if cfg.Features.RequireNvidiaKernelModules.IsEnabled() && !isNvidiaModuleLoaded() {
logger.Tracef("NVIDIA driver modules are not yet loaded; skipping modifer")
return lowLevelRuntime, nil
}
ociSpec, err := oci.NewSpec(logger, argv)
if err != nil {
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
}
specModifier, err := newSpecModifier(logger, cfg, ociSpec, driver)
if err != nil {
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
}
// Create the wrapping runtime with the specified modifier.
r := oci.NewModifyingRuntimeWrapper(
logger,
lowLevelRuntime,
ociSpec,
specModifier,
)
return r, nil
}
// isNvidiaKernelModuleLoaded checks whether the NVIDIA GPU driver is installed
// and the kernel module is available.
func isNvidiaModuleLoaded() bool {
// TODO: This was implemented as:
// cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1
// if [ "${?}" != "0" ]; then
// echo "nvidia driver modules are not yet loaded, invoking runc directly"
// exec runc "$@"
// fi
_, err := os.Stat("/proc/driver/nvidia/version")
return err == nil
}
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
}
image, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image)
modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, image)
if err != nil {
return nil, err
}
// For CDI mode we make no additional modifications.
if mode == "cdi" {
return modeModifier, nil
}
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver)
if err != nil {
return nil, err
}
featureModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image)
if err != nil {
return nil, err
}
modifiers := modifier.Merge(
modeModifier,
graphicsModifier,
featureModifier,
)
return modifiers, nil
}
func newModeModifier(logger logger.Interface, mode string, cfg *config.Config, ociSpec oci.Spec, image image.CUDA) (oci.SpecModifier, error) {
switch mode {
case "legacy":
return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil
case "csv":
return modifier.NewCSVModifier(logger, cfg, image)
case "cdi":
return modifier.NewCDIModifier(logger, cfg, ociSpec)
}
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
}