mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-03 04:10:47 +00:00
Replace experimental and discover-mode
These changes replace the nvidia-container-runtime config options experimental and discover-mode with a single mode config option. Note that mode is now a string with a default value of "auto" and a mode value of "legacy" is equivalent to experimental == false. Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
e0ad82e467
commit
e591f3f26b
@ -25,15 +25,14 @@ import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/requirements"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// experiemental represents the modifications required by the experimental runtime
|
||||
type experimental struct {
|
||||
// csvMode represents the modifications as performed by the csv runtime mode
|
||||
type csvMode struct {
|
||||
logger *logrus.Logger
|
||||
discoverer discover.Discover
|
||||
}
|
||||
@ -45,9 +44,9 @@ const (
|
||||
nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK"
|
||||
)
|
||||
|
||||
// NewExperimentalModifier creates a modifier that applies the experimental
|
||||
// modications to an OCI spec if required by the runtime wrapper.
|
||||
func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
// NewCSVModifier creates a modifier that applies modifications to an OCI spec if required by the runtime wrapper.
|
||||
// The modifications are defined by CSV MountSpecs.
|
||||
func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
rawSpec, err := ociSpec.Load()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
@ -67,64 +66,51 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec
|
||||
NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path,
|
||||
}
|
||||
|
||||
var d discover.Discover
|
||||
|
||||
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) {
|
||||
case "legacy":
|
||||
legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create legacy discoverer: %v", err)
|
||||
}
|
||||
d = legacyDiscoverer
|
||||
case "csv":
|
||||
// TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the
|
||||
// visible devices are checked.
|
||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := checkRequirements(logger, &image); err != nil {
|
||||
return nil, fmt.Errorf("requirements not met: %v", err)
|
||||
}
|
||||
|
||||
csvFiles, err := csv.GetFileList(csv.DefaultMountSpecPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||
}
|
||||
|
||||
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
|
||||
if nvidiaRequireJetpack != "csv-mounts=all" {
|
||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||
}
|
||||
|
||||
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
|
||||
}
|
||||
|
||||
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
d = discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
|
||||
default:
|
||||
return nil, fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode)
|
||||
// TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the
|
||||
// visible devices are checked.
|
||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return newExperimentalModifierFromDiscoverer(logger, d)
|
||||
if err := checkRequirements(logger, &image); err != nil {
|
||||
return nil, fmt.Errorf("requirements not met: %v", err)
|
||||
}
|
||||
|
||||
csvFiles, err := csv.GetFileList(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||
}
|
||||
|
||||
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
|
||||
if nvidiaRequireJetpack != "csv-mounts=all" {
|
||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||
}
|
||||
|
||||
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
|
||||
}
|
||||
|
||||
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
d := discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
|
||||
|
||||
return newModifierFromDiscoverer(logger, d)
|
||||
}
|
||||
|
||||
// newExperimentalModifierFromDiscoverer created a modifier that aplies the discovered
|
||||
// newModifierFromDiscoverer creates a modifier that applies the discovered
|
||||
// modifications to an OCI spec if required by the runtime wrapper.
|
||||
func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
|
||||
m := experimental{
|
||||
func newModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
|
||||
m := csvMode{
|
||||
logger: logger,
|
||||
discoverer: d,
|
||||
}
|
||||
@ -133,7 +119,7 @@ func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Dis
|
||||
|
||||
// Modify applies the required modifications to the incoming OCI spec. These modifications
|
||||
// are applied in-place.
|
||||
func (m experimental) Modify(spec *specs.Spec) error {
|
||||
func (m csvMode) Modify(spec *specs.Spec) error {
|
||||
err := nvidiaContainerRuntimeHookRemover{m.logger}.Modify(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to remove existing hooks: %v", err)
|
@ -28,7 +28,7 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewExperimentalModifier(t *testing.T) {
|
||||
func TestNewCSVModifier(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
@ -63,42 +63,6 @@ func TestNewExperimentalModifier(t *testing.T) {
|
||||
visibleDevices: "void",
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "empty config raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
expectedError: fmt.Errorf("invalid discover mode"),
|
||||
},
|
||||
{
|
||||
description: "non-legacy discover mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "non-legacy",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
expectedError: fmt.Errorf("invalid discover mode"),
|
||||
},
|
||||
{
|
||||
description: "legacy discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "legacy",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
},
|
||||
{
|
||||
description: "csv discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
DiscoverMode: "csv",
|
||||
},
|
||||
},
|
||||
visibleDevices: "all",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@ -115,7 +79,7 @@ func TestNewExperimentalModifier(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
m, err := NewExperimentalModifier(logger, tc.cfg, spec)
|
||||
m, err := NewCSVModifier(logger, tc.cfg, spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
@ -304,7 +268,7 @@ func TestExperimentalModifier(t *testing.T) {
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
m, err := newExperimentalModifierFromDiscoverer(logger, tc.discover)
|
||||
m, err := newModifierFromDiscoverer(logger, tc.discover)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = m.Modify(tc.spec)
|
@ -21,6 +21,7 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
"github.com/sirupsen/logrus"
|
||||
@ -61,9 +62,12 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [
|
||||
|
||||
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
|
||||
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
if !cfg.NVIDIAContainerRuntimeConfig.Experimental {
|
||||
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
|
||||
case "legacy":
|
||||
return modifier.NewStableRuntimeModifier(logger), nil
|
||||
case "csv":
|
||||
return modifier.NewCSVModifier(logger, cfg, ociSpec)
|
||||
}
|
||||
|
||||
return modifier.NewExperimentalModifier(logger, cfg, ociSpec)
|
||||
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
||||
}
|
||||
|
@ -49,16 +49,16 @@ func TestFactoryMethod(t *testing.T) {
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "experimental flag supported",
|
||||
description: "csv mode is supported",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Experimental: true,
|
||||
DiscoverMode: "legacy",
|
||||
Runtimes: []string{"runc"},
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
@ -69,6 +69,43 @@ func TestFactoryMethod(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-legacy discover mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "non-legacy",
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
{
|
||||
description: "legacy discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "empty mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
@ -79,7 +116,7 @@ func TestFactoryMethod(t *testing.T) {
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
|
||||
|
||||
argv := []string{"--bundle", bundleDir}
|
||||
argv := []string{"--bundle", bundleDir, "create"}
|
||||
|
||||
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
|
||||
if tc.expectedError {
|
||||
|
@ -13,6 +13,7 @@ import (
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
@ -73,7 +74,7 @@ func doPrestart() {
|
||||
hook := getHookConfig()
|
||||
cli := hook.NvidiaContainerCLI
|
||||
|
||||
if hook.NVIDIAContainerRuntime.Experimental && !*forceflag {
|
||||
if !*forceflag && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.")
|
||||
}
|
||||
|
||||
@ -178,3 +179,12 @@ func main() {
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
|
||||
// logInterceptor implements the info.Logger interface to allow for logging from this function.
|
||||
type logInterceptor struct{}
|
||||
|
||||
// Infof passes format and args straight through to log.Printf.
func (l *logInterceptor) Infof(format string, args ...interface{}) {
|
||||
log.Printf(format, args...)
|
||||
}
|
||||
|
||||
// Debugf is a no-op: the body is empty, so debug-level messages are dropped.
func (l *logInterceptor) Debugf(format string, args ...interface{}) {}
|
||||
|
@ -62,8 +62,6 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
Experimental: false,
|
||||
DiscoverMode: "auto",
|
||||
LogLevel: "info",
|
||||
Runtimes: []string{"docker-runc", "runc"},
|
||||
Mode: "auto",
|
||||
@ -97,8 +95,6 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
Experimental: true,
|
||||
DiscoverMode: "not-legacy",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
@ -136,8 +132,6 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
Experimental: true,
|
||||
DiscoverMode: "not-legacy",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
|
@ -33,8 +33,6 @@ const (
|
||||
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
|
||||
type RuntimeConfig struct {
|
||||
DebugFilePath string `toml:"debug"`
|
||||
Experimental bool `toml:"experimental"`
|
||||
DiscoverMode string `toml:"discover-mode"`
|
||||
// LogLevel defines the logging level for the application
|
||||
LogLevel string `toml:"log-level"`
|
||||
// Runtimes defines the candidates for the low-level runtime
|
||||
@ -80,8 +78,6 @@ func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
|
||||
func GetDefaultRuntimeConfig() *RuntimeConfig {
|
||||
c := RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
Experimental: false,
|
||||
DiscoverMode: auto,
|
||||
LogLevel: logrus.InfoLevel.String(),
|
||||
Runtimes: []string{
|
||||
dockerRuncExecutableName,
|
||||
|
Loading…
Reference in New Issue
Block a user