From e591f3f26b4a2d513ff886bd14fd1f036dd468de Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Mon, 9 May 2022 15:42:59 +0200 Subject: [PATCH] Replace experimental and discover-mode These changes replace the nvidia-container-runtime config options experimental and discover-mode with a single mode config option. Note that mode is now a string with a default value of "auto" and a mode value of "legacy" is equivalent to experimental == false. Signed-off-by: Evan Lezar --- .../modifier/{experimental.go => csv.go} | 106 ++++++++---------- .../{experimental_test.go => csv_test.go} | 42 +------ .../runtime_factory.go | 8 +- .../runtime_factory_test.go | 47 +++++++- cmd/nvidia-container-toolkit/main.go | 12 +- internal/config/config_test.go | 6 - internal/config/runtime.go | 4 - 7 files changed, 108 insertions(+), 117 deletions(-) rename cmd/nvidia-container-runtime/modifier/{experimental.go => csv.go} (59%) rename cmd/nvidia-container-runtime/modifier/{experimental_test.go => csv_test.go} (85%) diff --git a/cmd/nvidia-container-runtime/modifier/experimental.go b/cmd/nvidia-container-runtime/modifier/csv.go similarity index 59% rename from cmd/nvidia-container-runtime/modifier/experimental.go rename to cmd/nvidia-container-runtime/modifier/csv.go index 5c72bae8..b9210fcd 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental.go +++ b/cmd/nvidia-container-runtime/modifier/csv.go @@ -25,15 +25,14 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/discover" "github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv" "github.com/NVIDIA/nvidia-container-toolkit/internal/edits" - "github.com/NVIDIA/nvidia-container-toolkit/internal/info" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/NVIDIA/nvidia-container-toolkit/internal/requirements" "github.com/opencontainers/runtime-spec/specs-go" "github.com/sirupsen/logrus" ) -// experiemental represents the modifications required by the experimental runtime -type experimental struct { 
+// csvMode represents the modifications as performed by the csv runtime mode +type csvMode struct { logger *logrus.Logger discoverer discover.Discover } @@ -45,9 +44,9 @@ const ( nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK" ) -// NewExperimentalModifier creates a modifier that applies the experimental -// modications to an OCI spec if required by the runtime wrapper. -func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { +// NewCSVModifier creates a modifier that applies modifications to an OCI spec if required by the runtime wrapper. +// The modifications are defined by CSV MountSpecs. +func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { rawSpec, err := ociSpec.Load() if err != nil { return nil, fmt.Errorf("failed to load OCI spec: %v", err) @@ -67,64 +66,51 @@ func NewExperimentalModifier(logger *logrus.Logger, cfg *config.Config, ociSpec NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path, } - var d discover.Discover - - switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) { - case "legacy": - legacyDiscoverer, err := discover.NewLegacyDiscoverer(logger, config) - if err != nil { - return nil, fmt.Errorf("failed to create legacy discoverer: %v", err) - } - d = legacyDiscoverer - case "csv": - // TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the - // visible devices are checked. 
- image, err := image.NewCUDAImageFromSpec(rawSpec) - if err != nil { - return nil, err - } - - if err := checkRequirements(logger, &image); err != nil { - return nil, fmt.Errorf("requirements not met: %v", err) - } - - csvFiles, err := csv.GetFileList(csv.DefaultMountSpecPath) - if err != nil { - return nil, fmt.Errorf("failed to get list of CSV files: %v", err) - } - - nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar) - if nvidiaRequireJetpack != "csv-mounts=all" { - csvFiles = csv.BaseFilesOnly(csvFiles) - } - - csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root) - if err != nil { - return nil, fmt.Errorf("failed to create CSV discoverer: %v", err) - } - - ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config) - if err != nil { - return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err) - } - - createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config) - if err != nil { - return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err) - } - - d = discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook) - default: - return nil, fmt.Errorf("invalid discover mode: %v", cfg.NVIDIAContainerRuntimeConfig.DiscoverMode) + // TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the + // visible devices are checked. 
+ image, err := image.NewCUDAImageFromSpec(rawSpec) + if err != nil { + return nil, err } - return newExperimentalModifierFromDiscoverer(logger, d) + if err := checkRequirements(logger, &image); err != nil { + return nil, fmt.Errorf("requirements not met: %v", err) + } + + csvFiles, err := csv.GetFileList(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath) + if err != nil { + return nil, fmt.Errorf("failed to get list of CSV files: %v", err) + } + + nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar) + if nvidiaRequireJetpack != "csv-mounts=all" { + csvFiles = csv.BaseFilesOnly(csvFiles) + } + + csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root) + if err != nil { + return nil, fmt.Errorf("failed to create CSV discoverer: %v", err) + } + + ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config) + if err != nil { + return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err) + } + + createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config) + if err != nil { + return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err) + } + + d := discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook) + + return newModifierFromDiscoverer(logger, d) } -// newExperimentalModifierFromDiscoverer created a modifier that aplies the discovered +// newModifierFromDiscoverer creates a modifier that applies the discovered // modifications to an OCI spec if require by the runtime wrapper. 
-func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) { - m := experimental{ +func newModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) { + m := csvMode{ logger: logger, discoverer: d, } @@ -133,7 +119,7 @@ func newExperimentalModifierFromDiscoverer(logger *logrus.Logger, d discover.Dis // Modify applies the required modifications to the incomming OCI spec. These modifications // are applied in-place. -func (m experimental) Modify(spec *specs.Spec) error { +func (m csvMode) Modify(spec *specs.Spec) error { err := nvidiaContainerRuntimeHookRemover{m.logger}.Modify(spec) if err != nil { return fmt.Errorf("failed to remove existing hooks: %v", err) diff --git a/cmd/nvidia-container-runtime/modifier/experimental_test.go b/cmd/nvidia-container-runtime/modifier/csv_test.go similarity index 85% rename from cmd/nvidia-container-runtime/modifier/experimental_test.go rename to cmd/nvidia-container-runtime/modifier/csv_test.go index d7ba0455..e8cbf3e3 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental_test.go +++ b/cmd/nvidia-container-runtime/modifier/csv_test.go @@ -28,7 +28,7 @@ import ( "github.com/stretchr/testify/require" ) -func TestNewExperimentalModifier(t *testing.T) { +func TestNewCSVModifier(t *testing.T) { logger, _ := testlog.NewNullLogger() testCases := []struct { @@ -63,42 +63,6 @@ func TestNewExperimentalModifier(t *testing.T) { visibleDevices: "void", expectedNil: true, }, - { - description: "empty config raises error", - cfg: &config.Config{ - NVIDIAContainerRuntimeConfig: config.RuntimeConfig{}, - }, - visibleDevices: "all", - expectedError: fmt.Errorf("invalid discover mode"), - }, - { - description: "non-legacy discover mode raises error", - cfg: &config.Config{ - NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ - DiscoverMode: "non-legacy", - }, - }, - visibleDevices: "all", - expectedError: fmt.Errorf("invalid discover mode"), - }, - { 
- description: "legacy discover mode returns modifier", - cfg: &config.Config{ - NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ - DiscoverMode: "legacy", - }, - }, - visibleDevices: "all", - }, - { - description: "csv discover mode returns modifier", - cfg: &config.Config{ - NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ - DiscoverMode: "csv", - }, - }, - visibleDevices: "all", - }, } for _, tc := range testCases { @@ -115,7 +79,7 @@ func TestNewExperimentalModifier(t *testing.T) { } } - m, err := NewExperimentalModifier(logger, tc.cfg, spec) + m, err := NewCSVModifier(logger, tc.cfg, spec) if tc.expectedError != nil { require.Error(t, err) } else { @@ -304,7 +268,7 @@ func TestExperimentalModifier(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - m, err := newExperimentalModifierFromDiscoverer(logger, tc.discover) + m, err := newModifierFromDiscoverer(logger, tc.discover) require.NoError(t, err) err = m.Modify(tc.spec) diff --git a/cmd/nvidia-container-runtime/runtime_factory.go b/cmd/nvidia-container-runtime/runtime_factory.go index 9c0a78c3..9023d419 100644 --- a/cmd/nvidia-container-runtime/runtime_factory.go +++ b/cmd/nvidia-container-runtime/runtime_factory.go @@ -21,6 +21,7 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/info" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/NVIDIA/nvidia-container-toolkit/internal/runtime" "github.com/sirupsen/logrus" @@ -61,9 +62,12 @@ func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv [ // newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config. 
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) { - if !cfg.NVIDIAContainerRuntimeConfig.Experimental { + switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) { + case "legacy": return modifier.NewStableRuntimeModifier(logger), nil + case "csv": + return modifier.NewCSVModifier(logger, cfg, ociSpec) } - return modifier.NewExperimentalModifier(logger, cfg, ociSpec) + return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode) } diff --git a/cmd/nvidia-container-runtime/runtime_factory_test.go b/cmd/nvidia-container-runtime/runtime_factory_test.go index 4133fecb..88d4ddb5 100644 --- a/cmd/nvidia-container-runtime/runtime_factory_test.go +++ b/cmd/nvidia-container-runtime/runtime_factory_test.go @@ -49,16 +49,16 @@ func TestFactoryMethod(t *testing.T) { cfg: &config.Config{ NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ Runtimes: []string{"runc"}, + Mode: "legacy", }, }, }, { - description: "experimental flag supported", + description: "csv mode is supported", cfg: &config.Config{ NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ - Experimental: true, - DiscoverMode: "legacy", - Runtimes: []string{"runc"}, + Runtimes: []string{"runc"}, + Mode: "csv", }, }, spec: &specs.Spec{ @@ -69,6 +69,43 @@ func TestFactoryMethod(t *testing.T) { }, }, }, + { + description: "non-legacy discover mode raises error", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Runtimes: []string{"runc"}, + Mode: "non-legacy", + }, + }, + expectedError: true, + }, + { + description: "legacy discover mode returns modifier", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Runtimes: []string{"runc"}, + Mode: "legacy", + }, + }, + }, + { + description: "csv discover mode returns modifier", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Runtimes: []string{"runc"}, + Mode: "csv", + 
}, + }, + }, + { + description: "empty mode raises error", + cfg: &config.Config{ + NVIDIAContainerRuntimeConfig: config.RuntimeConfig{ + Runtimes: []string{"runc"}, + }, + }, + expectedError: true, + }, } for _, tc := range testCases { @@ -79,7 +116,7 @@ func TestFactoryMethod(t *testing.T) { require.NoError(t, err) require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec)) - argv := []string{"--bundle", bundleDir} + argv := []string{"--bundle", bundleDir, "create"} _, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv) if tc.expectedError { diff --git a/cmd/nvidia-container-toolkit/main.go b/cmd/nvidia-container-toolkit/main.go index 232f17b5..b515337d 100644 --- a/cmd/nvidia-container-toolkit/main.go +++ b/cmd/nvidia-container-toolkit/main.go @@ -13,6 +13,7 @@ import ( "strings" "syscall" + "github.com/NVIDIA/nvidia-container-toolkit/internal/info" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" ) @@ -73,7 +74,7 @@ func doPrestart() { hook := getHookConfig() cli := hook.NvidiaContainerCLI - if hook.NVIDIAContainerRuntime.Experimental && !*forceflag { + if !*forceflag && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" { log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.") } @@ -178,3 +179,12 @@ func main() { os.Exit(2) } } + +// logInterceptor implements the info.Logger interface to allow for logging from this function. +type logInterceptor struct{} + +func (l *logInterceptor) Infof(format string, args ...interface{}) { + log.Printf(format, args...) 
+} + +func (l *logInterceptor) Debugf(format string, args ...interface{}) {} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 97c49c04..f4501bf3 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -62,8 +62,6 @@ func TestGetConfig(t *testing.T) { }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/dev/null", - Experimental: false, - DiscoverMode: "auto", LogLevel: "info", Runtimes: []string{"docker-runc", "runc"}, Mode: "auto", @@ -97,8 +95,6 @@ func TestGetConfig(t *testing.T) { }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", - Experimental: true, - DiscoverMode: "not-legacy", LogLevel: "debug", Runtimes: []string{"/some/runtime"}, Mode: "not-auto", @@ -136,8 +132,6 @@ func TestGetConfig(t *testing.T) { }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", - Experimental: true, - DiscoverMode: "not-legacy", LogLevel: "debug", Runtimes: []string{"/some/runtime"}, Mode: "not-auto", diff --git a/internal/config/runtime.go b/internal/config/runtime.go index 4ccdbbfe..4526da0c 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -33,8 +33,6 @@ const ( // RuntimeConfig stores the config options for the NVIDIA Container Runtime type RuntimeConfig struct { DebugFilePath string `toml:"debug"` - Experimental bool `toml:"experimental"` - DiscoverMode string `toml:"discover-mode"` // LogLevel defines the logging level for the application LogLevel string `toml:"log-level"` // Runtimes defines the candidates for the low-level runtime @@ -80,8 +78,6 @@ func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) { func GetDefaultRuntimeConfig() *RuntimeConfig { c := RuntimeConfig{ DebugFilePath: "/dev/null", - Experimental: false, - DiscoverMode: auto, LogLevel: logrus.InfoLevel.String(), Runtimes: []string{ dockerRuncExecutableName,