From 2563c1b87c3b37a13726ac1e55723a1efd525bb8 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 16:04:38 +0200 Subject: [PATCH 1/4] Export GetDefaultRuntimeConfig Signed-off-by: Evan Lezar --- internal/config/config.go | 2 +- internal/config/runtime.go | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 9f4f16f3..648dee5e 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -95,7 +95,7 @@ func getDefaultConfig() *Config { c := Config{ NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(), NVIDIACTKConfig: *getDefaultCTKConfig(), - NVIDIAContainerRuntimeConfig: *getDefaultRuntimeConfig(), + NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(), } return &c diff --git a/internal/config/runtime.go b/internal/config/runtime.go index 9ae8de48..7486e9e9 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -29,7 +29,7 @@ type RuntimeConfig struct { // getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree. func getRuntimeConfigFrom(toml *toml.Tree) *RuntimeConfig { - cfg := getDefaultRuntimeConfig() + cfg := GetDefaultRuntimeConfig() if toml == nil { return cfg @@ -42,8 +42,8 @@ func getRuntimeConfigFrom(toml *toml.Tree) *RuntimeConfig { return cfg } -// getDefaultRuntimeConfig defines the default values for the config -func getDefaultRuntimeConfig() *RuntimeConfig { +// GetDefaultRuntimeConfig defines the default values for the config +func GetDefaultRuntimeConfig() *RuntimeConfig { c := RuntimeConfig{ DebugFilePath: "/dev/null", Experimental: false, From a9a470427335f467c3a2a0d5f1859c1d5ec6a806 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 16:05:26 +0200 Subject: [PATCH 2/4] Raise error if hook invoked in experimental mode without force flag Signed-off-by: Evan Lezar --- cmd/nvidia-container-toolkit/hook_config.go | 7 +++++-- cmd/nvidia-container-toolkit/main.go | 5 +++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/cmd/nvidia-container-toolkit/hook_config.go b/cmd/nvidia-container-toolkit/hook_config.go index b4cb0133..6a6d18b3 100644 --- a/cmd/nvidia-container-toolkit/hook_config.go +++ b/cmd/nvidia-container-toolkit/hook_config.go @@ -7,6 +7,7 @@ import ( "reflect" "github.com/BurntSushi/toml" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" ) const ( @@ -41,10 +42,11 @@ type HookConfig struct { AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"` SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"` - NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"` + NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"` + NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"` } -func getDefaultHookConfig() (config HookConfig) { +func getDefaultHookConfig() HookConfig { return HookConfig{ DisableRequire: false, SwarmResource: nil, @@ -63,6 +65,7 @@ func getDefaultHookConfig() (config HookConfig) { User: nil, Ldconfig: nil, }, + NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(), } } diff --git a/cmd/nvidia-container-toolkit/main.go b/cmd/nvidia-container-toolkit/main.go index 13f8197c..81c86572 100644 --- a/cmd/nvidia-container-toolkit/main.go +++ b/cmd/nvidia-container-toolkit/main.go @@ -17,6 +17,7 @@ import ( var ( debugflag = flag.Bool("debug", false, "enable debug output") + forceflag = flag.Bool("force", false, "force execution of prestart hook in experimental mode") configflag = flag.String("config", "", "configuration file") defaultPATH = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"} @@ -85,6 +86,10 @@ func doPrestart() { hook := getHookConfig() cli := hook.NvidiaContainerCLI + if hook.NVIDIAContainerRuntime.Experimental && !*forceflag { + log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.") + } + container := getContainerConfig(hook) nvidia := container.Nvidia if nvidia == nil { From dab6f4b768b00b15a127522f0446e358b744070f Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 16:06:30 +0200 Subject: [PATCH 3/4] Specify --force flag when invoking nvidia-container-runtime-hook Signed-off-by: Evan Lezar --- internal/discover/legacy.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/discover/legacy.go b/internal/discover/legacy.go index adc4c0c5..8a565905 100644 --- a/internal/discover/legacy.go +++ b/internal/discover/legacy.go @@ -60,7 +60,7 @@ func (d legacy) Hooks() ([]Hook, error) { } d.logger.Debugf("Using NVIDIA Container Runtime Hook path %v", hookPath) - args := []string{hookPath, "prestart"} + args := []string{hookPath, "--force", "prestart"} h := Hook{ Lifecycle: cdi.PrestartHook, Path: hookPath, From 45160b88a40171ff907224bb9fc8e7bde7b9a13e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 6 Apr 2022 16:18:56 +0200 Subject: [PATCH 4/4] Remove exsiting NVIDIA Container Runtime Hooks from the spec Signed-off-by: Evan Lezar --- .../modifier/experimental_test.go | 14 ++++++-------- .../modifier/hook_remover.go | 12 ++++++++++-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/cmd/nvidia-container-runtime/modifier/experimental_test.go b/cmd/nvidia-container-runtime/modifier/experimental_test.go index 9e09873a..34f83bf4 100644 --- a/cmd/nvidia-container-runtime/modifier/experimental_test.go +++ b/cmd/nvidia-container-runtime/modifier/experimental_test.go @@ -231,7 +231,7 @@ func TestExperimentalModifier(t *testing.T) { }, }, { - description: "modification fails for existing nvidia-container-runtime-hook", + description: "modification removes existing nvidia-container-runtime-hook", spec: &specs.Spec{ Hooks: &specs.Hooks{ Prestart: []specs.Hook{ @@ -254,20 +254,19 @@ func TestExperimentalModifier(t *testing.T) { return hooks, nil }, }, - expectedError: fmt.Errorf("nvidia-container-runtime-hook already exists"), expectedSpec: &specs.Spec{ Hooks: &specs.Hooks{ Prestart: []specs.Hook{ { - Path: "/path/to/nvidia-container-runtime-hook", - Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"}, + Path: "/hook/b", + Args: []string{"/hook/b", "argb"}, }, }, }, }, }, { - description: "modification fails for existing nvidia-container-toolkit", + description: "modification removes existing nvidia-container-toolkit", spec: &specs.Spec{ Hooks: &specs.Hooks{ Prestart: []specs.Hook{ @@ -290,13 +289,12 @@ func TestExperimentalModifier(t *testing.T) { return hooks, nil }, }, - expectedError: fmt.Errorf("nvidia-container-toolkit already exists"), expectedSpec: &specs.Spec{ Hooks: &specs.Hooks{ Prestart: []specs.Hook{ { - Path: "/path/to/nvidia-container-toolkit", - Args: []string{"/path/to/nvidia-container-toolkit", "prestart"}, + Path: "/hook/b", + Args: []string{"/hook/b", "argb"}, }, }, }, diff --git a/cmd/nvidia-container-runtime/modifier/hook_remover.go b/cmd/nvidia-container-runtime/modifier/hook_remover.go index 7df4b2f1..16fe82d4 100644 --- a/cmd/nvidia-container-runtime/modifier/hook_remover.go +++ b/cmd/nvidia-container-runtime/modifier/hook_remover.go @@ -17,7 +17,6 @@ package modifier import ( - "fmt" "path/filepath" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci" @@ -46,10 +45,19 @@ func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error { return nil } + var newPrestart []specs.Hook + for _, hook := range spec.Hooks.Prestart { if isNVIDIAContainerRuntimeHook(&hook) { - return fmt.Errorf("spec already contains required 'prestart' hook") + m.logger.Debugf("Removing hook %v", hook) + continue } + newPrestart = append(newPrestart, hook) + } + + if len(newPrestart) != len(spec.Hooks.Prestart) { + m.logger.Debugf("Updating 'prestart' hooks to %v", newPrestart) + spec.Hooks.Prestart = newPrestart } return nil