diff --git a/test/container/containerd_test.sh b/test/container/containerd_test.sh index 8b2d53ba..98cfe165 100755 --- a/test/container/containerd_test.sh +++ b/test/container/containerd_test.sh @@ -91,7 +91,7 @@ testing::containerd::toolkit::test_config() { "${toolkit_container_image}" -c "containerd setup \ --config=${output_config} \ --socket=${containerd_dind_containerd_dir}/containerd.sock \ - --restart-mode=NONE \ + --restart-mode=none \ /usr/local/nvidia/toolkit" # As a basic test we check that the config has changed @@ -107,7 +107,7 @@ testing::containerd::toolkit::test_config() { "${toolkit_container_image}" -c "containerd cleanup \ --config=${output_config} \ --socket=${containerd_dind_containerd_dir}/containerd.sock \ - --restart-mode=NONE \ + --restart-mode=none \ /usr/local/nvidia/toolkit" if [[ -s "${input_config}" ]]; then diff --git a/third_party/libnvidia-container b/third_party/libnvidia-container index 3f55f9be..88f28f41 160000 --- a/third_party/libnvidia-container +++ b/third_party/libnvidia-container @@ -1 +1 @@ -Subproject commit 3f55f9be893787df6dbacbe1f87842d520315ab2 +Subproject commit 88f28f41fd20018d4244df5132d3d6565d6bbb7f diff --git a/tools/container/containerd/containerd.go b/tools/container/containerd/containerd.go index 9bd684d5..23566f67 100644 --- a/tools/container/containerd/containerd.go +++ b/tools/container/containerd/containerd.go @@ -33,7 +33,7 @@ import ( const ( restartModeSignal = "signal" restartModeSystemd = "systemd" - restartModeNone = "NONE" + restartModeNone = "none" nvidiaRuntimeName = "nvidia" nvidiaRuntimeBinary = "nvidia-container-runtime" @@ -154,7 +154,7 @@ func main() { }, &cli.StringFlag{ Name: "restart-mode", - Usage: "Specify how containerd should be restarted; [signal | systemd]", + Usage: "Specify how containerd should be restarted; If 'none' is selected, it will not be restarted [signal | systemd | none]", Value: defaultRestartMode, Destination: &options.restartMode, EnvVars: 
[]string{"CONTAINERD_RESTART_MODE"},
diff --git a/tools/container/docker/docker.go b/tools/container/docker/docker.go
index c85f7475..21866a1b 100644
--- a/tools/container/docker/docker.go
+++ b/tools/container/docker/docker.go
@@ -32,6 +32,9 @@ import (
 )
 
 const (
+	restartModeSignal = "signal"
+	restartModeNone   = "none"
+
 	nvidiaRuntimeName             = "nvidia"
 	nvidiaRuntimeBinary           = "nvidia-container-runtime"
 	nvidiaExperimentalRuntimeName = "nvidia-experimental"
@@ -42,6 +45,7 @@ const (
 	defaultSetAsDefault = true
 	// defaultRuntimeName specifies the NVIDIA runtime to be use as the default runtime if setting the default runtime is enabled
 	defaultRuntimeName = nvidiaRuntimeName
+	defaultRestartMode = restartModeSignal
 
 	reloadBackoff     = 5 * time.Second
 	maxReloadAttempts = 6
@@ -63,6 +67,7 @@ type options struct {
 	runtimeName  string
 	setAsDefault bool
 	runtimeDir   string
+	restartMode  string
 }
 
 func main() {
@@ -137,6 +142,13 @@ func main() {
 			EnvVars:     []string{"DOCKER_SET_AS_DEFAULT"},
 			Hidden:      true,
 		},
+		&cli.StringFlag{
+			Name:        "restart-mode",
+			Usage:       "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | none]",
+			Value:       defaultRestartMode,
+			Destination: &options.restartMode,
+			EnvVars:     []string{"DOCKER_RESTART_MODE"},
+		},
 	}
 
 	// Update the subcommand flags with the common subcommand flags
@@ -175,9 +187,9 @@ func Setup(c *cli.Context, o *options) error {
 		return fmt.Errorf("unable to flush config: %v", err)
 	}
 
-	err = SignalDocker(o.socket)
+	err = RestartDocker(o)
 	if err != nil {
-		return fmt.Errorf("unable to signal docker: %v", err)
+		return fmt.Errorf("unable to restart docker: %v", err)
 	}
 
 	log.Infof("Completed 'setup' for %v", c.App.Name)
@@ -209,7 +221,7 @@ func Cleanup(c *cli.Context, o *options) error {
 		return fmt.Errorf("unable to flush config: %v", err)
 	}
 
-	err = SignalDocker(o.socket)
+	err = RestartDocker(o)
 	if err != nil {
-		return fmt.Errorf("unable to signal docker: %v", err)
+		return fmt.Errorf("unable to restart docker: %v", err)
 	}
@@ -340,6 +352,23 @@ func FlushConfig(cfg
map[string]interface{}, config string) error {
 	return nil
 }
 
+// RestartDocker restarts docker according to o.restartMode: restartModeSignal
+// calls SignalDocker on o.socket, restartModeNone only logs a warning, and any
+// other value is returned as an invalid-restart-mode error.
+func RestartDocker(o *options) error {
+	switch o.restartMode {
+	case restartModeSignal:
+		if err := SignalDocker(o.socket); err != nil {
+			return fmt.Errorf("unable to signal docker: %v", err)
+		}
+	case restartModeNone:
+		log.Warnf("Skipping sending signal to docker due to --restart-mode=%v", o.restartMode)
+	default:
+		return fmt.Errorf("invalid restart mode specified: %v", o.restartMode)
+	}
+	return nil
+}
+
 // SignalDocker sends a SIGHUP signal to docker daemon
 func SignalDocker(socket string) error {
 	log.Infof("Sending SIGHUP signal to docker")