From 178eb5c5a8000884d9fab3ef4b91a0dffe2b4717 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Sat, 10 Jun 2023 12:41:53 +0200 Subject: [PATCH] Rework restart logic Signed-off-by: Evan Lezar --- tools/container/container.go | 46 ++++++++++++++++++++++++ tools/container/containerd/containerd.go | 46 +++--------------------- tools/container/crio/crio.go | 38 +++----------------- tools/container/docker/docker.go | 21 ++--------- 4 files changed, 57 insertions(+), 94 deletions(-) diff --git a/tools/container/container.go b/tools/container/container.go index 20d669b1..953e901b 100644 --- a/tools/container/container.go +++ b/tools/container/container.go @@ -18,6 +18,8 @@ package container import ( "fmt" + "os" + "os/exec" "github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" "github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator" @@ -25,6 +27,12 @@ import ( "github.com/urfave/cli/v2" ) +const ( + restartModeNone = "none" + restartModeSignal = "signal" + restartModeSystemd = "systemd" +) + // Options defines the shared options for the CLIs to configure containers runtimes. type Options struct { Config string @@ -121,3 +129,41 @@ func (o Options) RevertConfig(cfg engine.Interface) error { return nil } + +// Restart restarts the specified service +func (o Options) Restart(service string, withSignal func(string) error) error { + switch o.RestartMode { + case restartModeNone: + logrus.Warnf("Skipping restart of %v due to --restart-mode=%v", service, o.RestartMode) + return nil + case restartModeSignal: + return withSignal(o.Socket) + case restartModeSystemd: + return o.SystemdRestart(service) + } + + return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode) +} + +// SystemdRestart restarts the specified service using systemd +func (o Options) SystemdRestart(service string) error { + var args []string + var msg string + if o.HostRootMount != "" { + msg = " on host" + args = append(args, "chroot", o.HostRootMount) + } + args = append(args, "systemctl", "restart", service) + + logrus.Infof("Restarting %v%v using systemd: %v", service, msg, args) + + cmd := exec.Command(args[0], args[1:]...) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + err := cmd.Run() + if err != nil { + return fmt.Errorf("error restarting %v using systemd: %v", service, err) + } + + return nil +} diff --git a/tools/container/containerd/containerd.go b/tools/container/containerd/containerd.go index 548d09c5..d0a3670e 100644 --- a/tools/container/containerd/containerd.go +++ b/tools/container/containerd/containerd.go @@ -20,7 +20,6 @@ import ( "fmt" "net" "os" - "os/exec" "syscall" "time" @@ -32,10 +31,6 @@ import ( ) const ( - restartModeSignal = "signal" - restartModeSystemd = "systemd" - restartModeNone = "none" - nvidiaRuntimeName = "nvidia" nvidiaRuntimeBinary = "nvidia-container-runtime" nvidiaExperimentalRuntimeName = "nvidia-experimental" @@ -46,7 +41,7 @@ const ( defaultRuntimeClass = "nvidia" defaultRuntmeType = "io.containerd.runc.v2" defaultSetAsDefault = true - defaultRestartMode = restartModeSignal + defaultRestartMode = "signal" defaultHostRootMount = "/host" reloadBackoff = 5 * time.Second @@ -257,31 +252,16 @@ func Cleanup(c *cli.Context, o *options) error { // RestartContainerd restarts containerd depending on the value of restartModeFlag func RestartContainerd(o *options) error { - switch o.RestartMode { - case restartModeNone: - log.Warnf("Skipping sending signal to containerd due to --restart-mode=%v", o.RestartMode) - return nil - case restartModeSignal: - err := SignalContainerd(o) - if err != nil { - return fmt.Errorf("unable to signal containerd: %v", err) - } - case restartModeSystemd: - return RestartContainerdSystemd(o.HostRootMount) - default: - return fmt.Errorf("Invalid restart mode specified: %v", o.RestartMode) - } - - return nil + return o.Restart("containerd", SignalContainerd) } // SignalContainerd sends a SIGHUP signal to the containerd daemon -func SignalContainerd(o *options) error { +func SignalContainerd(socket string) error { log.Infof("Sending SIGHUP signal to containerd") // Wrap the logic to perform the SIGHUP in a function so we can retry it on failure retriable := func() error { - conn, err := net.Dial("unix", o.Socket) + conn, err := net.Dial("unix", socket) if err != nil { return fmt.Errorf("unable to dial: %v", err) } @@ -355,24 +335,6 @@ func SignalContainerd(o *options) error { return nil } -// RestartContainerdSystemd restarts containerd using systemctl -func RestartContainerdSystemd(hostRootMount string) error { - log.Infof("Restarting containerd using systemd and host root mounted at %v", hostRootMount) - - command := "chroot" - args := []string{hostRootMount, "systemctl", "restart", "containerd"} - - cmd := exec.Command(command, args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - err := cmd.Run() - if err != nil { - return fmt.Errorf("error restarting containerd using systemd: %v", err) - } - - return nil -} - // containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes. func (o *options) containerAnnotationsFromCDIPrefixes() []string { var annotations []string diff --git a/tools/container/crio/crio.go b/tools/container/crio/crio.go index 0d8a1feb..ac3b059a 100644 --- a/tools/container/crio/crio.go +++ b/tools/container/crio/crio.go @@ -20,7 +20,6 @@ import ( "encoding/json" "fmt" "os" - "os/exec" "path/filepath" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" @@ -32,9 +31,6 @@ import ( ) const ( - restartModeSystemd = "systemd" - restartModeNone = "none" - defaultConfigMode = "hook" // Hook-based settings @@ -46,7 +42,7 @@ const ( defaultSocket = "/var/run/crio/crio.sock" defaultRuntimeClass = "nvidia" defaultSetAsDefault = true - defaultRestartMode = restartModeSystemd + defaultRestartMode = "systemd" defaultHostRootMount = "/host" ) @@ -117,7 +113,8 @@ func main() { Value: "", Destination: &options.Socket, EnvVars: []string{"CRIO_SOCKET", "RUNTIME_SOCKET"}, - Hidden: true, + // Note: We hide this option since restarting cri-o via a socket is not supported. + Hidden: true, }, &cli.StringFlag{ Name: "restart-mode", @@ -179,7 +176,6 @@ func main() { Destination: &options.configMode, EnvVars: []string{"CRIO_CONFIG_MODE"}, }, - // The flags below are only used by the 'setup' command. } // Update the subcommand flags with the common subcommand flags @@ -341,31 +337,5 @@ func generateOciHook(toolkitDir string) podmanHook { // RestartCrio restarts crio depending on the value of restartModeFlag func RestartCrio(o *options) error { - switch o.RestartMode { - case restartModeNone: - log.Warnf("Skipping restart of crio due to --restart-mode=%v", o.RestartMode) - return nil - case restartModeSystemd: - return RestartCrioSystemd(o.HostRootMount) - default: - return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode) - } -} - -// RestartCrioSystemd restarts cri-o using systemctl -func RestartCrioSystemd(hostRootMount string) error { - log.Infof("Restarting cri-o using systemd and host root mounted at %v", hostRootMount) - - command := "chroot" - args := []string{hostRootMount, "systemctl", "restart", "crio"} - - cmd := exec.Command(command, args...) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - err := cmd.Run() - if err != nil { - return fmt.Errorf("error restarting crio using systemd: %v", err) - } - - return nil + return o.Restart("crio", func(string) error { return fmt.Errorf("supporting crio via signal is unsupported") }) } diff --git a/tools/container/docker/docker.go b/tools/container/docker/docker.go index 8807af8a..ddd32af4 100644 --- a/tools/container/docker/docker.go +++ b/tools/container/docker/docker.go @@ -31,9 +31,6 @@ import ( ) const ( - restartModeSignal = "signal" - restartModeNone = "none" - nvidiaRuntimeName = "nvidia" nvidiaRuntimeBinary = "nvidia-container-runtime" nvidiaExperimentalRuntimeName = "nvidia-experimental" @@ -44,7 +41,7 @@ const ( defaultSetAsDefault = true // defaultRuntimeName specifies the NVIDIA runtime to be use as the default runtime if setting the default runtime is enabled defaultRuntimeName = nvidiaRuntimeName - defaultRestartMode = restartModeSignal + defaultRestartMode = "signal" defaultHostRootMount = "/host" reloadBackoff = 5 * time.Second @@ -119,7 +116,7 @@ func main() { }, &cli.StringFlag{ Name: "restart-mode", - Usage: "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | none]", + Usage: "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | systemd | none ]", Value: defaultRestartMode, Destination: &options.RestartMode, EnvVars: []string{"DOCKER_RESTART_MODE", "RUNTIME_RESTART_MODE"}, @@ -224,19 +221,7 @@ func Cleanup(c *cli.Context, o *options) error { // RestartDocker restarts docker depending on the value of restartModeFlag func RestartDocker(o *options) error { - switch o.RestartMode { - case restartModeNone: - log.Warnf("Skipping sending signal to docker due to --restart-mode=%v", o.RestartMode) - case restartModeSignal: - err := SignalDocker(o.Socket) - if err != nil { - return fmt.Errorf("unable to signal docker: %v", err) - } - default: - return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode) - } - - return nil + return o.Restart("docker", SignalDocker) } // SignalDocker sends a SIGHUP signal to docker daemon