mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-21 15:57:49 +00:00
Rework restart logic
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
761fc29567
commit
178eb5c5a8
@ -18,6 +18,8 @@ package container
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator"
|
||||
@ -25,6 +27,12 @@ import (
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
// Values of Options.RestartMode that Options.Restart dispatches on.
const (
|
||||
// restartModeNone makes Restart log a warning and skip the restart.
restartModeNone = "none"
|
||||
// restartModeSignal makes Restart call the caller-supplied signal function with the configured socket.
restartModeSignal = "signal"
|
||||
// restartModeSystemd makes Restart delegate to SystemdRestart.
restartModeSystemd = "systemd"
|
||||
)
|
||||
|
||||
// Options defines the shared options for the CLIs to configure containers runtimes.
|
||||
type Options struct {
|
||||
Config string
|
||||
@ -121,3 +129,41 @@ func (o Options) RevertConfig(cfg engine.Interface) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Restart restarts the specified service
|
||||
func (o Options) Restart(service string, withSignal func(string) error) error {
|
||||
switch o.RestartMode {
|
||||
case restartModeNone:
|
||||
logrus.Warnf("Skipping restart of %v due to --restart-mode=%v", service, o.RestartMode)
|
||||
return nil
|
||||
case restartModeSignal:
|
||||
return withSignal(o.Socket)
|
||||
case restartModeSystemd:
|
||||
return o.SystemdRestart(service)
|
||||
}
|
||||
|
||||
return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode)
|
||||
}
|
||||
|
||||
// SystemdRestart restarts the specified service using systemd
|
||||
func (o Options) SystemdRestart(service string) error {
|
||||
var args []string
|
||||
var msg string
|
||||
if o.HostRootMount != "" {
|
||||
msg = " on host"
|
||||
args = append(args, "chroot", o.HostRootMount)
|
||||
}
|
||||
args = append(args, "systemctl", "restart", service)
|
||||
|
||||
logrus.Infof("Restarting %v%v using systemd: %v", service, msg, args)
|
||||
|
||||
cmd := exec.Command(args[0], args[1:]...)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error restarting %v using systemd: %v", service, err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
@ -20,7 +20,6 @@ import (
|
||||
"fmt"
|
||||
"net"
|
||||
"os"
|
||||
"os/exec"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
@ -32,10 +31,6 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
restartModeSignal = "signal"
|
||||
restartModeSystemd = "systemd"
|
||||
restartModeNone = "none"
|
||||
|
||||
nvidiaRuntimeName = "nvidia"
|
||||
nvidiaRuntimeBinary = "nvidia-container-runtime"
|
||||
nvidiaExperimentalRuntimeName = "nvidia-experimental"
|
||||
@ -46,7 +41,7 @@ const (
|
||||
defaultRuntimeClass = "nvidia"
|
||||
defaultRuntmeType = "io.containerd.runc.v2"
|
||||
defaultSetAsDefault = true
|
||||
defaultRestartMode = restartModeSignal
|
||||
defaultRestartMode = "signal"
|
||||
defaultHostRootMount = "/host"
|
||||
|
||||
reloadBackoff = 5 * time.Second
|
||||
@ -257,31 +252,16 @@ func Cleanup(c *cli.Context, o *options) error {
|
||||
|
||||
// RestartContainerd restarts containerd depending on the value of restartModeFlag
|
||||
func RestartContainerd(o *options) error {
|
||||
switch o.RestartMode {
|
||||
case restartModeNone:
|
||||
log.Warnf("Skipping sending signal to containerd due to --restart-mode=%v", o.RestartMode)
|
||||
return nil
|
||||
case restartModeSignal:
|
||||
err := SignalContainerd(o)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to signal containerd: %v", err)
|
||||
}
|
||||
case restartModeSystemd:
|
||||
return RestartContainerdSystemd(o.HostRootMount)
|
||||
default:
|
||||
return fmt.Errorf("Invalid restart mode specified: %v", o.RestartMode)
|
||||
}
|
||||
|
||||
return nil
|
||||
return o.Restart("containerd", SignalContainerd)
|
||||
}
|
||||
|
||||
// SignalContainerd sends a SIGHUP signal to the containerd daemon
|
||||
func SignalContainerd(o *options) error {
|
||||
func SignalContainerd(socket string) error {
|
||||
log.Infof("Sending SIGHUP signal to containerd")
|
||||
|
||||
// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
|
||||
retriable := func() error {
|
||||
conn, err := net.Dial("unix", o.Socket)
|
||||
conn, err := net.Dial("unix", socket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to dial: %v", err)
|
||||
}
|
||||
@ -355,24 +335,6 @@ func SignalContainerd(o *options) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// RestartContainerdSystemd restarts containerd using systemctl
|
||||
func RestartContainerdSystemd(hostRootMount string) error {
|
||||
log.Infof("Restarting containerd using systemd and host root mounted at %v", hostRootMount)
|
||||
|
||||
command := "chroot"
|
||||
args := []string{hostRootMount, "systemctl", "restart", "containerd"}
|
||||
|
||||
cmd := exec.Command(command, args...)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error restarting containerd using systemd: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
|
||||
func (o *options) containerAnnotationsFromCDIPrefixes() []string {
|
||||
var annotations []string
|
||||
|
@ -20,7 +20,6 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
@ -32,9 +31,6 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
restartModeSystemd = "systemd"
|
||||
restartModeNone = "none"
|
||||
|
||||
defaultConfigMode = "hook"
|
||||
|
||||
// Hook-based settings
|
||||
@ -46,7 +42,7 @@ const (
|
||||
defaultSocket = "/var/run/crio/crio.sock"
|
||||
defaultRuntimeClass = "nvidia"
|
||||
defaultSetAsDefault = true
|
||||
defaultRestartMode = restartModeSystemd
|
||||
defaultRestartMode = "systemd"
|
||||
defaultHostRootMount = "/host"
|
||||
)
|
||||
|
||||
@ -117,7 +113,8 @@ func main() {
|
||||
Value: "",
|
||||
Destination: &options.Socket,
|
||||
EnvVars: []string{"CRIO_SOCKET", "RUNTIME_SOCKET"},
|
||||
Hidden: true,
|
||||
// Note: We hide this option since restarting cri-o via a socket is not supported.
|
||||
Hidden: true,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "restart-mode",
|
||||
@ -179,7 +176,6 @@ func main() {
|
||||
Destination: &options.configMode,
|
||||
EnvVars: []string{"CRIO_CONFIG_MODE"},
|
||||
},
|
||||
// The flags below are only used by the 'setup' command.
|
||||
}
|
||||
|
||||
// Update the subcommand flags with the common subcommand flags
|
||||
@ -341,31 +337,5 @@ func generateOciHook(toolkitDir string) podmanHook {
|
||||
|
||||
// RestartCrio restarts crio depending on the value of restartModeFlag
|
||||
func RestartCrio(o *options) error {
|
||||
switch o.RestartMode {
|
||||
case restartModeNone:
|
||||
log.Warnf("Skipping restart of crio due to --restart-mode=%v", o.RestartMode)
|
||||
return nil
|
||||
case restartModeSystemd:
|
||||
return RestartCrioSystemd(o.HostRootMount)
|
||||
default:
|
||||
return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode)
|
||||
}
|
||||
}
|
||||
|
||||
// RestartCrioSystemd restarts cri-o using systemctl
|
||||
func RestartCrioSystemd(hostRootMount string) error {
|
||||
log.Infof("Restarting cri-o using systemd and host root mounted at %v", hostRootMount)
|
||||
|
||||
command := "chroot"
|
||||
args := []string{hostRootMount, "systemctl", "restart", "crio"}
|
||||
|
||||
cmd := exec.Command(command, args...)
|
||||
cmd.Stdout = os.Stdout
|
||||
cmd.Stderr = os.Stderr
|
||||
err := cmd.Run()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error restarting crio using systemd: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
return o.Restart("crio", func(string) error { return fmt.Errorf("supporting crio via signal is unsupported") })
|
||||
}
|
||||
|
@ -31,9 +31,6 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
restartModeSignal = "signal"
|
||||
restartModeNone = "none"
|
||||
|
||||
nvidiaRuntimeName = "nvidia"
|
||||
nvidiaRuntimeBinary = "nvidia-container-runtime"
|
||||
nvidiaExperimentalRuntimeName = "nvidia-experimental"
|
||||
@ -44,7 +41,7 @@ const (
|
||||
defaultSetAsDefault = true
|
||||
// defaultRuntimeName specifies the NVIDIA runtime to be use as the default runtime if setting the default runtime is enabled
|
||||
defaultRuntimeName = nvidiaRuntimeName
|
||||
defaultRestartMode = restartModeSignal
|
||||
defaultRestartMode = "signal"
|
||||
defaultHostRootMount = "/host"
|
||||
|
||||
reloadBackoff = 5 * time.Second
|
||||
@ -119,7 +116,7 @@ func main() {
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "restart-mode",
|
||||
Usage: "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | none]",
|
||||
Usage: "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | systemd | none ]",
|
||||
Value: defaultRestartMode,
|
||||
Destination: &options.RestartMode,
|
||||
EnvVars: []string{"DOCKER_RESTART_MODE", "RUNTIME_RESTART_MODE"},
|
||||
@ -224,19 +221,7 @@ func Cleanup(c *cli.Context, o *options) error {
|
||||
|
||||
// RestartDocker restarts docker depending on the value of restartModeFlag
|
||||
func RestartDocker(o *options) error {
|
||||
switch o.RestartMode {
|
||||
case restartModeNone:
|
||||
log.Warnf("Skipping sending signal to docker due to --restart-mode=%v", o.RestartMode)
|
||||
case restartModeSignal:
|
||||
err := SignalDocker(o.Socket)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to signal docker: %v", err)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode)
|
||||
}
|
||||
|
||||
return nil
|
||||
return o.Restart("docker", SignalDocker)
|
||||
}
|
||||
|
||||
// SignalDocker sends a SIGHUP signal to docker daemon
|
||||
|
Loading…
Reference in New Issue
Block a user