mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 00:08:11 +00:00
Rework restart logic
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
761fc29567
commit
178eb5c5a8
@ -18,6 +18,8 @@ package container
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
"os/exec"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
|
"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator"
|
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator"
|
||||||
@ -25,6 +27,12 @@ import (
|
|||||||
"github.com/urfave/cli/v2"
|
"github.com/urfave/cli/v2"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
restartModeNone = "none"
|
||||||
|
restartModeSignal = "signal"
|
||||||
|
restartModeSystemd = "systemd"
|
||||||
|
)
|
||||||
|
|
||||||
// Options defines the shared options for the CLIs to configure containers runtimes.
|
// Options defines the shared options for the CLIs to configure containers runtimes.
|
||||||
type Options struct {
|
type Options struct {
|
||||||
Config string
|
Config string
|
||||||
@ -121,3 +129,41 @@ func (o Options) RevertConfig(cfg engine.Interface) error {
|
|||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Restart restarts the specified service
|
||||||
|
func (o Options) Restart(service string, withSignal func(string) error) error {
|
||||||
|
switch o.RestartMode {
|
||||||
|
case restartModeNone:
|
||||||
|
logrus.Warnf("Skipping restart of %v due to --restart-mode=%v", service, o.RestartMode)
|
||||||
|
return nil
|
||||||
|
case restartModeSignal:
|
||||||
|
return withSignal(o.Socket)
|
||||||
|
case restartModeSystemd:
|
||||||
|
return o.SystemdRestart(service)
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode)
|
||||||
|
}
|
||||||
|
|
||||||
|
// SystemdRestart restarts the specified service using systemd
|
||||||
|
func (o Options) SystemdRestart(service string) error {
|
||||||
|
var args []string
|
||||||
|
var msg string
|
||||||
|
if o.HostRootMount != "" {
|
||||||
|
msg = " on host"
|
||||||
|
args = append(args, "chroot", o.HostRootMount)
|
||||||
|
}
|
||||||
|
args = append(args, "systemctl", "restart", service)
|
||||||
|
|
||||||
|
logrus.Infof("Restarting %v%v using systemd: %v", service, msg, args)
|
||||||
|
|
||||||
|
cmd := exec.Command(args[0], args[1:]...)
|
||||||
|
cmd.Stdout = os.Stdout
|
||||||
|
cmd.Stderr = os.Stderr
|
||||||
|
err := cmd.Run()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error restarting %v using systemd: %v", service, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
@ -20,7 +20,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"net"
|
"net"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
|
||||||
"syscall"
|
"syscall"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
@ -32,10 +31,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
restartModeSignal = "signal"
|
|
||||||
restartModeSystemd = "systemd"
|
|
||||||
restartModeNone = "none"
|
|
||||||
|
|
||||||
nvidiaRuntimeName = "nvidia"
|
nvidiaRuntimeName = "nvidia"
|
||||||
nvidiaRuntimeBinary = "nvidia-container-runtime"
|
nvidiaRuntimeBinary = "nvidia-container-runtime"
|
||||||
nvidiaExperimentalRuntimeName = "nvidia-experimental"
|
nvidiaExperimentalRuntimeName = "nvidia-experimental"
|
||||||
@ -46,7 +41,7 @@ const (
|
|||||||
defaultRuntimeClass = "nvidia"
|
defaultRuntimeClass = "nvidia"
|
||||||
defaultRuntmeType = "io.containerd.runc.v2"
|
defaultRuntmeType = "io.containerd.runc.v2"
|
||||||
defaultSetAsDefault = true
|
defaultSetAsDefault = true
|
||||||
defaultRestartMode = restartModeSignal
|
defaultRestartMode = "signal"
|
||||||
defaultHostRootMount = "/host"
|
defaultHostRootMount = "/host"
|
||||||
|
|
||||||
reloadBackoff = 5 * time.Second
|
reloadBackoff = 5 * time.Second
|
||||||
@ -257,31 +252,16 @@ func Cleanup(c *cli.Context, o *options) error {
|
|||||||
|
|
||||||
// RestartContainerd restarts containerd depending on the value of restartModeFlag
|
// RestartContainerd restarts containerd depending on the value of restartModeFlag
|
||||||
func RestartContainerd(o *options) error {
|
func RestartContainerd(o *options) error {
|
||||||
switch o.RestartMode {
|
return o.Restart("containerd", SignalContainerd)
|
||||||
case restartModeNone:
|
|
||||||
log.Warnf("Skipping sending signal to containerd due to --restart-mode=%v", o.RestartMode)
|
|
||||||
return nil
|
|
||||||
case restartModeSignal:
|
|
||||||
err := SignalContainerd(o)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("unable to signal containerd: %v", err)
|
|
||||||
}
|
|
||||||
case restartModeSystemd:
|
|
||||||
return RestartContainerdSystemd(o.HostRootMount)
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("Invalid restart mode specified: %v", o.RestartMode)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SignalContainerd sends a SIGHUP signal to the containerd daemon
|
// SignalContainerd sends a SIGHUP signal to the containerd daemon
|
||||||
func SignalContainerd(o *options) error {
|
func SignalContainerd(socket string) error {
|
||||||
log.Infof("Sending SIGHUP signal to containerd")
|
log.Infof("Sending SIGHUP signal to containerd")
|
||||||
|
|
||||||
// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
|
// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
|
||||||
retriable := func() error {
|
retriable := func() error {
|
||||||
conn, err := net.Dial("unix", o.Socket)
|
conn, err := net.Dial("unix", socket)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("unable to dial: %v", err)
|
return fmt.Errorf("unable to dial: %v", err)
|
||||||
}
|
}
|
||||||
@ -355,24 +335,6 @@ func SignalContainerd(o *options) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// RestartContainerdSystemd restarts containerd using systemctl
|
|
||||||
func RestartContainerdSystemd(hostRootMount string) error {
|
|
||||||
log.Infof("Restarting containerd using systemd and host root mounted at %v", hostRootMount)
|
|
||||||
|
|
||||||
command := "chroot"
|
|
||||||
args := []string{hostRootMount, "systemctl", "restart", "containerd"}
|
|
||||||
|
|
||||||
cmd := exec.Command(command, args...)
|
|
||||||
cmd.Stdout = os.Stdout
|
|
||||||
cmd.Stderr = os.Stderr
|
|
||||||
err := cmd.Run()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("error restarting containerd using systemd: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
|
// containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
|
||||||
func (o *options) containerAnnotationsFromCDIPrefixes() []string {
|
func (o *options) containerAnnotationsFromCDIPrefixes() []string {
|
||||||
var annotations []string
|
var annotations []string
|
||||||
|
@ -20,7 +20,6 @@ import (
|
|||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"os/exec"
|
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||||
@ -32,9 +31,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
restartModeSystemd = "systemd"
|
|
||||||
restartModeNone = "none"
|
|
||||||
|
|
||||||
defaultConfigMode = "hook"
|
defaultConfigMode = "hook"
|
||||||
|
|
||||||
// Hook-based settings
|
// Hook-based settings
|
||||||
@ -46,7 +42,7 @@ const (
|
|||||||
defaultSocket = "/var/run/crio/crio.sock"
|
defaultSocket = "/var/run/crio/crio.sock"
|
||||||
defaultRuntimeClass = "nvidia"
|
defaultRuntimeClass = "nvidia"
|
||||||
defaultSetAsDefault = true
|
defaultSetAsDefault = true
|
||||||
defaultRestartMode = restartModeSystemd
|
defaultRestartMode = "systemd"
|
||||||
defaultHostRootMount = "/host"
|
defaultHostRootMount = "/host"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -117,7 +113,8 @@ func main() {
|
|||||||
Value: "",
|
Value: "",
|
||||||
Destination: &options.Socket,
|
Destination: &options.Socket,
|
||||||
EnvVars: []string{"CRIO_SOCKET", "RUNTIME_SOCKET"},
|
EnvVars: []string{"CRIO_SOCKET", "RUNTIME_SOCKET"},
|
||||||
Hidden: true,
|
// Note: We hide this option since restarting cri-o via a socket is not supported.
|
||||||
|
Hidden: true,
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "restart-mode",
|
Name: "restart-mode",
|
||||||
@ -179,7 +176,6 @@ func main() {
|
|||||||
Destination: &options.configMode,
|
Destination: &options.configMode,
|
||||||
EnvVars: []string{"CRIO_CONFIG_MODE"},
|
EnvVars: []string{"CRIO_CONFIG_MODE"},
|
||||||
},
|
},
|
||||||
// The flags below are only used by the 'setup' command.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Update the subcommand flags with the common subcommand flags
|
// Update the subcommand flags with the common subcommand flags
|
||||||
@ -341,31 +337,5 @@ func generateOciHook(toolkitDir string) podmanHook {
|
|||||||
|
|
||||||
// RestartCrio restarts crio depending on the value of restartModeFlag
|
// RestartCrio restarts crio depending on the value of restartModeFlag
|
||||||
func RestartCrio(o *options) error {
|
func RestartCrio(o *options) error {
|
||||||
switch o.RestartMode {
|
return o.Restart("crio", func(string) error { return fmt.Errorf("supporting crio via signal is unsupported") })
|
||||||
case restartModeNone:
|
|
||||||
log.Warnf("Skipping restart of crio due to --restart-mode=%v", o.RestartMode)
|
|
||||||
return nil
|
|
||||||
case restartModeSystemd:
|
|
||||||
return RestartCrioSystemd(o.HostRootMount)
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// RestartCrioSystemd restarts cri-o using systemctl
|
|
||||||
func RestartCrioSystemd(hostRootMount string) error {
|
|
||||||
log.Infof("Restarting cri-o using systemd and host root mounted at %v", hostRootMount)
|
|
||||||
|
|
||||||
command := "chroot"
|
|
||||||
args := []string{hostRootMount, "systemctl", "restart", "crio"}
|
|
||||||
|
|
||||||
cmd := exec.Command(command, args...)
|
|
||||||
cmd.Stdout = os.Stdout
|
|
||||||
cmd.Stderr = os.Stderr
|
|
||||||
err := cmd.Run()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("error restarting crio using systemd: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
@ -31,9 +31,6 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
restartModeSignal = "signal"
|
|
||||||
restartModeNone = "none"
|
|
||||||
|
|
||||||
nvidiaRuntimeName = "nvidia"
|
nvidiaRuntimeName = "nvidia"
|
||||||
nvidiaRuntimeBinary = "nvidia-container-runtime"
|
nvidiaRuntimeBinary = "nvidia-container-runtime"
|
||||||
nvidiaExperimentalRuntimeName = "nvidia-experimental"
|
nvidiaExperimentalRuntimeName = "nvidia-experimental"
|
||||||
@ -44,7 +41,7 @@ const (
|
|||||||
defaultSetAsDefault = true
|
defaultSetAsDefault = true
|
||||||
// defaultRuntimeName specifies the NVIDIA runtime to be used as the default runtime if setting the default runtime is enabled
|
// defaultRuntimeName specifies the NVIDIA runtime to be used as the default runtime if setting the default runtime is enabled
|
||||||
defaultRuntimeName = nvidiaRuntimeName
|
defaultRuntimeName = nvidiaRuntimeName
|
||||||
defaultRestartMode = restartModeSignal
|
defaultRestartMode = "signal"
|
||||||
defaultHostRootMount = "/host"
|
defaultHostRootMount = "/host"
|
||||||
|
|
||||||
reloadBackoff = 5 * time.Second
|
reloadBackoff = 5 * time.Second
|
||||||
@ -119,7 +116,7 @@ func main() {
|
|||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "restart-mode",
|
Name: "restart-mode",
|
||||||
Usage: "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | none]",
|
Usage: "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | systemd | none ]",
|
||||||
Value: defaultRestartMode,
|
Value: defaultRestartMode,
|
||||||
Destination: &options.RestartMode,
|
Destination: &options.RestartMode,
|
||||||
EnvVars: []string{"DOCKER_RESTART_MODE", "RUNTIME_RESTART_MODE"},
|
EnvVars: []string{"DOCKER_RESTART_MODE", "RUNTIME_RESTART_MODE"},
|
||||||
@ -224,19 +221,7 @@ func Cleanup(c *cli.Context, o *options) error {
|
|||||||
|
|
||||||
// RestartDocker restarts docker depending on the value of restartModeFlag
|
// RestartDocker restarts docker depending on the value of restartModeFlag
|
||||||
func RestartDocker(o *options) error {
|
func RestartDocker(o *options) error {
|
||||||
switch o.RestartMode {
|
return o.Restart("docker", SignalDocker)
|
||||||
case restartModeNone:
|
|
||||||
log.Warnf("Skipping sending signal to docker due to --restart-mode=%v", o.RestartMode)
|
|
||||||
case restartModeSignal:
|
|
||||||
err := SignalDocker(o.Socket)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("unable to signal docker: %v", err)
|
|
||||||
}
|
|
||||||
default:
|
|
||||||
return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// SignalDocker sends a SIGHUP signal to docker daemon
|
// SignalDocker sends a SIGHUP signal to docker daemon
|
||||||
|
Loading…
Reference in New Issue
Block a user