mirror of
				https://github.com/NVIDIA/nvidia-container-toolkit
				synced 2025-06-26 18:18:24 +00:00 
			
		
		
		
	Rework restart logic
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
		
							parent
							
								
									761fc29567
								
							
						
					
					
						commit
						178eb5c5a8
					
				| @ -18,6 +18,8 @@ package container | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 
 | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/pkg/config/engine" | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator" | ||||
| @ -25,6 +27,12 @@ import ( | ||||
| 	"github.com/urfave/cli/v2" | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	restartModeNone    = "none" | ||||
| 	restartModeSignal  = "signal" | ||||
| 	restartModeSystemd = "systemd" | ||||
| ) | ||||
| 
 | ||||
| // Options defines the shared options for the CLIs to configure containers runtimes.
 | ||||
| type Options struct { | ||||
| 	Config        string | ||||
| @ -121,3 +129,41 @@ func (o Options) RevertConfig(cfg engine.Interface) error { | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // Restart restarts the specified service
 | ||||
| func (o Options) Restart(service string, withSignal func(string) error) error { | ||||
| 	switch o.RestartMode { | ||||
| 	case restartModeNone: | ||||
| 		logrus.Warnf("Skipping restart of %v due to --restart-mode=%v", service, o.RestartMode) | ||||
| 		return nil | ||||
| 	case restartModeSignal: | ||||
| 		return withSignal(o.Socket) | ||||
| 	case restartModeSystemd: | ||||
| 		return o.SystemdRestart(service) | ||||
| 	} | ||||
| 
 | ||||
| 	return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode) | ||||
| } | ||||
| 
 | ||||
| // SystemdRestart restarts the specified service using systemd
 | ||||
| func (o Options) SystemdRestart(service string) error { | ||||
| 	var args []string | ||||
| 	var msg string | ||||
| 	if o.HostRootMount != "" { | ||||
| 		msg = " on host" | ||||
| 		args = append(args, "chroot", o.HostRootMount) | ||||
| 	} | ||||
| 	args = append(args, "systemctl", "restart", service) | ||||
| 
 | ||||
| 	logrus.Infof("Restarting %v%v using systemd: %v", service, msg, args) | ||||
| 
 | ||||
| 	cmd := exec.Command(args[0], args[1:]...) | ||||
| 	cmd.Stdout = os.Stdout | ||||
| 	cmd.Stderr = os.Stderr | ||||
| 	err := cmd.Run() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("error restarting %v using systemd: %v", service, err) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
|  | ||||
| @ -20,7 +20,6 @@ import ( | ||||
| 	"fmt" | ||||
| 	"net" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 	"syscall" | ||||
| 	"time" | ||||
| 
 | ||||
| @ -32,10 +31,6 @@ import ( | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	restartModeSignal  = "signal" | ||||
| 	restartModeSystemd = "systemd" | ||||
| 	restartModeNone    = "none" | ||||
| 
 | ||||
| 	nvidiaRuntimeName               = "nvidia" | ||||
| 	nvidiaRuntimeBinary             = "nvidia-container-runtime" | ||||
| 	nvidiaExperimentalRuntimeName   = "nvidia-experimental" | ||||
| @ -46,7 +41,7 @@ const ( | ||||
| 	defaultRuntimeClass  = "nvidia" | ||||
| 	defaultRuntmeType    = "io.containerd.runc.v2" | ||||
| 	defaultSetAsDefault  = true | ||||
| 	defaultRestartMode   = restartModeSignal | ||||
| 	defaultRestartMode   = "signal" | ||||
| 	defaultHostRootMount = "/host" | ||||
| 
 | ||||
| 	reloadBackoff     = 5 * time.Second | ||||
| @ -257,31 +252,16 @@ func Cleanup(c *cli.Context, o *options) error { | ||||
| 
 | ||||
| // RestartContainerd restarts containerd depending on the value of restartModeFlag
 | ||||
| func RestartContainerd(o *options) error { | ||||
| 	switch o.RestartMode { | ||||
| 	case restartModeNone: | ||||
| 		log.Warnf("Skipping sending signal to containerd due to --restart-mode=%v", o.RestartMode) | ||||
| 		return nil | ||||
| 	case restartModeSignal: | ||||
| 		err := SignalContainerd(o) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("unable to signal containerd: %v", err) | ||||
| 		} | ||||
| 	case restartModeSystemd: | ||||
| 		return RestartContainerdSystemd(o.HostRootMount) | ||||
| 	default: | ||||
| 		return fmt.Errorf("Invalid restart mode specified: %v", o.RestartMode) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| 	return o.Restart("containerd", SignalContainerd) | ||||
| } | ||||
| 
 | ||||
| // SignalContainerd sends a SIGHUP signal to the containerd daemon
 | ||||
| func SignalContainerd(o *options) error { | ||||
| func SignalContainerd(socket string) error { | ||||
| 	log.Infof("Sending SIGHUP signal to containerd") | ||||
| 
 | ||||
| 	// Wrap the logic to perform the SIGHUP in a function so we can retry it on failure
 | ||||
| 	retriable := func() error { | ||||
| 		conn, err := net.Dial("unix", o.Socket) | ||||
| 		conn, err := net.Dial("unix", socket) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("unable to dial: %v", err) | ||||
| 		} | ||||
| @ -355,24 +335,6 @@ func SignalContainerd(o *options) error { | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // RestartContainerdSystemd restarts containerd using systemctl
 | ||||
| func RestartContainerdSystemd(hostRootMount string) error { | ||||
| 	log.Infof("Restarting containerd using systemd and host root mounted at %v", hostRootMount) | ||||
| 
 | ||||
| 	command := "chroot" | ||||
| 	args := []string{hostRootMount, "systemctl", "restart", "containerd"} | ||||
| 
 | ||||
| 	cmd := exec.Command(command, args...) | ||||
| 	cmd.Stdout = os.Stdout | ||||
| 	cmd.Stderr = os.Stderr | ||||
| 	err := cmd.Run() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("error restarting containerd using systemd: %v", err) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
 | ||||
| func (o *options) containerAnnotationsFromCDIPrefixes() []string { | ||||
| 	var annotations []string | ||||
|  | ||||
| @ -20,7 +20,6 @@ import ( | ||||
| 	"encoding/json" | ||||
| 	"fmt" | ||||
| 	"os" | ||||
| 	"os/exec" | ||||
| 	"path/filepath" | ||||
| 
 | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config" | ||||
| @ -32,9 +31,6 @@ import ( | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	restartModeSystemd = "systemd" | ||||
| 	restartModeNone    = "none" | ||||
| 
 | ||||
| 	defaultConfigMode = "hook" | ||||
| 
 | ||||
| 	// Hook-based settings
 | ||||
| @ -46,7 +42,7 @@ const ( | ||||
| 	defaultSocket        = "/var/run/crio/crio.sock" | ||||
| 	defaultRuntimeClass  = "nvidia" | ||||
| 	defaultSetAsDefault  = true | ||||
| 	defaultRestartMode   = restartModeSystemd | ||||
| 	defaultRestartMode   = "systemd" | ||||
| 	defaultHostRootMount = "/host" | ||||
| ) | ||||
| 
 | ||||
| @ -117,7 +113,8 @@ func main() { | ||||
| 			Value:       "", | ||||
| 			Destination: &options.Socket, | ||||
| 			EnvVars:     []string{"CRIO_SOCKET", "RUNTIME_SOCKET"}, | ||||
| 			Hidden:      true, | ||||
| 			// Note: We hide this option since restarting cri-o via a socket is not supported.
 | ||||
| 			Hidden: true, | ||||
| 		}, | ||||
| 		&cli.StringFlag{ | ||||
| 			Name:        "restart-mode", | ||||
| @ -179,7 +176,6 @@ func main() { | ||||
| 			Destination: &options.configMode, | ||||
| 			EnvVars:     []string{"CRIO_CONFIG_MODE"}, | ||||
| 		}, | ||||
| 		// The flags below are only used by the 'setup' command.
 | ||||
| 	} | ||||
| 
 | ||||
| 	// Update the subcommand flags with the common subcommand flags
 | ||||
| @ -341,31 +337,5 @@ func generateOciHook(toolkitDir string) podmanHook { | ||||
| 
 | ||||
| // RestartCrio restarts crio depending on the value of restartModeFlag
 | ||||
| func RestartCrio(o *options) error { | ||||
| 	switch o.RestartMode { | ||||
| 	case restartModeNone: | ||||
| 		log.Warnf("Skipping restart of crio due to --restart-mode=%v", o.RestartMode) | ||||
| 		return nil | ||||
| 	case restartModeSystemd: | ||||
| 		return RestartCrioSystemd(o.HostRootMount) | ||||
| 	default: | ||||
| 		return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // RestartCrioSystemd restarts cri-o using systemctl
 | ||||
| func RestartCrioSystemd(hostRootMount string) error { | ||||
| 	log.Infof("Restarting cri-o using systemd and host root mounted at %v", hostRootMount) | ||||
| 
 | ||||
| 	command := "chroot" | ||||
| 	args := []string{hostRootMount, "systemctl", "restart", "crio"} | ||||
| 
 | ||||
| 	cmd := exec.Command(command, args...) | ||||
| 	cmd.Stdout = os.Stdout | ||||
| 	cmd.Stderr = os.Stderr | ||||
| 	err := cmd.Run() | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("error restarting crio using systemd: %v", err) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| 	return o.Restart("crio", func(string) error { return fmt.Errorf("supporting crio via signal is unsupported") }) | ||||
| } | ||||
|  | ||||
| @ -31,9 +31,6 @@ import ( | ||||
| ) | ||||
| 
 | ||||
| const ( | ||||
| 	restartModeSignal = "signal" | ||||
| 	restartModeNone   = "none" | ||||
| 
 | ||||
| 	nvidiaRuntimeName               = "nvidia" | ||||
| 	nvidiaRuntimeBinary             = "nvidia-container-runtime" | ||||
| 	nvidiaExperimentalRuntimeName   = "nvidia-experimental" | ||||
| @ -44,7 +41,7 @@ const ( | ||||
| 	defaultSetAsDefault = true | ||||
| 	// defaultRuntimeName specifies the NVIDIA runtime to be use as the default runtime if setting the default runtime is enabled
 | ||||
| 	defaultRuntimeName   = nvidiaRuntimeName | ||||
| 	defaultRestartMode   = restartModeSignal | ||||
| 	defaultRestartMode   = "signal" | ||||
| 	defaultHostRootMount = "/host" | ||||
| 
 | ||||
| 	reloadBackoff     = 5 * time.Second | ||||
| @ -119,7 +116,7 @@ func main() { | ||||
| 		}, | ||||
| 		&cli.StringFlag{ | ||||
| 			Name:        "restart-mode", | ||||
| 			Usage:       "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | none]", | ||||
| 			Usage:       "Specify how docker should be restarted; If 'none' is selected it will not be restarted [signal | systemd | none ]", | ||||
| 			Value:       defaultRestartMode, | ||||
| 			Destination: &options.RestartMode, | ||||
| 			EnvVars:     []string{"DOCKER_RESTART_MODE", "RUNTIME_RESTART_MODE"}, | ||||
| @ -224,19 +221,7 @@ func Cleanup(c *cli.Context, o *options) error { | ||||
| 
 | ||||
| // RestartDocker restarts docker depending on the value of restartModeFlag
 | ||||
| func RestartDocker(o *options) error { | ||||
| 	switch o.RestartMode { | ||||
| 	case restartModeNone: | ||||
| 		log.Warnf("Skipping sending signal to docker due to --restart-mode=%v", o.RestartMode) | ||||
| 	case restartModeSignal: | ||||
| 		err := SignalDocker(o.Socket) | ||||
| 		if err != nil { | ||||
| 			return fmt.Errorf("unable to signal docker: %v", err) | ||||
| 		} | ||||
| 	default: | ||||
| 		return fmt.Errorf("invalid restart mode specified: %v", o.RestartMode) | ||||
| 	} | ||||
| 
 | ||||
| 	return nil | ||||
| 	return o.Restart("docker", SignalDocker) | ||||
| } | ||||
| 
 | ||||
| // SignalDocker sends a SIGHUP signal to docker daemon
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user