2021-10-11 14:31:02 +00:00
package main
import (
"fmt"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
2024-10-10 16:50:48 +00:00
"github.com/urfave/cli/v2"
"golang.org/x/sys/unix"
2024-09-19 11:35:42 +00:00
2024-10-26 19:13:27 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
2024-09-27 09:09:24 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/runtime"
2024-09-19 11:35:42 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/toolkit"
2021-10-11 14:31:02 +00:00
)
// Well-known names and flag defaults used by the installer.
const (
	// toolkitSubDir is the folder, relative to the install root, in which the
	// toolkit is installed.
	toolkitSubDir = "toolkit"

	// toolkitPidFilename is the base name every pid file path must have.
	toolkitPidFilename = "toolkit.pid"
	// defaultPidFile is the default value of the --pid-file flag.
	defaultPidFile = "/run/nvidia/toolkit/" + toolkitPidFilename

	// defaultRuntime is the default value of the --runtime flag.
	defaultRuntime = "docker"
	// defaultRuntimeArgs is the default value of the --runtime-args flag.
	defaultRuntimeArgs = ""
)
var (
	// availableRuntimes is the set of names accepted as the runtime to set up.
	availableRuntimes = map[string]struct{}{
		"containerd": {},
		"crio":       {},
		"docker":     {},
	}

	// defaultLowLevelRuntimes lists the low-level runtime names that are
	// appended, in this order, when no runtimes were configured explicitly.
	defaultLowLevelRuntimes = []string{"docker-runc", "runc", "crun"}

	// waitingForSignal is sent to once the app is ready to be interrupted.
	// The buffer of one lets the sender proceed without a receiver waiting.
	waitingForSignal = make(chan bool, 1)
	// signalReceived is sent to by the signal handler to release the waiter.
	signalReceived = make(chan bool, 1)
)
2022-08-26 12:59:23 +00:00
// options stores the command line arguments
type options struct {
noDaemon bool
runtime string
runtimeArgs string
2022-08-26 13:58:29 +00:00
root string
2024-06-14 12:15:54 +00:00
pidFile string
2024-09-19 11:35:42 +00:00
toolkitOptions toolkit . Options
2024-09-27 09:09:24 +00:00
runtimeOptions runtime . Options
2024-09-19 11:35:42 +00:00
}
func ( o options ) toolkitRoot ( ) string {
return filepath . Join ( o . root , toolkitSubDir )
2022-08-26 12:59:23 +00:00
}
2021-10-11 14:31:02 +00:00
// Version defines the CLI version. It defaults to "development" and is set at
// build time using linker flags (LD FLAGS).
var Version = "development"
func main ( ) {
2024-10-26 19:13:27 +00:00
logger := logger . New ( )
remainingArgs , root , err := ParseArgs ( logger , os . Args )
2022-08-26 13:58:29 +00:00
if err != nil {
2024-10-26 19:13:27 +00:00
logger . Errorf ( "Error: unable to parse arguments: %v" , err )
os . Exit ( 1 )
}
c := NewApp ( logger , root )
// Run the CLI
logger . Infof ( "Starting %v" , c . Name )
if err := c . Run ( remainingArgs ) ; err != nil {
logger . Errorf ( "error running nvidia-toolkit: %v" , err )
2022-08-26 13:58:29 +00:00
os . Exit ( 1 )
}
2022-08-26 12:59:23 +00:00
2024-10-26 19:13:27 +00:00
logger . Infof ( "Completed %v" , c . Name )
}
// app represents the nvidia-ctk-installer and carries the state shared by its
// methods.
type app struct {
	// logger receives the log output produced by the app.
	logger logger.Interface
	// defaultRoot stores the root to use if the --root flag is not specified.
	defaultRoot string
}
// NewApp creates the CLI app for the specified logger and default root.
// defaultRoot is used as the root if it is not specified via the --root flag.
func NewApp(logger logger.Interface, defaultRoot string) *cli.App {
	return app{
		logger:      logger,
		defaultRoot: defaultRoot,
	}.build()
}
func ( a app ) build ( ) * cli . App {
2024-09-19 11:35:42 +00:00
options := options {
toolkitOptions : toolkit . Options { } ,
}
2021-10-11 14:31:02 +00:00
// Create the top-level CLI
c := cli . NewApp ( )
c . Name = "nvidia-toolkit"
c . Usage = "Install the nvidia-container-toolkit for use by a given runtime"
2022-08-26 13:58:29 +00:00
c . UsageText = "[DESTINATION] [-n | --no-daemon] [-r | --runtime] [-u | --runtime-args]"
2021-10-11 14:31:02 +00:00
c . Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit"
c . Version = Version
2024-06-14 12:15:54 +00:00
c . Before = func ( ctx * cli . Context ) error {
2024-10-26 19:13:27 +00:00
return a . Before ( ctx , & options )
2024-06-14 12:15:54 +00:00
}
2022-08-26 12:59:23 +00:00
c . Action = func ( ctx * cli . Context ) error {
2024-10-26 19:13:27 +00:00
return a . Run ( ctx , & options )
2022-08-26 12:59:23 +00:00
}
2021-10-11 14:31:02 +00:00
// Setup flags for the CLI
c . Flags = [ ] cli . Flag {
& cli . BoolFlag {
Name : "no-daemon" ,
Aliases : [ ] string { "n" } ,
2023-08-25 14:14:06 +00:00
Usage : "terminate immediately after setting up the runtime. Note that no cleanup will be performed" ,
2022-08-26 12:59:23 +00:00
Destination : & options . noDaemon ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "NO_DAEMON" } ,
} ,
& cli . StringFlag {
Name : "runtime" ,
Aliases : [ ] string { "r" } ,
Usage : "the runtime to setup on this node. One of {'docker', 'crio', 'containerd'}" ,
Value : defaultRuntime ,
2022-08-26 12:59:23 +00:00
Destination : & options . runtime ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "RUNTIME" } ,
} ,
2024-09-27 08:36:39 +00:00
// TODO: Remove runtime-args
2021-10-11 14:31:02 +00:00
& cli . StringFlag {
Name : "runtime-args" ,
Aliases : [ ] string { "u" } ,
Usage : "arguments to pass to 'docker', 'crio', or 'containerd' setup command" ,
Value : defaultRuntimeArgs ,
2022-08-26 12:59:23 +00:00
Destination : & options . runtimeArgs ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "RUNTIME_ARGS" } ,
} ,
2022-08-26 13:58:29 +00:00
& cli . StringFlag {
Name : "root" ,
2024-10-26 19:13:27 +00:00
Value : a . defaultRoot ,
2022-08-26 13:58:29 +00:00
Usage : "the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to `ROOT`/toolkit" ,
Destination : & options . root ,
EnvVars : [ ] string { "ROOT" } ,
} ,
2024-06-14 12:15:54 +00:00
& cli . StringFlag {
Name : "pid-file" ,
Value : defaultPidFile ,
Usage : "the path to a toolkit.pid file to ensure that only a single configuration instance is running" ,
Destination : & options . pidFile ,
EnvVars : [ ] string { "TOOLKIT_PID_FILE" , "PID_FILE" } ,
} ,
2021-10-11 14:31:02 +00:00
}
2024-09-19 11:35:42 +00:00
c . Flags = append ( c . Flags , toolkit . Flags ( & options . toolkitOptions ) ... )
2024-09-27 09:09:24 +00:00
c . Flags = append ( c . Flags , runtime . Flags ( & options . runtimeOptions ) ... )
2024-09-19 11:35:42 +00:00
2024-10-26 19:13:27 +00:00
return c
}
2021-10-11 14:31:02 +00:00
2024-10-26 19:13:27 +00:00
func ( a * app ) Before ( c * cli . Context , o * options ) error {
return a . validateFlags ( c , o )
2021-10-11 14:31:02 +00:00
}
2024-10-26 19:13:27 +00:00
func ( a * app ) validateFlags ( _ * cli . Context , o * options ) error {
2024-10-26 19:10:14 +00:00
if o . root == "" {
return fmt . Errorf ( "the install root must be specified" )
}
if _ , exists := availableRuntimes [ o . runtime ] ; ! exists {
return fmt . Errorf ( "unknown runtime: %v" , o . runtime )
}
2024-06-14 12:15:54 +00:00
if filepath . Base ( o . pidFile ) != toolkitPidFilename {
return fmt . Errorf ( "invalid toolkit.pid path %v" , o . pidFile )
}
2024-10-26 19:13:27 +00:00
2024-09-19 11:35:42 +00:00
if err := toolkit . ValidateOptions ( & o . toolkitOptions , o . toolkitRoot ( ) ) ; err != nil {
return err
}
2024-09-27 08:36:39 +00:00
if err := runtime . ValidateOptions ( & o . runtimeOptions , o . runtime , o . toolkitRoot ( ) ) ; err != nil {
2024-09-27 09:09:24 +00:00
return err
}
2024-06-14 12:15:54 +00:00
return nil
}
2021-10-11 14:31:02 +00:00
// Run runs the core logic of the CLI
2024-10-26 19:13:27 +00:00
func ( a * app ) Run ( c * cli . Context , o * options ) error {
err := a . initialize ( o . pidFile )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to initialize: %v" , err )
}
2024-10-26 19:13:27 +00:00
defer a . shutdown ( o . pidFile )
2021-10-11 14:31:02 +00:00
2024-08-08 22:40:00 +00:00
if len ( o . toolkitOptions . ContainerRuntimeRuntimes . Value ( ) ) == 0 {
lowlevelRuntimePaths , err := runtime . GetLowlevelRuntimePaths ( & o . runtimeOptions , o . runtime )
if err != nil {
return fmt . Errorf ( "unable to determine runtime options: %w" , err )
}
lowlevelRuntimePaths = append ( lowlevelRuntimePaths , defaultLowLevelRuntimes ... )
o . toolkitOptions . ContainerRuntimeRuntimes = * cli . NewStringSlice ( lowlevelRuntimePaths ... )
}
2024-10-27 10:30:07 +00:00
err = toolkit . Install ( c , & o . toolkitOptions , "" , o . toolkitRoot ( ) )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to install toolkit: %v" , err )
}
2024-09-27 08:36:39 +00:00
err = runtime . Setup ( c , & o . runtimeOptions , o . runtime )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to setup runtime: %v" , err )
}
2022-08-26 12:59:23 +00:00
if ! o . noDaemon {
2024-10-26 19:13:27 +00:00
err = a . waitForSignal ( )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to wait for signal: %v" , err )
}
2024-09-27 08:36:39 +00:00
err = runtime . Cleanup ( c , & o . runtimeOptions , o . runtime )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to cleanup runtime: %v" , err )
}
}
return nil
}
2022-08-26 13:58:29 +00:00
// ParseArgs checks if a single positional argument was defined and extracts this the root.
2024-10-10 16:50:48 +00:00
// If no positional arguments are defined, it is assumed that the root is specified as a flag.
2024-10-26 19:13:27 +00:00
func ParseArgs ( logger logger . Interface , args [ ] string ) ( [ ] string , string , error ) {
logger . Infof ( "Parsing arguments" )
2021-10-11 14:31:02 +00:00
2022-08-26 13:58:29 +00:00
if len ( args ) < 2 {
return args , "" , nil
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
var lastPositionalArg int
for i , arg := range args {
2021-10-11 14:31:02 +00:00
if strings . HasPrefix ( arg , "-" ) {
2022-08-26 13:58:29 +00:00
break
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
lastPositionalArg = i
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
if lastPositionalArg == 0 {
return args , "" , nil
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
if lastPositionalArg == 1 {
return append ( [ ] string { args [ 0 ] } , args [ 2 : ] ... ) , args [ 1 ] , nil
}
2021-10-11 14:31:02 +00:00
2022-08-26 13:58:29 +00:00
return nil , "" , fmt . Errorf ( "unexpected positional argument(s) %v" , args [ 2 : lastPositionalArg + 1 ] )
2021-10-11 14:31:02 +00:00
}
2024-10-26 19:13:27 +00:00
func ( a * app ) initialize ( pidFile string ) error {
a . logger . Infof ( "Initializing" )
2021-10-11 14:31:02 +00:00
2024-06-14 12:15:54 +00:00
if dir := filepath . Dir ( pidFile ) ; dir != "" {
err := os . MkdirAll ( dir , 0755 )
if err != nil {
return fmt . Errorf ( "unable to create folder for pidfile: %w" , err )
}
}
2021-10-11 14:31:02 +00:00
f , err := os . Create ( pidFile )
if err != nil {
return fmt . Errorf ( "unable to create pidfile: %v" , err )
}
err = unix . Flock ( int ( f . Fd ( ) ) , unix . LOCK_EX | unix . LOCK_NB )
if err != nil {
2024-10-26 19:13:27 +00:00
a . logger . Warningf ( "Unable to get exclusive lock on '%v'" , pidFile )
a . logger . Warningf ( "This normally means an instance of the NVIDIA toolkit Container is already running, aborting" )
2021-10-11 14:31:02 +00:00
return fmt . Errorf ( "unable to get flock on pidfile: %v" , err )
}
_ , err = f . WriteString ( fmt . Sprintf ( "%v\n" , os . Getpid ( ) ) )
if err != nil {
return fmt . Errorf ( "unable to write PID to pidfile: %v" , err )
}
sigs := make ( chan os . Signal , 1 )
signal . Notify ( sigs , syscall . SIGHUP , syscall . SIGINT , syscall . SIGQUIT , syscall . SIGPIPE , syscall . SIGTERM )
go func ( ) {
<- sigs
select {
case <- waitingForSignal :
signalReceived <- true
default :
2024-10-26 19:13:27 +00:00
a . logger . Infof ( "Signal received, exiting early" )
a . shutdown ( pidFile )
2021-10-11 14:31:02 +00:00
os . Exit ( 0 )
}
} ( )
return nil
}
2024-10-26 19:13:27 +00:00
func ( a * app ) waitForSignal ( ) error {
a . logger . Infof ( "Waiting for signal" )
2021-10-11 14:31:02 +00:00
waitingForSignal <- true
<- signalReceived
return nil
}
2024-10-26 19:13:27 +00:00
func ( a * app ) shutdown ( pidFile string ) {
a . logger . Infof ( "Shutting Down" )
2021-10-11 14:31:02 +00:00
err := os . Remove ( pidFile )
if err != nil {
2024-10-26 19:13:27 +00:00
a . logger . Warningf ( "Unable to remove pidfile: %v" , err )
2021-10-11 14:31:02 +00:00
}
}