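// Command nvidia-toolkit installs the nvidia-container-toolkit underneath a
// host path and sets it up for use by a given container runtime.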
package main
import (
"fmt"
"os"
"os/exec"
"os/signal"
"path/filepath"
"strings"
"syscall"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
unix "golang.org/x/sys/unix"
)
// File-system locations, helper command names and flag defaults used by the
// nvidia-toolkit command.
const (
	// runDir is the directory that holds the pidfile.
	runDir = "/run/nvidia"
	// pidFile is the path of the file that receives this process's PID.
	pidFile = runDir + "/toolkit.pid"

	// toolkitCommand is the name of the executable used to install the toolkit.
	toolkitCommand = "toolkit"
	// toolkitSubDir is the directory, relative to the install root, that
	// the toolkit is installed into.
	toolkitSubDir = "toolkit"

	// Default values for the runtime and argument settings.
	defaultRuntime     = "docker"
	defaultRuntimeArgs = ""
	defaultToolkitArgs = ""
)
var (
	// availableRuntimes is the set of runtime names that are accepted.
	availableRuntimes = map[string]struct{}{
		"containerd": {},
		"crio":       {},
		"docker":     {},
	}

	// waitingForSignal receives a value once the program has reached the
	// point where it waits for a termination signal.
	waitingForSignal = make(chan bool, 1)

	// signalReceived receives a value when a signal arrives while the
	// program is waiting for one.
	signalReceived = make(chan bool, 1)
)
// options stores the command line arguments.
type options struct {
	// noDaemon makes the program return right after the runtime has been set
	// up, without waiting for a signal and without running the cleanup.
	noDaemon bool
	// runtime is the name of the runtime to set up; it has to be one of the
	// keys of availableRuntimes.
	runtime string
	// runtimeArgs holds additional arguments that are inserted into the
	// runtime setup and cleanup command lines.
	runtimeArgs string
	// root is the install root; the toolkit is installed into the toolkit
	// sub-directory below it.
	root string
}
// Version defines the CLI version. It defaults to "development" and is meant
// to be overridden at build time using linker flags (-ldflags).
var Version = "development"
func main ( ) {
2022-08-26 13:58:29 +00:00
remainingArgs , root , err := ParseArgs ( os . Args )
if err != nil {
log . Errorf ( "Error: unable to parse arguments: %v" , err )
os . Exit ( 1 )
}
2022-08-26 12:59:23 +00:00
options := options { }
2021-10-11 14:31:02 +00:00
// Create the top-level CLI
c := cli . NewApp ( )
c . Name = "nvidia-toolkit"
c . Usage = "Install the nvidia-container-toolkit for use by a given runtime"
2022-08-26 13:58:29 +00:00
c . UsageText = "[DESTINATION] [-n | --no-daemon] [-r | --runtime] [-u | --runtime-args]"
2021-10-11 14:31:02 +00:00
c . Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit"
c . Version = Version
2022-08-26 12:59:23 +00:00
c . Action = func ( ctx * cli . Context ) error {
return Run ( ctx , & options )
}
2021-10-11 14:31:02 +00:00
// Setup flags for the CLI
c . Flags = [ ] cli . Flag {
& cli . BoolFlag {
Name : "no-daemon" ,
Aliases : [ ] string { "n" } ,
2023-08-25 14:14:06 +00:00
Usage : "terminate immediately after setting up the runtime. Note that no cleanup will be performed" ,
2022-08-26 12:59:23 +00:00
Destination : & options . noDaemon ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "NO_DAEMON" } ,
} ,
& cli . StringFlag {
Name : "runtime" ,
Aliases : [ ] string { "r" } ,
Usage : "the runtime to setup on this node. One of {'docker', 'crio', 'containerd'}" ,
Value : defaultRuntime ,
2022-08-26 12:59:23 +00:00
Destination : & options . runtime ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "RUNTIME" } ,
} ,
& cli . StringFlag {
Name : "runtime-args" ,
Aliases : [ ] string { "u" } ,
Usage : "arguments to pass to 'docker', 'crio', or 'containerd' setup command" ,
Value : defaultRuntimeArgs ,
2022-08-26 12:59:23 +00:00
Destination : & options . runtimeArgs ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "RUNTIME_ARGS" } ,
} ,
2022-08-26 13:58:29 +00:00
& cli . StringFlag {
Name : "root" ,
Value : root ,
Usage : "the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to `ROOT`/toolkit" ,
Destination : & options . root ,
EnvVars : [ ] string { "ROOT" } ,
} ,
2021-10-11 14:31:02 +00:00
}
// Run the CLI
log . Infof ( "Starting %v" , c . Name )
if err := c . Run ( remainingArgs ) ; err != nil {
log . Errorf ( "error running nvidia-toolkit: %v" , err )
os . Exit ( 1 )
}
log . Infof ( "Completed %v" , c . Name )
}
// Run runs the core logic of the CLI
2022-08-26 12:59:23 +00:00
func Run ( c * cli . Context , o * options ) error {
err := verifyFlags ( o )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to verify flags: %v" , err )
}
err = initialize ( )
if err != nil {
return fmt . Errorf ( "unable to initialize: %v" , err )
}
defer shutdown ( )
2022-08-26 13:04:07 +00:00
err = installToolkit ( o )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to install toolkit: %v" , err )
}
2022-08-26 12:59:23 +00:00
err = setupRuntime ( o )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to setup runtime: %v" , err )
}
2022-08-26 12:59:23 +00:00
if ! o . noDaemon {
2021-10-11 14:31:02 +00:00
err = waitForSignal ( )
if err != nil {
return fmt . Errorf ( "unable to wait for signal: %v" , err )
}
2022-08-26 12:59:23 +00:00
err = cleanupRuntime ( o )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to cleanup runtime: %v" , err )
}
}
return nil
}
2022-08-26 13:58:29 +00:00
// ParseArgs checks if a single positional argument was defined and extracts this the root.
// If no positional arguments are defined, the it is assumed that the root is specified as a flag.
func ParseArgs ( args [ ] string ) ( [ ] string , string , error ) {
2021-10-11 14:31:02 +00:00
log . Infof ( "Parsing arguments" )
2022-08-26 13:58:29 +00:00
if len ( args ) < 2 {
return args , "" , nil
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
var lastPositionalArg int
for i , arg := range args {
2021-10-11 14:31:02 +00:00
if strings . HasPrefix ( arg , "-" ) {
2022-08-26 13:58:29 +00:00
break
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
lastPositionalArg = i
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
if lastPositionalArg == 0 {
return args , "" , nil
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
if lastPositionalArg == 1 {
return append ( [ ] string { args [ 0 ] } , args [ 2 : ] ... ) , args [ 1 ] , nil
}
2021-10-11 14:31:02 +00:00
2022-08-26 13:58:29 +00:00
return nil , "" , fmt . Errorf ( "unexpected positional argument(s) %v" , args [ 2 : lastPositionalArg + 1 ] )
2021-10-11 14:31:02 +00:00
}
2022-08-26 12:59:23 +00:00
func verifyFlags ( o * options ) error {
2021-10-11 14:31:02 +00:00
log . Infof ( "Verifying Flags" )
2022-08-26 13:58:29 +00:00
if o . root == "" {
return fmt . Errorf ( "the install root must be specified" )
}
2022-08-26 12:59:23 +00:00
if _ , exists := availableRuntimes [ o . runtime ] ; ! exists {
return fmt . Errorf ( "unknown runtime: %v" , o . runtime )
2021-10-11 14:31:02 +00:00
}
return nil
}
func initialize ( ) error {
log . Infof ( "Initializing" )
f , err := os . Create ( pidFile )
if err != nil {
return fmt . Errorf ( "unable to create pidfile: %v" , err )
}
err = unix . Flock ( int ( f . Fd ( ) ) , unix . LOCK_EX | unix . LOCK_NB )
if err != nil {
2023-06-06 19:46:38 +00:00
log . Warningf ( "Unable to get exclusive lock on '%v'" , pidFile )
log . Warningf ( "This normally means an instance of the NVIDIA toolkit Container is already running, aborting" )
2021-10-11 14:31:02 +00:00
return fmt . Errorf ( "unable to get flock on pidfile: %v" , err )
}
_ , err = f . WriteString ( fmt . Sprintf ( "%v\n" , os . Getpid ( ) ) )
if err != nil {
return fmt . Errorf ( "unable to write PID to pidfile: %v" , err )
}
sigs := make ( chan os . Signal , 1 )
signal . Notify ( sigs , syscall . SIGHUP , syscall . SIGINT , syscall . SIGQUIT , syscall . SIGPIPE , syscall . SIGTERM )
go func ( ) {
<- sigs
select {
case <- waitingForSignal :
signalReceived <- true
default :
log . Infof ( "Signal received, exiting early" )
shutdown ( )
os . Exit ( 0 )
}
} ( )
return nil
}
2022-08-26 13:04:07 +00:00
func installToolkit ( o * options ) error {
2021-10-11 14:31:02 +00:00
log . Infof ( "Installing toolkit" )
2022-07-25 08:26:01 +00:00
cmdline := [ ] string {
toolkitCommand ,
"install" ,
"--toolkit-root" ,
2022-08-26 13:04:07 +00:00
filepath . Join ( o . root , toolkitSubDir ) ,
2022-07-25 08:26:01 +00:00
}
2023-08-28 09:07:04 +00:00
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
2022-07-25 08:26:01 +00:00
cmd := exec . Command ( "sh" , "-c" , strings . Join ( cmdline , " " ) )
2021-10-11 14:31:02 +00:00
cmd . Stdout = os . Stdout
cmd . Stderr = os . Stderr
err := cmd . Run ( )
if err != nil {
2022-07-25 08:26:01 +00:00
return fmt . Errorf ( "error running %v command: %v" , cmdline , err )
2021-10-11 14:31:02 +00:00
}
return nil
}
2022-08-26 12:59:23 +00:00
func setupRuntime ( o * options ) error {
2022-08-26 13:04:07 +00:00
toolkitDir := filepath . Join ( o . root , toolkitSubDir )
2021-10-11 14:31:02 +00:00
log . Infof ( "Setting up runtime" )
2022-08-26 12:59:23 +00:00
cmdline := fmt . Sprintf ( "%v setup %v %v\n" , o . runtime , o . runtimeArgs , toolkitDir )
2021-10-11 14:31:02 +00:00
2023-08-28 09:07:04 +00:00
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
2021-10-11 14:31:02 +00:00
cmd := exec . Command ( "sh" , "-c" , cmdline )
cmd . Stdout = os . Stdout
cmd . Stderr = os . Stderr
err := cmd . Run ( )
if err != nil {
2022-08-26 12:59:23 +00:00
return fmt . Errorf ( "error running %v command: %v" , o . runtime , err )
2021-10-11 14:31:02 +00:00
}
return nil
}
// waitForSignal announces on waitingForSignal that the program is now waiting
// and then blocks until a value arrives on signalReceived. It always returns
// nil.
func waitForSignal() error {
	log.Infof("Waiting for signal")

	const waiting = true
	waitingForSignal <- waiting

	// Block until a value is delivered.
	<-signalReceived

	return nil
}
2022-08-26 12:59:23 +00:00
func cleanupRuntime ( o * options ) error {
2022-08-26 13:04:07 +00:00
toolkitDir := filepath . Join ( o . root , toolkitSubDir )
2021-10-11 14:31:02 +00:00
log . Infof ( "Cleaning up Runtime" )
2022-08-26 12:59:23 +00:00
cmdline := fmt . Sprintf ( "%v cleanup %v %v\n" , o . runtime , o . runtimeArgs , toolkitDir )
2021-10-11 14:31:02 +00:00
2023-08-28 09:07:04 +00:00
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
2021-10-11 14:31:02 +00:00
cmd := exec . Command ( "sh" , "-c" , cmdline )
cmd . Stdout = os . Stdout
cmd . Stderr = os . Stderr
err := cmd . Run ( )
if err != nil {
2022-08-26 12:59:23 +00:00
return fmt . Errorf ( "error running %v command: %v" , o . runtime , err )
2021-10-11 14:31:02 +00:00
}
return nil
}
func shutdown ( ) {
log . Infof ( "Shutting Down" )
err := os . Remove ( pidFile )
if err != nil {
2023-06-06 19:46:38 +00:00
log . Warningf ( "Unable to remove pidfile: %v" , err )
2021-10-11 14:31:02 +00:00
}
}