2021-10-11 14:31:02 +00:00
package main
import (
"fmt"
"os"
"os/signal"
"path/filepath"
"strings"
"syscall"
log "github.com/sirupsen/logrus"
2024-10-10 16:50:48 +00:00
"github.com/urfave/cli/v2"
"golang.org/x/sys/unix"
2024-09-19 11:35:42 +00:00
2024-09-27 09:09:24 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/runtime"
2024-09-19 11:35:42 +00:00
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/toolkit"
2021-10-11 14:31:02 +00:00
)
const (
	// toolkitPidFilename is the required base name of the pidfile.
	toolkitPidFilename = "toolkit.pid"
	// defaultPidFile is the default value of the --pid-file flag.
	defaultPidFile = "/run/nvidia/toolkit/" + toolkitPidFilename
	// toolkitSubDir is the folder below the install root that holds the toolkit.
	toolkitSubDir = "toolkit"

	// defaultRuntime is the default value of the --runtime flag.
	defaultRuntime = "docker"
	// defaultRuntimeArgs is the default value of the --runtime-args flag.
	defaultRuntimeArgs = ""
)
// availableRuntimes is the set of runtime names accepted by verifyFlags.
var availableRuntimes = map[string]struct{}{
	"docker":     struct{}{},
	"crio":       struct{}{},
	"containerd": struct{}{},
}
2024-08-08 22:40:00 +00:00
// defaultLowLevelRuntimes lists the low-level runtime names that Run appends
// to the detected low-level runtime paths when none are configured explicitly.
var defaultLowLevelRuntimes = []string{
	"docker-runc",
	"runc",
	"crun",
}
2021-10-11 14:31:02 +00:00
var (
	// waitingForSignal receives a value from waitForSignal once the main flow
	// is ready to be interrupted; the signal handler goroutine polls it.
	waitingForSignal = make(chan bool, 1)
	// signalReceived is written by the signal handler goroutine to release
	// waitForSignal.
	signalReceived = make(chan bool, 1)
)
2022-08-26 12:59:23 +00:00
// options stores the command line arguments
type options struct {
noDaemon bool
runtime string
runtimeArgs string
2022-08-26 13:58:29 +00:00
root string
2024-06-14 12:15:54 +00:00
pidFile string
2024-09-19 11:35:42 +00:00
toolkitOptions toolkit . Options
2024-09-27 09:09:24 +00:00
runtimeOptions runtime . Options
2024-09-19 11:35:42 +00:00
}
func ( o options ) toolkitRoot ( ) string {
return filepath . Join ( o . root , toolkitSubDir )
2022-08-26 12:59:23 +00:00
}
2021-10-11 14:31:02 +00:00
// Version is the version string reported by the CLI. It defaults to
// "development" and is meant to be overridden at build time via linker flags.
var Version = "development"
func main ( ) {
2022-08-26 13:58:29 +00:00
remainingArgs , root , err := ParseArgs ( os . Args )
if err != nil {
log . Errorf ( "Error: unable to parse arguments: %v" , err )
os . Exit ( 1 )
}
2022-08-26 12:59:23 +00:00
2024-09-19 11:35:42 +00:00
options := options {
toolkitOptions : toolkit . Options { } ,
}
2021-10-11 14:31:02 +00:00
// Create the top-level CLI
c := cli . NewApp ( )
c . Name = "nvidia-toolkit"
c . Usage = "Install the nvidia-container-toolkit for use by a given runtime"
2022-08-26 13:58:29 +00:00
c . UsageText = "[DESTINATION] [-n | --no-daemon] [-r | --runtime] [-u | --runtime-args]"
2021-10-11 14:31:02 +00:00
c . Description = "DESTINATION points to the host path underneath which the nvidia-container-toolkit should be installed.\nIt will be installed at ${DESTINATION}/toolkit"
c . Version = Version
2024-06-14 12:15:54 +00:00
c . Before = func ( ctx * cli . Context ) error {
return validateFlags ( ctx , & options )
}
2022-08-26 12:59:23 +00:00
c . Action = func ( ctx * cli . Context ) error {
return Run ( ctx , & options )
}
2021-10-11 14:31:02 +00:00
// Setup flags for the CLI
c . Flags = [ ] cli . Flag {
& cli . BoolFlag {
Name : "no-daemon" ,
Aliases : [ ] string { "n" } ,
2023-08-25 14:14:06 +00:00
Usage : "terminate immediately after setting up the runtime. Note that no cleanup will be performed" ,
2022-08-26 12:59:23 +00:00
Destination : & options . noDaemon ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "NO_DAEMON" } ,
} ,
& cli . StringFlag {
Name : "runtime" ,
Aliases : [ ] string { "r" } ,
Usage : "the runtime to setup on this node. One of {'docker', 'crio', 'containerd'}" ,
Value : defaultRuntime ,
2022-08-26 12:59:23 +00:00
Destination : & options . runtime ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "RUNTIME" } ,
} ,
2024-09-27 08:36:39 +00:00
// TODO: Remove runtime-args
2021-10-11 14:31:02 +00:00
& cli . StringFlag {
Name : "runtime-args" ,
Aliases : [ ] string { "u" } ,
Usage : "arguments to pass to 'docker', 'crio', or 'containerd' setup command" ,
Value : defaultRuntimeArgs ,
2022-08-26 12:59:23 +00:00
Destination : & options . runtimeArgs ,
2021-10-11 14:31:02 +00:00
EnvVars : [ ] string { "RUNTIME_ARGS" } ,
} ,
2022-08-26 13:58:29 +00:00
& cli . StringFlag {
Name : "root" ,
Value : root ,
Usage : "the folder where the NVIDIA Container Toolkit is to be installed. It will be installed to `ROOT`/toolkit" ,
Destination : & options . root ,
EnvVars : [ ] string { "ROOT" } ,
} ,
2024-06-14 12:15:54 +00:00
& cli . StringFlag {
Name : "pid-file" ,
Value : defaultPidFile ,
Usage : "the path to a toolkit.pid file to ensure that only a single configuration instance is running" ,
Destination : & options . pidFile ,
EnvVars : [ ] string { "TOOLKIT_PID_FILE" , "PID_FILE" } ,
} ,
2021-10-11 14:31:02 +00:00
}
2024-09-19 11:35:42 +00:00
c . Flags = append ( c . Flags , toolkit . Flags ( & options . toolkitOptions ) ... )
2024-09-27 09:09:24 +00:00
c . Flags = append ( c . Flags , runtime . Flags ( & options . runtimeOptions ) ... )
2024-09-19 11:35:42 +00:00
2021-10-11 14:31:02 +00:00
// Run the CLI
log . Infof ( "Starting %v" , c . Name )
if err := c . Run ( remainingArgs ) ; err != nil {
log . Errorf ( "error running nvidia-toolkit: %v" , err )
os . Exit ( 1 )
}
log . Infof ( "Completed %v" , c . Name )
}
2024-06-14 12:15:54 +00:00
func validateFlags ( _ * cli . Context , o * options ) error {
if filepath . Base ( o . pidFile ) != toolkitPidFilename {
return fmt . Errorf ( "invalid toolkit.pid path %v" , o . pidFile )
}
2024-09-19 11:35:42 +00:00
if err := toolkit . ValidateOptions ( & o . toolkitOptions , o . toolkitRoot ( ) ) ; err != nil {
return err
}
2024-09-27 08:36:39 +00:00
if err := runtime . ValidateOptions ( & o . runtimeOptions , o . runtime , o . toolkitRoot ( ) ) ; err != nil {
2024-09-27 09:09:24 +00:00
return err
}
2024-06-14 12:15:54 +00:00
return nil
}
2021-10-11 14:31:02 +00:00
// Run runs the core logic of the CLI
2022-08-26 12:59:23 +00:00
func Run ( c * cli . Context , o * options ) error {
err := verifyFlags ( o )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to verify flags: %v" , err )
}
2024-06-14 12:15:54 +00:00
err = initialize ( o . pidFile )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to initialize: %v" , err )
}
2024-06-14 12:15:54 +00:00
defer shutdown ( o . pidFile )
2021-10-11 14:31:02 +00:00
2024-08-08 22:40:00 +00:00
if len ( o . toolkitOptions . ContainerRuntimeRuntimes . Value ( ) ) == 0 {
lowlevelRuntimePaths , err := runtime . GetLowlevelRuntimePaths ( & o . runtimeOptions , o . runtime )
if err != nil {
return fmt . Errorf ( "unable to determine runtime options: %w" , err )
}
lowlevelRuntimePaths = append ( lowlevelRuntimePaths , defaultLowLevelRuntimes ... )
o . toolkitOptions . ContainerRuntimeRuntimes = * cli . NewStringSlice ( lowlevelRuntimePaths ... )
}
2024-10-27 10:30:07 +00:00
err = toolkit . Install ( c , & o . toolkitOptions , "" , o . toolkitRoot ( ) )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to install toolkit: %v" , err )
}
2024-09-27 08:36:39 +00:00
err = runtime . Setup ( c , & o . runtimeOptions , o . runtime )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to setup runtime: %v" , err )
}
2022-08-26 12:59:23 +00:00
if ! o . noDaemon {
2021-10-11 14:31:02 +00:00
err = waitForSignal ( )
if err != nil {
return fmt . Errorf ( "unable to wait for signal: %v" , err )
}
2024-09-27 08:36:39 +00:00
err = runtime . Cleanup ( c , & o . runtimeOptions , o . runtime )
2021-10-11 14:31:02 +00:00
if err != nil {
return fmt . Errorf ( "unable to cleanup runtime: %v" , err )
}
}
return nil
}
2022-08-26 13:58:29 +00:00
// ParseArgs checks if a single positional argument was defined and extracts this the root.
2024-10-10 16:50:48 +00:00
// If no positional arguments are defined, it is assumed that the root is specified as a flag.
2022-08-26 13:58:29 +00:00
func ParseArgs ( args [ ] string ) ( [ ] string , string , error ) {
2021-10-11 14:31:02 +00:00
log . Infof ( "Parsing arguments" )
2022-08-26 13:58:29 +00:00
if len ( args ) < 2 {
return args , "" , nil
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
var lastPositionalArg int
for i , arg := range args {
2021-10-11 14:31:02 +00:00
if strings . HasPrefix ( arg , "-" ) {
2022-08-26 13:58:29 +00:00
break
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
lastPositionalArg = i
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
if lastPositionalArg == 0 {
return args , "" , nil
2021-10-11 14:31:02 +00:00
}
2022-08-26 13:58:29 +00:00
if lastPositionalArg == 1 {
return append ( [ ] string { args [ 0 ] } , args [ 2 : ] ... ) , args [ 1 ] , nil
}
2021-10-11 14:31:02 +00:00
2022-08-26 13:58:29 +00:00
return nil , "" , fmt . Errorf ( "unexpected positional argument(s) %v" , args [ 2 : lastPositionalArg + 1 ] )
2021-10-11 14:31:02 +00:00
}
2022-08-26 12:59:23 +00:00
func verifyFlags ( o * options ) error {
2021-10-11 14:31:02 +00:00
log . Infof ( "Verifying Flags" )
2022-08-26 13:58:29 +00:00
if o . root == "" {
return fmt . Errorf ( "the install root must be specified" )
}
2022-08-26 12:59:23 +00:00
if _ , exists := availableRuntimes [ o . runtime ] ; ! exists {
return fmt . Errorf ( "unknown runtime: %v" , o . runtime )
2021-10-11 14:31:02 +00:00
}
return nil
}
2024-06-14 12:15:54 +00:00
func initialize ( pidFile string ) error {
2021-10-11 14:31:02 +00:00
log . Infof ( "Initializing" )
2024-06-14 12:15:54 +00:00
if dir := filepath . Dir ( pidFile ) ; dir != "" {
err := os . MkdirAll ( dir , 0755 )
if err != nil {
return fmt . Errorf ( "unable to create folder for pidfile: %w" , err )
}
}
2021-10-11 14:31:02 +00:00
f , err := os . Create ( pidFile )
if err != nil {
return fmt . Errorf ( "unable to create pidfile: %v" , err )
}
err = unix . Flock ( int ( f . Fd ( ) ) , unix . LOCK_EX | unix . LOCK_NB )
if err != nil {
2023-06-06 19:46:38 +00:00
log . Warningf ( "Unable to get exclusive lock on '%v'" , pidFile )
log . Warningf ( "This normally means an instance of the NVIDIA toolkit Container is already running, aborting" )
2021-10-11 14:31:02 +00:00
return fmt . Errorf ( "unable to get flock on pidfile: %v" , err )
}
_ , err = f . WriteString ( fmt . Sprintf ( "%v\n" , os . Getpid ( ) ) )
if err != nil {
return fmt . Errorf ( "unable to write PID to pidfile: %v" , err )
}
sigs := make ( chan os . Signal , 1 )
signal . Notify ( sigs , syscall . SIGHUP , syscall . SIGINT , syscall . SIGQUIT , syscall . SIGPIPE , syscall . SIGTERM )
go func ( ) {
<- sigs
select {
case <- waitingForSignal :
signalReceived <- true
default :
log . Infof ( "Signal received, exiting early" )
2024-06-14 12:15:54 +00:00
shutdown ( pidFile )
2021-10-11 14:31:02 +00:00
os . Exit ( 0 )
}
} ( )
return nil
}
// waitForSignal announces on waitingForSignal that the caller is ready to be
// interrupted and then blocks until a value arrives on signalReceived. It
// always returns nil.
func waitForSignal() error {
	log.Infof("Waiting for signal")

	// Tell the signal handler that it should hand the signal over to us.
	waitingForSignal <- true

	// Block until the handler reports that a signal arrived.
	<-signalReceived

	return nil
}
2024-06-14 12:15:54 +00:00
func shutdown ( pidFile string ) {
2021-10-11 14:31:02 +00:00
log . Infof ( "Shutting Down" )
err := os . Remove ( pidFile )
if err != nil {
2023-06-06 19:46:38 +00:00
log . Warningf ( "Unable to remove pidfile: %v" , err )
2021-10-11 14:31:02 +00:00
}
}