Convert toolkit to Go package

This change converts the toolkit installation logic to a Go package
and invokes the installation through the Go API instead of launching
the separate toolkit executable in a subprocess.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2024-09-19 13:35:42 +02:00 committed by Tariq Ibrahim
parent 6ca2700a17
commit 046a05921f
No known key found for this signature in database
GPG Key ID: 8367AA3C6B8DF06D
7 changed files with 51 additions and 113 deletions

View File

@ -12,6 +12,8 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2" cli "github.com/urfave/cli/v2"
unix "golang.org/x/sys/unix" unix "golang.org/x/sys/unix"
"github.com/NVIDIA/nvidia-container-toolkit/tools/container/toolkit"
) )
const ( const (
@ -20,7 +22,6 @@ const (
toolkitCommand = "toolkit" toolkitCommand = "toolkit"
toolkitSubDir = "toolkit" toolkitSubDir = "toolkit"
defaultToolkitArgs = ""
defaultRuntime = "docker" defaultRuntime = "docker"
defaultRuntimeArgs = "" defaultRuntimeArgs = ""
) )
@ -37,6 +38,12 @@ type options struct {
runtimeArgs string runtimeArgs string
root string root string
pidFile string pidFile string
toolkitOptions toolkit.Options
}
func (o options) toolkitRoot() string {
return filepath.Join(o.root, toolkitSubDir)
} }
// Version defines the CLI version. This is set at build time using LD FLAGS // Version defines the CLI version. This is set at build time using LD FLAGS
@ -49,7 +56,9 @@ func main() {
os.Exit(1) os.Exit(1)
} }
options := options{} options := options{
toolkitOptions: toolkit.Options{},
}
// Create the top-level CLI // Create the top-level CLI
c := cli.NewApp() c := cli.NewApp()
c.Name = "nvidia-toolkit" c.Name = "nvidia-toolkit"
@ -105,6 +114,8 @@ func main() {
}, },
} }
c.Flags = append(c.Flags, toolkit.Flags(&options.toolkitOptions)...)
// Run the CLI // Run the CLI
log.Infof("Starting %v", c.Name) log.Infof("Starting %v", c.Name)
if err := c.Run(remainingArgs); err != nil { if err := c.Run(remainingArgs); err != nil {
@ -119,6 +130,9 @@ func validateFlags(_ *cli.Context, o *options) error {
if filepath.Base(o.pidFile) != toolkitPidFilename { if filepath.Base(o.pidFile) != toolkitPidFilename {
return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile) return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile)
} }
if err := toolkit.ValidateOptions(&o.toolkitOptions, o.toolkitRoot()); err != nil {
return err
}
return nil return nil
} }
@ -136,7 +150,7 @@ func Run(c *cli.Context, o *options) error {
} }
defer shutdown(o.pidFile) defer shutdown(o.pidFile)
err = installToolkit(o) err = toolkit.Install(c, &o.toolkitOptions, o.toolkitRoot())
if err != nil { if err != nil {
return fmt.Errorf("unable to install toolkit: %v", err) return fmt.Errorf("unable to install toolkit: %v", err)
} }
@ -245,28 +259,6 @@ func initialize(pidFile string) error {
return nil return nil
} }
func installToolkit(o *options) error {
log.Infof("Installing toolkit")
cmdline := []string{
toolkitCommand,
"install",
"--toolkit-root",
filepath.Join(o.root, toolkitSubDir),
}
//nolint:gosec // TODO: Can we harden this so that there is less risk of command injection
cmd := exec.Command("sh", "-c", strings.Join(cmdline, " "))
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
err := cmd.Run()
if err != nil {
return fmt.Errorf("error running %v command: %v", cmdline, err)
}
return nil
}
func setupRuntime(o *options) error { func setupRuntime(o *options) error {
toolkitDir := filepath.Join(o.root, toolkitSubDir) toolkitDir := filepath.Join(o.root, toolkitSubDir)

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
*/ */
package main package toolkit
import ( import (
"fmt" "fmt"

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
*/ */
package main package toolkit
import ( import (
"bytes" "bytes"

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
*/ */
package main package toolkit
import "strings" import "strings"

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
*/ */
package main package toolkit
import ( import (
"fmt" "fmt"

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
*/ */
package main package toolkit
import ( import (
"bytes" "bytes"

View File

@ -14,7 +14,7 @@
# limitations under the License. # limitations under the License.
*/ */
package main package toolkit
import ( import (
"errors" "errors"
@ -49,7 +49,7 @@ const (
toolkitPidFilename = "toolkit.pid" toolkitPidFilename = "toolkit.pid"
) )
type options struct { type Options struct {
DriverRoot string DriverRoot string
DevRoot string DevRoot string
DriverRootCtrPath string DriverRootCtrPath string
@ -67,7 +67,6 @@ type options struct {
ContainerRuntimeHookSkipModeDetection bool ContainerRuntimeHookSkipModeDetection bool
ContainerCLIDebug string ContainerCLIDebug string
toolkitRoot string
cdiEnabled bool cdiEnabled bool
cdiOutputDir string cdiOutputDir string
@ -83,46 +82,7 @@ type options struct {
ignoreErrors bool ignoreErrors bool
} }
func main() { func Flags(opts *Options) []cli.Flag {
opts := options{}
// Create the top-level CLI
c := cli.NewApp()
c.Name = "toolkit"
c.Usage = "Manage the NVIDIA container toolkit"
c.Version = "0.1.0"
// Create the 'install' subcommand
install := cli.Command{}
install.Name = "install"
install.Usage = "Install the components of the NVIDIA container toolkit"
install.ArgsUsage = "<toolkit_directory>"
install.Before = func(c *cli.Context) error {
return validateOptions(c, &opts)
}
install.Action = func(c *cli.Context) error {
return Install(c, &opts)
}
// Create the 'delete' command
delete := cli.Command{}
delete.Name = "delete"
delete.Usage = "Delete the NVIDIA container toolkit"
delete.ArgsUsage = "<toolkit_directory>"
delete.Before = func(c *cli.Context) error {
return validateOptions(c, &opts)
}
delete.Action = func(c *cli.Context) error {
return TryDelete(c, &opts)
}
// Register the subcommand with the top-level CLI
c.Commands = []*cli.Command{
&install,
&delete,
}
flags := []cli.Flag{ flags := []cli.Flag{
&cli.StringFlag{ &cli.StringFlag{
Name: "driver-root", Name: "driver-root",
@ -209,13 +169,6 @@ func main() {
Destination: &opts.acceptNVIDIAVisibleDevicesAsVolumeMounts, Destination: &opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS"}, EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS"},
}, },
&cli.StringFlag{
Name: "toolkit-root",
Usage: "The directory where the NVIDIA Container toolkit is to be installed",
Required: true,
Destination: &opts.toolkitRoot,
EnvVars: []string{"TOOLKIT_ROOT"},
},
&cli.BoolFlag{ &cli.BoolFlag{
Name: "cdi-enabled", Name: "cdi-enabled",
Aliases: []string{"enable-cdi"}, Aliases: []string{"enable-cdi"},
@ -252,20 +205,13 @@ func main() {
}, },
} }
// Update the subcommand flags with the common subcommand flags return flags
install.Flags = append([]cli.Flag{}, flags...)
delete.Flags = append([]cli.Flag{}, flags...)
// Run the top-level CLI
if err := c.Run(os.Args); err != nil {
log.Fatal(fmt.Errorf("error: %v", err))
}
} }
// validateOptions checks whether the specified options are valid // ValidateOptions checks whether the specified options are valid
func validateOptions(c *cli.Context, opts *options) error { func ValidateOptions(opts *Options, toolkitRoot string) error {
if opts.toolkitRoot == "" { if toolkitRoot == "" {
return fmt.Errorf("invalid --toolkit-root option: %v", opts.toolkitRoot) return fmt.Errorf("invalid --toolkit-root option: %v", toolkitRoot)
} }
vendor, class := parser.ParseQualifier(opts.cdiKind) vendor, class := parser.ParseQualifier(opts.cdiKind)
@ -306,90 +252,90 @@ func validateOptions(c *cli.Context, opts *options) error {
// TryDelete attempts to remove the specified toolkit folder. // TryDelete attempts to remove the specified toolkit folder.
// A toolkit.pid file -- if present -- is skipped. // A toolkit.pid file -- if present -- is skipped.
func TryDelete(cli *cli.Context, opts *options) error { func TryDelete(cli *cli.Context, toolkitRoot string) error {
log.Infof("Attempting to delete NVIDIA container toolkit from '%v'", opts.toolkitRoot) log.Infof("Attempting to delete NVIDIA container toolkit from '%v'", toolkitRoot)
contents, err := os.ReadDir(opts.toolkitRoot) contents, err := os.ReadDir(toolkitRoot)
if err != nil && errors.Is(err, os.ErrNotExist) { if err != nil && errors.Is(err, os.ErrNotExist) {
return nil return nil
} else if err != nil { } else if err != nil {
return fmt.Errorf("failed to read the contents of %v: %w", opts.toolkitRoot, err) return fmt.Errorf("failed to read the contents of %v: %w", toolkitRoot, err)
} }
for _, content := range contents { for _, content := range contents {
if content.Name() == toolkitPidFilename { if content.Name() == toolkitPidFilename {
continue continue
} }
name := filepath.Join(opts.toolkitRoot, content.Name()) name := filepath.Join(toolkitRoot, content.Name())
if err := os.RemoveAll(name); err != nil { if err := os.RemoveAll(name); err != nil {
log.Warningf("could not remove %v: %v", name, err) log.Warningf("could not remove %v: %v", name, err)
} }
} }
if err := os.RemoveAll(opts.toolkitRoot); err != nil { if err := os.RemoveAll(toolkitRoot); err != nil {
log.Warningf("could not remove %v: %v", opts.toolkitRoot, err) log.Warningf("could not remove %v: %v", toolkitRoot, err)
} }
return nil return nil
} }
// Install installs the components of the NVIDIA container toolkit. // Install installs the components of the NVIDIA container toolkit.
// Any existing installation is removed. // Any existing installation is removed.
func Install(cli *cli.Context, opts *options) error { func Install(cli *cli.Context, opts *Options, toolkitRoot string) error {
log.Infof("Installing NVIDIA container toolkit to '%v'", opts.toolkitRoot) log.Infof("Installing NVIDIA container toolkit to '%v'", toolkitRoot)
log.Infof("Removing existing NVIDIA container toolkit installation") log.Infof("Removing existing NVIDIA container toolkit installation")
err := os.RemoveAll(opts.toolkitRoot) err := os.RemoveAll(toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error removing toolkit directory: %v", err) return fmt.Errorf("error removing toolkit directory: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err)) log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err))
} }
toolkitConfigDir := filepath.Join(opts.toolkitRoot, ".config", "nvidia-container-runtime") toolkitConfigDir := filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime")
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename) toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
err = createDirectories(opts.toolkitRoot, toolkitConfigDir) err = createDirectories(toolkitRoot, toolkitConfigDir)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("could not create required directories: %v", err) return fmt.Errorf("could not create required directories: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err)) log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err))
} }
err = installContainerLibraries(opts.toolkitRoot) err = installContainerLibraries(toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container library: %v", err) return fmt.Errorf("error installing NVIDIA container library: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err))
} }
err = installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot) err = installContainerRuntimes(toolkitRoot, opts.DriverRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container runtime: %v", err) return fmt.Errorf("error installing NVIDIA container runtime: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err)) log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err))
} }
nvidiaContainerCliExecutable, err := installContainerCLI(opts.toolkitRoot) nvidiaContainerCliExecutable, err := installContainerCLI(toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container CLI: %v", err) return fmt.Errorf("error installing NVIDIA container CLI: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err))
} }
nvidiaContainerRuntimeHookPath, err := installRuntimeHook(opts.toolkitRoot, toolkitConfigPath) nvidiaContainerRuntimeHookPath, err := installRuntimeHook(toolkitRoot, toolkitConfigPath)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)) log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err))
} }
nvidiaCTKPath, err := installContainerToolkitCLI(opts.toolkitRoot) nvidiaCTKPath, err := installContainerToolkitCLI(toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err) return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)
} else if err != nil { } else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err))
} }
nvidiaCDIHookPath, err := installContainerCDIHookCLI(opts.toolkitRoot) nvidiaCDIHookPath, err := installContainerCDIHookCLI(toolkitRoot)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err) return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)
} else if err != nil { } else if err != nil {
@ -470,7 +416,7 @@ func installLibrary(libName string, toolkitRoot string) error {
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
// that the settings are updated to match the desired install and nvidia driver directories. // that the settings are updated to match the desired install and nvidia driver directories.
func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *options) error { func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error {
log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
cfg, err := loadConfig(nvidiaContainerToolkitConfigSource) cfg, err := loadConfig(nvidiaContainerToolkitConfigSource)
@ -777,7 +723,7 @@ func createDirectories(dir ...string) error {
return nil return nil
} }
func createDeviceNodes(opts *options) error { func createDeviceNodes(opts *Options) error {
modes := opts.createDeviceNodes.Value() modes := opts.createDeviceNodes.Value()
if len(modes) == 0 { if len(modes) == 0 {
return nil return nil
@ -804,7 +750,7 @@ func createDeviceNodes(opts *options) error {
} }
// generateCDISpec generates a CDI spec for use in management containers // generateCDISpec generates a CDI spec for use in management containers
func generateCDISpec(opts *options, nvidiaCDIHookPath string) error { func generateCDISpec(opts *Options, nvidiaCDIHookPath string) error {
if !opts.cdiEnabled { if !opts.cdiEnabled {
return nil return nil
} }