From 046a05921f5e2f6d34a08b3229dc3bae97744939 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Thu, 19 Sep 2024 13:35:42 +0200 Subject: [PATCH] Convert toolkit to go package This change converts the toolkit installation logic to a go package and invokes this installation over the go API instead of starting this executable. Signed-off-by: Evan Lezar --- tools/container/nvidia-toolkit/run.go | 42 ++++---- tools/container/toolkit/executable.go | 2 +- tools/container/toolkit/executable_test.go | 2 +- tools/container/toolkit/replacements.go | 2 +- tools/container/toolkit/runtime.go | 2 +- tools/container/toolkit/runtime_test.go | 2 +- tools/container/toolkit/toolkit.go | 112 ++++++--------------- 7 files changed, 51 insertions(+), 113 deletions(-) diff --git a/tools/container/nvidia-toolkit/run.go b/tools/container/nvidia-toolkit/run.go index 0eea1e68..fbb5c400 100644 --- a/tools/container/nvidia-toolkit/run.go +++ b/tools/container/nvidia-toolkit/run.go @@ -12,6 +12,8 @@ import ( log "github.com/sirupsen/logrus" cli "github.com/urfave/cli/v2" unix "golang.org/x/sys/unix" + + "github.com/NVIDIA/nvidia-container-toolkit/tools/container/toolkit" ) const ( @@ -20,7 +22,6 @@ const ( toolkitCommand = "toolkit" toolkitSubDir = "toolkit" - defaultToolkitArgs = "" defaultRuntime = "docker" defaultRuntimeArgs = "" ) @@ -37,6 +38,12 @@ type options struct { runtimeArgs string root string pidFile string + + toolkitOptions toolkit.Options +} + +func (o options) toolkitRoot() string { + return filepath.Join(o.root, toolkitSubDir) } // Version defines the CLI version. This is set at build time using LD FLAGS @@ -49,7 +56,9 @@ func main() { os.Exit(1) } - options := options{} + options := options{ + toolkitOptions: toolkit.Options{}, + } // Create the top-level CLI c := cli.NewApp() c.Name = "nvidia-toolkit" @@ -105,6 +114,8 @@ func main() { }, } + c.Flags = append(c.Flags, toolkit.Flags(&options.toolkitOptions)...) + // Run the CLI log.Infof("Starting %v", c.Name) if err := c.Run(remainingArgs); err != nil { @@ -119,6 +130,9 @@ func validateFlags(_ *cli.Context, o *options) error { if filepath.Base(o.pidFile) != toolkitPidFilename { return fmt.Errorf("invalid toolkit.pid path %v", o.pidFile) } + if err := toolkit.ValidateOptions(&o.toolkitOptions, o.toolkitRoot()); err != nil { + return err + } return nil } @@ -136,7 +150,7 @@ func Run(c *cli.Context, o *options) error { } defer shutdown(o.pidFile) - err = installToolkit(o) + err = toolkit.Install(c, &o.toolkitOptions, o.toolkitRoot()) if err != nil { return fmt.Errorf("unable to install toolkit: %v", err) } @@ -245,28 +259,6 @@ func initialize(pidFile string) error { return nil } -func installToolkit(o *options) error { - log.Infof("Installing toolkit") - - cmdline := []string{ - toolkitCommand, - "install", - "--toolkit-root", - filepath.Join(o.root, toolkitSubDir), - } - - //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection - cmd := exec.Command("sh", "-c", strings.Join(cmdline, " ")) - cmd.Stdout = os.Stdout - cmd.Stderr = os.Stderr - err := cmd.Run() - if err != nil { - return fmt.Errorf("error running %v command: %v", cmdline, err) - } - - return nil -} - func setupRuntime(o *options) error { toolkitDir := filepath.Join(o.root, toolkitSubDir) diff --git a/tools/container/toolkit/executable.go b/tools/container/toolkit/executable.go index 0d59e375..394ca007 100644 --- a/tools/container/toolkit/executable.go +++ b/tools/container/toolkit/executable.go @@ -14,7 +14,7 @@ # limitations under the License. */ -package main +package toolkit import ( "fmt" diff --git a/tools/container/toolkit/executable_test.go b/tools/container/toolkit/executable_test.go index 572ee2bb..8cb47596 100644 --- a/tools/container/toolkit/executable_test.go +++ b/tools/container/toolkit/executable_test.go @@ -14,7 +14,7 @@ # limitations under the License. */ -package main +package toolkit import ( "bytes" diff --git a/tools/container/toolkit/replacements.go b/tools/container/toolkit/replacements.go index 4ff67a40..7605093c 100644 --- a/tools/container/toolkit/replacements.go +++ b/tools/container/toolkit/replacements.go @@ -14,7 +14,7 @@ # limitations under the License. */ -package main +package toolkit import "strings" diff --git a/tools/container/toolkit/runtime.go b/tools/container/toolkit/runtime.go index d2e0b69f..bdfca983 100644 --- a/tools/container/toolkit/runtime.go +++ b/tools/container/toolkit/runtime.go @@ -14,7 +14,7 @@ # limitations under the License. */ -package main +package toolkit import ( "fmt" diff --git a/tools/container/toolkit/runtime_test.go b/tools/container/toolkit/runtime_test.go index 61fa8b9e..d2841506 100644 --- a/tools/container/toolkit/runtime_test.go +++ b/tools/container/toolkit/runtime_test.go @@ -14,7 +14,7 @@ # limitations under the License. */ -package main +package toolkit import ( "bytes" diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index 8175ed4e..484d7891 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -14,7 +14,7 @@ # limitations under the License. */ -package main +package toolkit import ( "errors" @@ -49,7 +49,7 @@ const ( toolkitPidFilename = "toolkit.pid" ) -type options struct { +type Options struct { DriverRoot string DevRoot string DriverRootCtrPath string @@ -67,7 +67,6 @@ type options struct { ContainerRuntimeHookSkipModeDetection bool ContainerCLIDebug string - toolkitRoot string cdiEnabled bool cdiOutputDir string @@ -83,46 +82,7 @@ type options struct { ignoreErrors bool } -func main() { - - opts := options{} - - // Create the top-level CLI - c := cli.NewApp() - c.Name = "toolkit" - c.Usage = "Manage the NVIDIA container toolkit" - c.Version = "0.1.0" - - // Create the 'install' subcommand - install := cli.Command{} - install.Name = "install" - install.Usage = "Install the components of the NVIDIA container toolkit" - install.ArgsUsage = "" - install.Before = func(c *cli.Context) error { - return validateOptions(c, &opts) - } - install.Action = func(c *cli.Context) error { - return Install(c, &opts) - } - - // Create the 'delete' command - delete := cli.Command{} - delete.Name = "delete" - delete.Usage = "Delete the NVIDIA container toolkit" - delete.ArgsUsage = "" - delete.Before = func(c *cli.Context) error { - return validateOptions(c, &opts) - } - delete.Action = func(c *cli.Context) error { - return TryDelete(c, &opts) - } - - // Register the subcommand with the top-level CLI - c.Commands = []*cli.Command{ - &install, - &delete, - } - +func Flags(opts *Options) []cli.Flag { flags := []cli.Flag{ &cli.StringFlag{ Name: "driver-root", @@ -209,13 +169,6 @@ func main() { Destination: &opts.acceptNVIDIAVisibleDevicesAsVolumeMounts, EnvVars: []string{"ACCEPT_NVIDIA_VISIBLE_DEVICES_AS_VOLUME_MOUNTS"}, }, - &cli.StringFlag{ - Name: "toolkit-root", - Usage: "The directory where the NVIDIA Container toolkit is to be installed", - Required: true, - Destination: &opts.toolkitRoot, - EnvVars: []string{"TOOLKIT_ROOT"}, - }, &cli.BoolFlag{ Name: "cdi-enabled", Aliases: []string{"enable-cdi"}, @@ -252,20 +205,13 @@ func main() { }, } - // Update the subcommand flags with the common subcommand flags - install.Flags = append([]cli.Flag{}, flags...) - delete.Flags = append([]cli.Flag{}, flags...) - - // Run the top-level CLI - if err := c.Run(os.Args); err != nil { - log.Fatal(fmt.Errorf("error: %v", err)) - } + return flags } -// validateOptions checks whether the specified options are valid -func validateOptions(c *cli.Context, opts *options) error { - if opts.toolkitRoot == "" { - return fmt.Errorf("invalid --toolkit-root option: %v", opts.toolkitRoot) +// ValidateOptions checks whether the specified options are valid +func ValidateOptions(opts *Options, toolkitRoot string) error { + if toolkitRoot == "" { + return fmt.Errorf("invalid --toolkit-root option: %v", toolkitRoot) } vendor, class := parser.ParseQualifier(opts.cdiKind) @@ -306,90 +252,90 @@ func validateOptions(c *cli.Context, opts *options) error { // TryDelete attempts to remove the specified toolkit folder. // A toolkit.pid file -- if present -- is skipped. -func TryDelete(cli *cli.Context, opts *options) error { - log.Infof("Attempting to delete NVIDIA container toolkit from '%v'", opts.toolkitRoot) +func TryDelete(cli *cli.Context, toolkitRoot string) error { + log.Infof("Attempting to delete NVIDIA container toolkit from '%v'", toolkitRoot) - contents, err := os.ReadDir(opts.toolkitRoot) + contents, err := os.ReadDir(toolkitRoot) if err != nil && errors.Is(err, os.ErrNotExist) { return nil } else if err != nil { - return fmt.Errorf("failed to read the contents of %v: %w", opts.toolkitRoot, err) + return fmt.Errorf("failed to read the contents of %v: %w", toolkitRoot, err) } for _, content := range contents { if content.Name() == toolkitPidFilename { continue } - name := filepath.Join(opts.toolkitRoot, content.Name()) + name := filepath.Join(toolkitRoot, content.Name()) if err := os.RemoveAll(name); err != nil { log.Warningf("could not remove %v: %v", name, err) } } - if err := os.RemoveAll(opts.toolkitRoot); err != nil { - log.Warningf("could not remove %v: %v", opts.toolkitRoot, err) + if err := os.RemoveAll(toolkitRoot); err != nil { + log.Warningf("could not remove %v: %v", toolkitRoot, err) } return nil } // Install installs the components of the NVIDIA container toolkit. // Any existing installation is removed. -func Install(cli *cli.Context, opts *options) error { - log.Infof("Installing NVIDIA container toolkit to '%v'", opts.toolkitRoot) +func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { + log.Infof("Installing NVIDIA container toolkit to '%v'", toolkitRoot) log.Infof("Removing existing NVIDIA container toolkit installation") - err := os.RemoveAll(opts.toolkitRoot) + err := os.RemoveAll(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error removing toolkit directory: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err)) } - toolkitConfigDir := filepath.Join(opts.toolkitRoot, ".config", "nvidia-container-runtime") + toolkitConfigDir := filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime") toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename) - err = createDirectories(opts.toolkitRoot, toolkitConfigDir) + err = createDirectories(toolkitRoot, toolkitConfigDir) if err != nil && !opts.ignoreErrors { return fmt.Errorf("could not create required directories: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err)) } - err = installContainerLibraries(opts.toolkitRoot) + err = installContainerLibraries(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container library: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) } - err = installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot) + err = installContainerRuntimes(toolkitRoot, opts.DriverRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err)) } - nvidiaContainerCliExecutable, err := installContainerCLI(opts.toolkitRoot) + nvidiaContainerCliExecutable, err := installContainerCLI(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container CLI: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) } - nvidiaContainerRuntimeHookPath, err := installRuntimeHook(opts.toolkitRoot, toolkitConfigPath) + nvidiaContainerRuntimeHookPath, err := installRuntimeHook(toolkitRoot, toolkitConfigPath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)) } - nvidiaCTKPath, err := installContainerToolkitCLI(opts.toolkitRoot) + nvidiaCTKPath, err := installContainerToolkitCLI(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err) } else if err != nil { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) } - nvidiaCDIHookPath, err := installContainerCDIHookCLI(opts.toolkitRoot) + nvidiaCDIHookPath, err := installContainerCDIHookCLI(toolkitRoot) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err) } else if err != nil { @@ -470,7 +416,7 @@ func installLibrary(libName string, toolkitRoot string) error { // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // that the settings are updated to match the desired install and nvidia driver directories. -func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *options) error { +func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error { log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) cfg, err := loadConfig(nvidiaContainerToolkitConfigSource) @@ -777,7 +723,7 @@ func createDirectories(dir ...string) error { return nil } -func createDeviceNodes(opts *options) error { +func createDeviceNodes(opts *Options) error { modes := opts.createDeviceNodes.Value() if len(modes) == 0 { return nil @@ -804,7 +750,7 @@ func createDeviceNodes(opts *options) error { } // generateCDISpec generates a CDI spec for use in management containers -func generateCDISpec(opts *options, nvidiaCDIHookPath string) error { +func generateCDISpec(opts *Options, nvidiaCDIHookPath string) error { if !opts.cdiEnabled { return nil }