From 1a83552aa67d2ff8633f8f2f6dc25c15e5d10bf9 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 30 Oct 2024 13:36:11 +0100 Subject: [PATCH 1/4] [no-relnote] Use urfave for nvidia-container-runtime-hook CLI Signed-off-by: Evan Lezar --- .../hook_config.go | 10 +- .../hook_config_test.go | 7 +- cmd/nvidia-container-runtime-hook/main.go | 112 ++++++++++-------- 3 files changed, 71 insertions(+), 58 deletions(-) diff --git a/cmd/nvidia-container-runtime-hook/hook_config.go b/cmd/nvidia-container-runtime-hook/hook_config.go index afdb8bbc..beb8089f 100644 --- a/cmd/nvidia-container-runtime-hook/hook_config.go +++ b/cmd/nvidia-container-runtime-hook/hook_config.go @@ -30,11 +30,11 @@ func getDefaultHookConfig() (HookConfig, error) { } // loadConfig loads the required paths for the hook config. -func loadConfig() (*config.Config, error) { +func (a *app) loadConfig() (*config.Config, error) { var configPaths []string var required bool - if len(*configflag) != 0 { - configPaths = append(configPaths, *configflag) + if len(a.configFile) != 0 { + configPaths = append(configPaths, a.configFile) required = true } else { configPaths = append(configPaths, path.Join(driverPath, configPath), configPath) @@ -56,8 +56,8 @@ func loadConfig() (*config.Config, error) { return config.GetDefault() } -func getHookConfig() (*HookConfig, error) { - cfg, err := loadConfig() +func (a *app) getHookConfig() (*HookConfig, error) { + cfg, err := a.loadConfig() if err != nil { return nil, fmt.Errorf("failed to load config: %v", err) } diff --git a/cmd/nvidia-container-runtime-hook/hook_config_test.go b/cmd/nvidia-container-runtime-hook/hook_config_test.go index 7c50ec12..f37c6916 100644 --- a/cmd/nvidia-container-runtime-hook/hook_config_test.go +++ b/cmd/nvidia-container-runtime-hook/hook_config_test.go @@ -72,16 +72,17 @@ func TestGetHookConfig(t *testing.T) { if len(filename) > 0 { os.Remove(filename) } - configflag = nil }() + a := &app{} + if tc.lines != nil { configFile, err := os.CreateTemp("", "*.toml") require.NoError(t, err) defer configFile.Close() filename = configFile.Name() - configflag = &filename + a.configFile = filename for _, line := range tc.lines { _, err := configFile.WriteString(fmt.Sprintf("%s\n", line)) @@ -91,7 +92,7 @@ func TestGetHookConfig(t *testing.T) { var config HookConfig getHookConfig := func() { - c, _ := getHookConfig() + c, _ := a.getHookConfig() config = *c } diff --git a/cmd/nvidia-container-runtime-hook/main.go b/cmd/nvidia-container-runtime-hook/main.go index a0cfe3ec..cbaf9e23 100644 --- a/cmd/nvidia-container-runtime-hook/main.go +++ b/cmd/nvidia-container-runtime-hook/main.go @@ -1,7 +1,7 @@ package main import ( - "flag" + "errors" "fmt" "log" "os" @@ -13,29 +13,26 @@ import ( "strings" "syscall" + cli "github.com/urfave/cli/v2" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/info" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/lookup" ) -var ( - debugflag = flag.Bool("debug", false, "enable debug output") - versionflag = flag.Bool("version", false, "enable version output") - configflag = flag.String("config", "", "configuration file") -) - -func exit() { +func (a *app) recoverIfRequired() error { if err := recover(); err != nil { - if _, ok := err.(runtime.Error); ok { + rerr, ok := err.(runtime.Error) + if ok { log.Println(err) } - if *debugflag { + if a.isDebug { log.Printf("%s", debug.Stack()) } - os.Exit(1) + return rerr } - os.Exit(0) + return nil } func getCLIPath(config config.ContainerCLIConfig) string { @@ -63,15 +60,15 @@ func getRootfsPath(config containerConfig) string { return rootfs } -func doPrestart() { - var err error - - defer exit() +func (a *app) doPrestart() (rerr error) { + defer func() { + rerr = errors.Join(rerr, a.recoverIfRequired()) + }() log.SetFlags(0) - hook, err := getHookConfig() + hook, err := a.getHookConfig() if err != nil || hook == nil { - log.Panicln("error getting hook config:", err) + return fmt.Errorf("error getting hook config: %w", err) } cli := hook.NVIDIAContainerCLIConfig @@ -79,11 +76,11 @@ func doPrestart() { nvidia := container.Nvidia if nvidia == nil { // Not a GPU container, nothing to do. - return + return nil } if !hook.NVIDIAContainerRuntimeHookConfig.SkipModeDetection && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntimeConfig.Mode, container.Image) != "legacy" { - log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.") + return fmt.Errorf("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead") } rootfs := getRootfsPath(container) @@ -101,7 +98,7 @@ func doPrestart() { if cli.NoPivot { args = append(args, "--no-pivot") } - if *debugflag { + if a.isDebug { args = append(args, "--debug=/dev/stderr") } else if cli.Debug != "" { args = append(args, fmt.Sprintf("--debug=%s", cli.Debug)) @@ -149,45 +146,60 @@ func doPrestart() { env := append(os.Environ(), cli.Environment...) //nolint:gosec // TODO: Can we harden this so that there is less risk of command injection? - err = syscall.Exec(args[0], args, env) - log.Panicln("exec failed:", err) + return syscall.Exec(args[0], args, env) } -func usage() { - fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0]) - flag.PrintDefaults() - fmt.Fprintf(os.Stderr, "\nCommands:\n") - fmt.Fprintf(os.Stderr, " prestart\n run the prestart hook\n") - fmt.Fprintf(os.Stderr, " poststart\n no-op\n") - fmt.Fprintf(os.Stderr, " poststop\n no-op\n") +type options struct { + isDebug bool + configFile string +} +type app struct { + options } func main() { - flag.Usage = usage - flag.Parse() + a := &app{} + // Create the top-level CLI + c := cli.NewApp() + c.Name = "NVIDIA Container Runtime Hook" + c.Version = info.GetVersionString() - if *versionflag { - fmt.Printf("%v version %v\n", "NVIDIA Container Runtime Hook", info.GetVersionString()) - return + c.Flags = []cli.Flag{ + &cli.BoolFlag{ + Name: "debug", + Destination: &a.isDebug, + Usage: "Enabled debug output", + }, + &cli.StringFlag{ + Name: "config", + Destination: &a.configFile, + Usage: "The path to the configuration file to use", + }, } - args := flag.Args() - if len(args) == 0 { - flag.Usage() - os.Exit(2) + c.Commands = []*cli.Command{ + { + Name: "prestart", + Usage: "run the prestart hook", + Action: func(ctx *cli.Context) error { + return a.doPrestart() + }, + }, + { + Name: "poststart", + Aliases: []string{"poststop"}, + Usage: "no-op", + Action: func(ctx *cli.Context) error { + return nil + }, + }, } + c.DefaultCommand = "prestart" - switch args[0] { - case "prestart": - doPrestart() - os.Exit(0) - case "poststart": - fallthrough - case "poststop": - os.Exit(0) - default: - flag.Usage() - os.Exit(2) + // Run the CLI + err := c.Run(os.Args) + if err != nil { + os.Exit(1) } } From a1e98539d0b71b3dd8badbbea5c522188fc9db7e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 30 Oct 2024 13:43:49 +0100 Subject: [PATCH 2/4] Add NVCT_CONFIG_FILE_PATH envvar This change adds support for explicitly specifying the path to the config file through an environment variable. The NVCT_CONFIG_FILE_PATH variable is used for both the nvidia-container-runtime and nvidia-container-runtime-hook. Signed-off-by: Evan Lezar --- cmd/nvidia-container-runtime-hook/main.go | 1 + internal/config/config.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/cmd/nvidia-container-runtime-hook/main.go b/cmd/nvidia-container-runtime-hook/main.go index cbaf9e23..05390396 100644 --- a/cmd/nvidia-container-runtime-hook/main.go +++ b/cmd/nvidia-container-runtime-hook/main.go @@ -174,6 +174,7 @@ func main() { Name: "config", Destination: &a.configFile, Usage: "The path to the configuration file to use", + EnvVars: []string{config.FilePathOverrideEnvVar}, }, } diff --git a/internal/config/config.go b/internal/config/config.go index 33b8ba4d..3cb7541d 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -30,6 +30,8 @@ import ( ) const ( + FilePathOverrideEnvVar = "NVCTK_CONFIG_FILE_PATH" + configOverride = "XDG_CONFIG_HOME" configFilePath = "nvidia-container-runtime/config.toml" @@ -71,6 +73,9 @@ type Config struct { // GetConfigFilePath returns the path to the config file for the configured system func GetConfigFilePath() string { + if configFilePathOverride := os.Getenv(FilePathOverrideEnvVar); configFilePathOverride != "" { + return configFilePathOverride + } if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 { return filepath.Join(XDGConfigDir, configFilePath) } From abf6b8a320e91bb8d3f9f62dec933e26e8cd9e0e Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 30 Oct 2024 13:56:56 +0100 Subject: [PATCH 3/4] [no-relnote] Rename config constants Signed-off-by: Evan Lezar --- internal/config/config.go | 11 ++++++----- internal/config/config_test.go | 21 +++++++++++++++++++-- 2 files changed, 25 insertions(+), 7 deletions(-) diff --git a/internal/config/config.go b/internal/config/config.go index 3cb7541d..f9f9d425 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -31,9 +31,9 @@ import ( const ( FilePathOverrideEnvVar = "NVCTK_CONFIG_FILE_PATH" + RelativeFilePath = "nvidia-container-runtime/config.toml" - configOverride = "XDG_CONFIG_HOME" - configFilePath = "nvidia-container-runtime/config.toml" + configRootOverride = "XDG_CONFIG_HOME" nvidiaCTKExecutable = "nvidia-ctk" nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk" @@ -76,11 +76,12 @@ func GetConfigFilePath() string { if configFilePathOverride := os.Getenv(FilePathOverrideEnvVar); configFilePathOverride != "" { return configFilePathOverride } - if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 { - return filepath.Join(XDGConfigDir, configFilePath) + configRoot := "/etc" + if XDGConfigDir := os.Getenv(configRootOverride); len(XDGConfigDir) != 0 { + configRoot = XDGConfigDir } - return filepath.Join("/etc", configFilePath) + return filepath.Join(configRoot, RelativeFilePath) } // GetConfig sets up the config struct. Values are read from a toml file diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 0873ebd2..34bf4c27 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -27,9 +27,26 @@ import ( func TestGetConfigWithCustomConfig(t *testing.T) { testDir := t.TempDir() - t.Setenv(configOverride, testDir) + t.Setenv(configRootOverride, testDir) - filename := filepath.Join(testDir, configFilePath) + filename := filepath.Join(testDir, RelativeFilePath) + + // By default debug is disabled + contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"") + + require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766)) + require.NoError(t, os.WriteFile(filename, contents, 0600)) + + cfg, err := GetConfig() + require.NoError(t, err) + require.Equal(t, "/nvidia-container-toolkit.log", cfg.NVIDIAContainerRuntimeConfig.DebugFilePath) +} + +func TestGetConfigWithConfigFilePathOverride(t *testing.T) { + testDir := t.TempDir() + filename := filepath.Join(testDir, RelativeFilePath) + + t.Setenv(FilePathOverrideEnvVar, filename) // By default debug is disabled contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"") From 91aadb7616897c369c24ef38a3626d23c7edd9b4 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 30 Oct 2024 13:57:59 +0100 Subject: [PATCH 4/4] [no-relnote] Use NVCT_CONFIG_FILE_PATH in toolkit install Signed-off-by: Evan Lezar --- tools/container/toolkit/executable.go | 5 ----- tools/container/toolkit/executable_test.go | 17 --------------- tools/container/toolkit/runtime.go | 13 +++++------ tools/container/toolkit/runtime_test.go | 4 ++-- tools/container/toolkit/toolkit.go | 25 ++++++++++------------ 5 files changed, 20 insertions(+), 44 deletions(-) diff --git a/tools/container/toolkit/executable.go b/tools/container/toolkit/executable.go index 394ca007..b46cb1b8 100644 --- a/tools/container/toolkit/executable.go +++ b/tools/container/toolkit/executable.go @@ -37,7 +37,6 @@ type executable struct { target executableTarget env map[string]string preLines []string - argLines []string } // install installs an executable component of the NVIDIA container toolkit. The source executable @@ -128,10 +127,6 @@ func (e executable) writeWrapperTo(wrapper io.Writer, destFolder string, dotfile // Add the call to the target executable fmt.Fprintf(wrapper, "%s \\\n", dotfileName) - // Insert additional lines in the `arg` list - for _, line := range e.argLines { - fmt.Fprintf(wrapper, "\t%s \\\n", r.apply(line)) - } // Add the script arguments "$@" fmt.Fprintln(wrapper, "\t\"$@\"") diff --git a/tools/container/toolkit/executable_test.go b/tools/container/toolkit/executable_test.go index 8cb47596..bb503e4b 100644 --- a/tools/container/toolkit/executable_test.go +++ b/tools/container/toolkit/executable_test.go @@ -76,23 +76,6 @@ func TestWrapper(t *testing.T) { "", }, }, - { - e: executable{ - argLines: []string{ - "argline1", - "argline2", - }, - }, - expectedLines: []string{ - shebang, - "PATH=/dest/folder:$PATH \\", - "source.real \\", - "\targline1 \\", - "\targline2 \\", - "\t\"$@\"", - "", - }, - }, } for i, tc := range testCases { diff --git a/tools/container/toolkit/runtime.go b/tools/container/toolkit/runtime.go index bdfca983..95115cd3 100644 --- a/tools/container/toolkit/runtime.go +++ b/tools/container/toolkit/runtime.go @@ -20,6 +20,7 @@ import ( "fmt" "path/filepath" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/tools/container/operator" ) @@ -29,10 +30,10 @@ const ( // installContainerRuntimes sets up the NVIDIA container runtimes, copying the executables // and implementing the required wrapper -func installContainerRuntimes(toolkitDir string, driverRoot string) error { +func installContainerRuntimes(toolkitDir string, configFilePath string) error { runtimes := operator.GetRuntimes() for _, runtime := range runtimes { - r := newNvidiaContainerRuntimeInstaller(runtime.Path) + r := newNvidiaContainerRuntimeInstaller(runtime.Path, configFilePath) _, err := r.install(toolkitDir) if err != nil { @@ -46,17 +47,17 @@ func installContainerRuntimes(toolkitDir string, driverRoot string) error { // This installer will copy the specified source executable to the toolkit directory. // The executable is copied to a file with the same name as the source, but with a ".real" suffix and a wrapper is // created to allow for the configuration of the runtime environment. -func newNvidiaContainerRuntimeInstaller(source string) *executable { +func newNvidiaContainerRuntimeInstaller(source string, configFilePath string) *executable { wrapperName := filepath.Base(source) dotfileName := wrapperName + ".real" target := executableTarget{ dotfileName: dotfileName, wrapperName: wrapperName, } - return newRuntimeInstaller(source, target, nil) + return newRuntimeInstaller(source, target, configFilePath, nil) } -func newRuntimeInstaller(source string, target executableTarget, env map[string]string) *executable { +func newRuntimeInstaller(source string, target executableTarget, configFilePath string, env map[string]string) *executable { preLines := []string{ "", "cat /proc/modules | grep -e \"^nvidia \" >/dev/null 2>&1", @@ -68,7 +69,7 @@ func newRuntimeInstaller(source string, target executableTarget, env map[string] } runtimeEnv := make(map[string]string) - runtimeEnv["XDG_CONFIG_HOME"] = filepath.Join(destDirPattern, ".config") + runtimeEnv[config.FilePathOverrideEnvVar] = configFilePath for k, v := range env { runtimeEnv[k] = v } diff --git a/tools/container/toolkit/runtime_test.go b/tools/container/toolkit/runtime_test.go index d2841506..f10cbfd8 100644 --- a/tools/container/toolkit/runtime_test.go +++ b/tools/container/toolkit/runtime_test.go @@ -25,7 +25,7 @@ import ( ) func TestNvidiaContainerRuntimeInstallerWrapper(t *testing.T) { - r := newNvidiaContainerRuntimeInstaller(nvidiaContainerRuntimeSource) + r := newNvidiaContainerRuntimeInstaller(nvidiaContainerRuntimeSource, "/config/file/path/config.toml") const shebang = "#! /bin/sh" const destFolder = "/dest/folder" @@ -45,8 +45,8 @@ func TestNvidiaContainerRuntimeInstallerWrapper(t *testing.T) { " exec runc \"$@\"", "fi", "", + "NVCTK_CONFIG_FILE_PATH=/config/file/path/config.toml \\", "PATH=/dest/folder:$PATH \\", - "XDG_CONFIG_HOME=/dest/folder/.config \\", "source.real \\", "\t\"$@\"", "", diff --git a/tools/container/toolkit/toolkit.go b/tools/container/toolkit/toolkit.go index 43e68ca5..a7515f62 100644 --- a/tools/container/toolkit/toolkit.go +++ b/tools/container/toolkit/toolkit.go @@ -297,10 +297,9 @@ func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err)) } - toolkitConfigDir := filepath.Join(toolkitRoot, ".config", "nvidia-container-runtime") - toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename) + toolkitConfigFilePath := filepath.Join(toolkitRoot, ".config", config.RelativeFilePath) - err = createDirectories(toolkitRoot, toolkitConfigDir) + err = createDirectories(toolkitRoot, filepath.Dir(toolkitConfigFilePath)) if err != nil && !opts.ignoreErrors { return fmt.Errorf("could not create required directories: %v", err) } else if err != nil { @@ -314,7 +313,7 @@ func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) } - err = installContainerRuntimes(toolkitRoot, opts.DriverRoot) + err = installContainerRuntimes(toolkitRoot, toolkitConfigFilePath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime: %v", err) } else if err != nil { @@ -328,7 +327,7 @@ func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) } - nvidiaContainerRuntimeHookPath, err := installRuntimeHook(toolkitRoot, toolkitConfigPath) + nvidiaContainerRuntimeHookPath, err := installRuntimeHook(toolkitRoot, toolkitConfigFilePath) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) } else if err != nil { @@ -349,7 +348,7 @@ func Install(cli *cli.Context, opts *Options, toolkitRoot string) error { log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container CDI Hook CLI: %v", err)) } - err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) + err = installToolkitConfig(cli, toolkitConfigFilePath, nvidiaContainerCliExecutable, nvidiaCTKPath, nvidiaContainerRuntimeHookPath, opts) if err != nil && !opts.ignoreErrors { return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) } else if err != nil { @@ -423,8 +422,8 @@ func installLibrary(libName string, toolkitRoot string) error { // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // that the settings are updated to match the desired install and nvidia driver directories. -func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error { - log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) +func installToolkitConfig(c *cli.Context, toolkitConfigFilePath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, nvidaContainerRuntimeHookPath string, opts *Options) error { + log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigFilePath) cfg, err := config.New( config.WithConfigFile(nvidiaContainerToolkitConfigSource), @@ -433,7 +432,7 @@ func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContai return fmt.Errorf("could not open source config file: %v", err) } - targetConfig, err := os.Create(toolkitConfigPath) + targetConfig, err := os.Create(toolkitConfigFilePath) if err != nil { return fmt.Errorf("could not create target config file: %v", err) } @@ -579,17 +578,15 @@ func installContainerCLI(toolkitRoot string) (string, error) { func installRuntimeHook(toolkitRoot string, configFilePath string) (string, error) { log.Infof("Installing NVIDIA container runtime hook from '%v'", nvidiaContainerRuntimeHookSource) - argLines := []string{ - fmt.Sprintf("-config \"%s\"", configFilePath), - } - e := executable{ source: nvidiaContainerRuntimeHookSource, target: executableTarget{ dotfileName: "nvidia-container-runtime-hook.real", wrapperName: "nvidia-container-runtime-hook", }, - argLines: argLines, + env: map[string]string{ + config.FilePathOverrideEnvVar: configFilePath, + }, } installedPath, err := e.install(toolkitRoot)