diff --git a/cmd/nvidia-ctk/config/create-default/create-default.go b/cmd/nvidia-ctk/config/create-default/create-default.go index a14fda05..e8e8c934 100644 --- a/cmd/nvidia-ctk/config/create-default/create-default.go +++ b/cmd/nvidia-ctk/config/create-default/create-default.go @@ -26,7 +26,6 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" - "github.com/pelletier/go-toml" "github.com/urfave/cli/v2" ) @@ -126,12 +125,10 @@ func (opts options) getFormattedConfig() ([]byte, error) { } buffer := bytes.NewBuffer(nil) - enc := toml.NewEncoder(buffer).Indentation("") - if err := enc.Encode(cfg); err != nil { - return nil, fmt.Errorf("invalid config: %v", err) + if _, err := cfg.Save(buffer); err != nil { + return nil, fmt.Errorf("unable to save config: %v", err) } - return fixComments(buffer.Bytes()) } diff --git a/internal/config/cli.go b/internal/config/cli.go index 7849e2f1..bd801a39 100644 --- a/internal/config/cli.go +++ b/internal/config/cli.go @@ -18,5 +18,7 @@ package config // ContainerCLIConfig stores the options for the nvidia-container-cli type ContainerCLIConfig struct { - Root string `toml:"root"` + Root string `toml:"root"` + LoadKmods bool `toml:"load-kmods"` + Ldconfig string `toml:"ldconfig"` } diff --git a/internal/config/config.go b/internal/config/config.go index 4381c188..c189ac54 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -130,90 +130,37 @@ func getFromTree(toml *toml.Tree) (*Config, error) { // getDefault defines the default values for the config func getDefault() (*Config, error) { - tomlConfig, err := GetDefaultToml() - if err != nil { - return nil, err - } - - // tomlConfig above includes information about the default values and comments. - // we need to marshal it back to a string and then unmarshal it to strip the comments. - contents, err := tomlConfig.ToTomlString() - if err != nil { - return nil, err - } - - reloaded, err := toml.Load(contents) - if err != nil { - return nil, err - } - - d := Config{} - if err := reloaded.Unmarshal(&d); err != nil { - return nil, fmt.Errorf("failed to unmarshal config: %v", err) - } - - // The default value for the accept-nvidia-visible-devices-envvar-when-unprivileged is non-standard. - // As such we explicitly handle it being set here. - if reloaded.Get("accept-nvidia-visible-devices-envvar-when-unprivileged") == nil { - d.AcceptEnvvarUnprivileged = true - } - // The default value for the nvidia-container-runtime.debug is non-standard. - // As such we explicitly handle it being set here. - if reloaded.Get("nvidia-container-runtime.debug") == nil { - d.NVIDIAContainerRuntimeConfig.DebugFilePath = "/dev/null" + d := Config{ + AcceptEnvvarUnprivileged: true, + NVIDIAContainerCLIConfig: ContainerCLIConfig{ + LoadKmods: true, + Ldconfig: getLdConfigPath(), + }, + NVIDIACTKConfig: CTKConfig{ + Path: nvidiaCTKExecutable, + }, + NVIDIAContainerRuntimeConfig: RuntimeConfig{ + DebugFilePath: "/dev/null", + LogLevel: "info", + Runtimes: []string{"docker-runc", "runc"}, + Mode: "auto", + Modes: modesConfig{ + CSV: csvModeConfig{ + MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d", + }, + CDI: cdiModeConfig{ + DefaultKind: "nvidia.com/gpu", + AnnotationPrefixes: []string{cdi.AnnotationPrefix}, + }, + }, + }, + NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{ + Path: NVIDIAContainerRuntimeHookExecutable, + }, } return &d, nil } -// GetDefaultToml returns the default config as a toml Tree. -func GetDefaultToml() (*toml.Tree, error) { - tree, err := toml.TreeFromMap(nil) - if err != nil { - return nil, err - } - - tree.Set("disable-require", false) - tree.SetWithComment("swarm-resource", "", true, "DOCKER_RESOURCE_GPU") - tree.SetWithComment("accept-nvidia-visible-devices-envvar-when-unprivileged", "", true, true) - tree.SetWithComment("accept-nvidia-visible-devices-as-volume-mounts", "", true, false) - - // nvidia-container-cli - tree.SetWithComment("nvidia-container-cli.root", "", true, "/run/nvidia/driver") - tree.SetWithComment("nvidia-container-cli.path", "", true, "/usr/bin/nvidia-container-cli") - tree.Set("nvidia-container-cli.environment", []string{}) - tree.SetWithComment("nvidia-container-cli.debug", "", true, "/var/log/nvidia-container-toolkit.log") - tree.SetWithComment("nvidia-container-cli.ldcache", "", true, "/etc/ld.so.cache") - tree.Set("nvidia-container-cli.load-kmods", true) - tree.SetWithComment("nvidia-container-cli.no-cgroups", "", true, false) - - tree.SetWithComment("nvidia-container-cli.user", "", getCommentedUserGroup(), getUserGroup()) - tree.Set("nvidia-container-cli.ldconfig", getLdConfigPath()) - - // nvidia-container-runtime - tree.SetWithComment("nvidia-container-runtime.debug", "", true, "/var/log/nvidia-container-runtime.log") - tree.Set("nvidia-container-runtime.log-level", "info") - - commentLines := []string{ - "Specify the runtimes to consider. This list is processed in order and the PATH", - "searched for matching executables unless the entry is an absolute path.", - } - tree.SetWithComment("nvidia-container-runtime.runtimes", strings.Join(commentLines, "\n "), false, []string{"docker-runc", "runc"}) - - tree.Set("nvidia-container-runtime.mode", "auto") - - tree.Set("nvidia-container-runtime.modes.csv.mount-spec-path", "/etc/nvidia-container-runtime/host-files-for-container.d") - tree.Set("nvidia-container-runtime.modes.cdi.default-kind", "nvidia.com/gpu") - tree.Set("nvidia-container-runtime.modes.cdi.annotation-prefixes", []string{cdi.AnnotationPrefix}) - - // nvidia-ctk - tree.Set("nvidia-ctk.path", nvidiaCTKExecutable) - - // nvidia-container-runtime-hook - tree.Set("nvidia-container-runtime-hook.path", nvidiaContainerRuntimeHookExecutable) - - return tree, nil -} - func getLdConfigPath() string { if _, err := os.Stat("/sbin/ldconfig.real"); err == nil { return "@/sbin/ldconfig.real" @@ -221,11 +168,6 @@ func getLdConfigPath() string { return "@/sbin/ldconfig" } -// getUserGroup returns the user and group to use for the nvidia-container-cli and whether the config option should be commented. -func getUserGroup() string { - return "root:video" -} - // getCommentedUserGroup returns whether the nvidia-container-cli user and group config option should be commented. func getCommentedUserGroup() bool { uncommentIf := map[string]bool{ @@ -311,3 +253,66 @@ func resolveWithDefault(logger logger.Interface, label string, path string, defa return resolvedPath } + +func (c Config) asCommentedToml() (*toml.Tree, error) { + contents, err := toml.Marshal(c) + if err != nil { + return nil, err + } + asToml, err := toml.LoadBytes(contents) + if err != nil { + return nil, err + } + + commentedDefaults := map[string]interface{}{ + "swarm-resource": "DOCKER_RESOURCE_GPU", + "accept-nvidia-visible-devices-envvar-when-unprivileged": true, + "accept-nvidia-visible-devices-as-volume-mounts": false, + "nvidia-container-cli.root": "/run/nvidia/driver", + "nvidia-container-cli.path": "/usr/bin/nvidia-container-cli", + "nvidia-container-cli.debug": "/var/log/nvidia-container-toolkit.log", + "nvidia-container-cli.ldcache": "/etc/ld.so.cache", + "nvidia-container-cli.no-cgroups": false, + "nvidia-container-cli.user": "root:video", + "nvidia-container-runtime.debug": "/var/log/nvidia-container-runtime.log", + } + for k, v := range commentedDefaults { + set := asToml.Get(k) + fmt.Printf("k=%v v=%+v set=%+v\n", k, v, set) + if !shouldComment(k, v, set) { + continue + } + fmt.Printf("set=%+v v=%+v\n", set, v) + asToml.SetWithComment(k, "", true, v) + } + + return asToml, nil +} + +func shouldComment(key string, value interface{}, set interface{}) bool { + if key == "nvidia-container-cli.user" && !getCommentedUserGroup() { + return false + } + if key == "nvidia-container-runtime.debug" && set == "/dev/null" { + return true + } + if set == nil || value == set { + return true + } + + return false +} + +// Save writes the config to the specified writer. +func (c Config) Save(w io.Writer) (int64, error) { + asToml, err := c.asCommentedToml() + if err != nil { + return 0, err + } + + enc := toml.NewEncoder(w).Indentation("") + if err := enc.Encode(asToml); err != nil { + return 0, fmt.Errorf("invalid config: %v", err) + } + return 0, nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index f69f2736..0c60ca99 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -49,17 +49,21 @@ func TestGetConfigWithCustomConfig(t *testing.T) { func TestGetConfig(t *testing.T) { testCases := []struct { - description string - contents []string - expectedError error - expectedConfig *Config + description string + contents []string + expectedError error + inspectLdconfig bool + expectedConfig *Config }{ { - description: "empty config is default", + description: "empty config is default", + inspectLdconfig: true, expectedConfig: &Config{ AcceptEnvvarUnprivileged: true, NVIDIAContainerCLIConfig: ContainerCLIConfig{ - Root: "", + Root: "", + LoadKmods: true, + Ldconfig: "WAS_CHECKED", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/dev/null", @@ -89,6 +93,8 @@ func TestGetConfig(t *testing.T) { contents: []string{ "accept-nvidia-visible-devices-envvar-when-unprivileged = false", "nvidia-container-cli.root = \"/bar/baz\"", + "nvidia-container-cli.load-kmods = false", + "nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"", "nvidia-container-runtime.debug = \"/foo/bar\"", "nvidia-container-runtime.experimental = true", "nvidia-container-runtime.discover-mode = \"not-legacy\"", @@ -104,7 +110,9 @@ func TestGetConfig(t *testing.T) { expectedConfig: &Config{ AcceptEnvvarUnprivileged: false, NVIDIAContainerCLIConfig: ContainerCLIConfig{ - Root: "/bar/baz", + Root: "/bar/baz", + LoadKmods: false, + Ldconfig: "/foo/bar/ldconfig", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", @@ -138,6 +146,8 @@ func TestGetConfig(t *testing.T) { "accept-nvidia-visible-devices-envvar-when-unprivileged = false", "[nvidia-container-cli]", "root = \"/bar/baz\"", + "load-kmods = false", + "ldconfig = \"/foo/bar/ldconfig\"", "[nvidia-container-runtime]", "debug = \"/foo/bar\"", "experimental = true", @@ -158,7 +168,9 @@ func TestGetConfig(t *testing.T) { expectedConfig: &Config{ AcceptEnvvarUnprivileged: false, NVIDIAContainerCLIConfig: ContainerCLIConfig{ - Root: "/bar/baz", + Root: "/bar/baz", + LoadKmods: false, + Ldconfig: "/foo/bar/ldconfig", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", @@ -199,6 +211,16 @@ func TestGetConfig(t *testing.T) { require.NoError(t, err) } + // We first handle the ldconfig path since this is currently system-dependent. + if tc.inspectLdconfig { + ldconfig := cfg.NVIDIAContainerCLIConfig.Ldconfig + require.True(t, strings.HasPrefix(ldconfig, "@/sbin/ldconfig")) + remaining := strings.TrimPrefix(ldconfig, "@/sbin/ldconfig") + require.True(t, remaining == ".real" || remaining == "") + + cfg.NVIDIAContainerCLIConfig.Ldconfig = "WAS_CHECKED" + } + require.EqualValues(t, tc.expectedConfig, cfg) }) }