diff --git a/CHANGELOG.md b/CHANGELOG.md index bd998fa0..4f1b223b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,7 @@ * Add option to load kernel modules when creating device nodes * Add option to create device nodes when creating `/dev/char` symlinks * Create ouput folders if required when running `nvidia-ctk runtime configure` - +* Generate default config as post-install step. * [libnvidia-container] Support OpenSSL 3 with the Encrypt/Decrypt library diff --git a/cmd/nvidia-ctk/config/create-default/create-default.go b/cmd/nvidia-ctk/config/create-default/create-default.go index db89345a..e8e8c934 100644 --- a/cmd/nvidia-ctk/config/create-default/create-default.go +++ b/cmd/nvidia-ctk/config/create-default/create-default.go @@ -17,11 +17,14 @@ package defaultsubcommand import ( + "bytes" "fmt" "io" "os" + "path/filepath" + "regexp" - nvctkConfig "github.com/NVIDIA/nvidia-container-toolkit/internal/config" + "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/urfave/cli/v2" ) @@ -32,7 +35,9 @@ type command struct { // options stores the subcommand options type options struct { - output string + config string + output string + inPlace bool } // NewCommand constructs a default command with the specified logger @@ -61,9 +66,20 @@ func (m command) build() *cli.Command { } c.Flags = []cli.Flag{ + &cli.StringFlag{ + Name: "config", + Usage: "Specify the config file to process; The contents of this file overrides the default config", + Destination: &opts.config, + }, + &cli.BoolFlag{ + Name: "in-place", + Aliases: []string{"i"}, + Usage: "Modify the config file in-place", + Destination: &opts.inPlace, + }, &cli.StringFlag{ Name: "output", - Usage: "Specify the file to output the generated configuration for to. 
If this is '' the configuration is ouput to STDOUT.",
+			Usage:       "Specify the output file to write to; If not specified, the output is written to stdout",
 			Destination: &opts.output,
 		},
 	}
@@ -72,31 +88,108 @@ func (m command) build() *cli.Command {
 }
 
 func (m command) validateFlags(c *cli.Context, opts *options) error {
+	if opts.inPlace {
+		if opts.output != "" {
+			return fmt.Errorf("cannot specify both --in-place and --output")
+		}
+		// Without a config file there is nothing to modify in place; falling
+		// through would silently print the config to stdout instead.
+		if opts.config == "" {
+			return fmt.Errorf("--in-place requires --config to be specified")
+		}
+		opts.output = opts.config
+	}
 	return nil
 }
 
 func (m command) run(c *cli.Context, opts *options) error {
-	defaultConfig, err := nvctkConfig.GetDefaultConfigToml()
-	if err != nil {
-		return fmt.Errorf("unable to get default config: %v", err)
+	if err := opts.ensureOutputFolder(); err != nil {
+		return fmt.Errorf("unable to create output directory: %v", err)
 	}
 
+	contents, err := opts.getFormattedConfig()
+	if err != nil {
+		return fmt.Errorf("unable to generate config: %v", err)
+	}
+
+	if _, err := opts.Write(contents); err != nil {
+		return fmt.Errorf("unable to write to output: %v", err)
+	}
+
+	return nil
+}
+
+// getFormattedConfig loads the config from the file specified in the options
+// and returns it serialized for writing; config.Load supplies the defaults when
+// no file is given or the file cannot be opened.
+// No indentation is used and comments are modified so that there is no space
+// after the '#' character.
+func (opts options) getFormattedConfig() ([]byte, error) {
+	cfg, err := config.Load(opts.config)
+	if err != nil {
+		return nil, fmt.Errorf("unable to load or create config: %v", err)
+	}
+
+	buffer := bytes.NewBuffer(nil)
+
+	if _, err := cfg.Save(buffer); err != nil {
+		return nil, fmt.Errorf("unable to save config: %v", err)
+	}
+	return fixComments(buffer.Bytes())
+}
+
+// commentPattern matches a comment line: optional leading blanks, the '#'
+// marker, optional blanks, and the comment text. It is anchored to the start
+// of a line so that a '#' inside a value is never rewritten.
+var commentPattern = regexp.MustCompile(`(?m)^[ \t]*#[ \t]*(\S.*)$`)
+
+// fixComments strips the indentation before and the spacing after the '#' of
+// every comment line in contents. The error is always nil; it is retained so
+// that the signature stays unchanged for callers.
+func fixComments(contents []byte) ([]byte, error) {
+	return commentPattern.ReplaceAll(contents, []byte("#$1")), nil
+}
+
+// outputExists reports whether the configured output file exists. With no
+// output file (stdout) it reports false.
+func (opts options) outputExists() (bool, error) {
+	if opts.output == "" {
+		return false, nil
+	}
+	_, err := os.Stat(opts.output)
+	if err == nil {
+		return true, nil
+	} else if !os.IsNotExist(err) {
+		return false, fmt.Errorf("unable to stat output file: %v", err)
+	}
+	return false, nil
+}
+
+// ensureOutputFolder creates the directory of the output file, including any
+// missing parents. It does nothing when writing to stdout.
+func (opts options) ensureOutputFolder() error {
+	if opts.output == "" {
+		return nil
+	}
+	if dir := filepath.Dir(opts.output); dir != "" {
+		return os.MkdirAll(dir, 0755)
+	}
+	return nil
+}
+
+// Write writes the contents to the output file specified in the options.
+func (opts options) Write(contents []byte) (int, error) { var output io.Writer if opts.output == "" { output = os.Stdout } else { outputFile, err := os.Create(opts.output) if err != nil { - return fmt.Errorf("unable to create output file: %v", err) + return 0, fmt.Errorf("unable to create output file: %v", err) } defer outputFile.Close() output = outputFile } - _, err = defaultConfig.WriteTo(output) - if err != nil { - return fmt.Errorf("unable to write to output: %v", err) - } - - return nil + return output.Write(contents) } diff --git a/cmd/nvidia-ctk/config/create-default/create-default_test.go b/cmd/nvidia-ctk/config/create-default/create-default_test.go new file mode 100644 index 00000000..65c940ca --- /dev/null +++ b/cmd/nvidia-ctk/config/create-default/create-default_test.go @@ -0,0 +1,82 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+**/ + +package defaultsubcommand + +import ( + "strings" + "testing" + + "github.com/stretchr/testify/require" +) + +func TestFixComment(t *testing.T) { + testCases := []struct { + input string + expected string + }{ + { + input: "# comment", + expected: "#comment", + }, + { + input: " #comment", + expected: "#comment", + }, + { + input: " # comment", + expected: "#comment", + }, + { + input: strings.Join([]string{ + "some", + "# comment", + " # comment", + " #comment", + "other"}, "\n"), + expected: strings.Join([]string{ + "some", + "#comment", + "#comment", + "#comment", + "other"}, "\n"), + }, + } + + for _, tc := range testCases { + t.Run(tc.input, func(t *testing.T) { + actual, _ := fixComments([]byte(tc.input)) + require.Equal(t, tc.expected, string(actual)) + }) + } +} + +func TestGetFormattedConfig(t *testing.T) { + expectedLines := []string{ + "#no-cgroups = false", + "#debug = \"/var/log/nvidia-container-toolkit.log\"", + "#debug = \"/var/log/nvidia-container-runtime.log\"", + } + + opts := &options{} + contents, err := opts.getFormattedConfig() + require.NoError(t, err) + lines := strings.Split(string(contents), "\n") + + for _, line := range expectedLines { + require.Contains(t, lines, line) + } +} diff --git a/cmd/nvidia-ctk/main.go b/cmd/nvidia-ctk/main.go index a1566559..8cbb56c2 100644 --- a/cmd/nvidia-ctk/main.go +++ b/cmd/nvidia-ctk/main.go @@ -36,6 +36,8 @@ import ( type options struct { // Debug indicates whether the CLI is started in "debug" mode Debug bool + // Quiet indicates whether the CLI is started in "quiet" mode + Quiet bool } func main() { @@ -61,6 +63,12 @@ func main() { Destination: &opts.Debug, EnvVars: []string{"NVIDIA_CTK_DEBUG"}, }, + &cli.BoolFlag{ + Name: "quiet", + Usage: "Suppress all output except for errors; overrides --debug", + Destination: &opts.Quiet, + EnvVars: []string{"NVIDIA_CTK_QUIET"}, + }, } // Set log-level for all subcommands @@ -69,6 +77,9 @@ func main() { if opts.Debug { logLevel = logrus.DebugLevel } + if 
opts.Quiet { + logLevel = logrus.ErrorLevel + } logger.SetLevel(logLevel) return nil } diff --git a/internal/config/cli.go b/internal/config/cli.go index 7849e2f1..bd801a39 100644 --- a/internal/config/cli.go +++ b/internal/config/cli.go @@ -18,5 +18,7 @@ package config // ContainerCLIConfig stores the options for the nvidia-container-cli type ContainerCLIConfig struct { - Root string `toml:"root"` + Root string `toml:"root"` + LoadKmods bool `toml:"load-kmods"` + Ldconfig string `toml:"ldconfig"` } diff --git a/internal/config/config.go b/internal/config/config.go index c25aa19d..c189ac54 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -74,13 +74,22 @@ func GetConfig() (*Config, error) { configFilePath := path.Join(configDir, configFilePath) + return Load(configFilePath) +} + +// Load loads the config from the specified file path. +func Load(configFilePath string) (*Config, error) { + if configFilePath == "" { + return getDefault() + } + tomlFile, err := os.Open(configFilePath) if err != nil { - return getDefaultConfig() + return getDefault() } defer tomlFile.Close() - cfg, err := loadConfigFrom(tomlFile) + cfg, err := LoadFrom(tomlFile) if err != nil { return nil, fmt.Errorf("failed to read config values: %v", err) } @@ -88,21 +97,28 @@ func GetConfig() (*Config, error) { return cfg, nil } -// loadRuntimeConfigFrom reads the config from the specified Reader -func loadConfigFrom(reader io.Reader) (*Config, error) { - toml, err := toml.LoadReader(reader) +// LoadFrom reads the config from the specified Reader +func LoadFrom(reader io.Reader) (*Config, error) { + var tree *toml.Tree + if reader != nil { + toml, err := toml.LoadReader(reader) + if err != nil { + return nil, err + } + tree = toml + } + + return getFromTree(tree) +} + +// getFromTree reads the nvidia container runtime config from the specified toml Tree. 
+func getFromTree(toml *toml.Tree) (*Config, error) { + cfg, err := getDefault() if err != nil { return nil, err } - - return getConfigFrom(toml) -} - -// getConfigFrom reads the nvidia container runtime config from the specified toml Tree. -func getConfigFrom(toml *toml.Tree) (*Config, error) { - cfg, err := getDefaultConfig() - if err != nil { - return nil, err + if toml == nil { + return cfg, nil } if err := toml.Unmarshal(cfg); err != nil { @@ -112,92 +128,39 @@ func getConfigFrom(toml *toml.Tree) (*Config, error) { return cfg, nil } -// getDefaultConfig defines the default values for the config -func getDefaultConfig() (*Config, error) { - tomlConfig, err := GetDefaultConfigToml() - if err != nil { - return nil, err - } - - // tomlConfig above includes information about the default values and comments. - // we need to marshal it back to a string and then unmarshal it to strip the comments. - contents, err := tomlConfig.ToTomlString() - if err != nil { - return nil, err - } - - reloaded, err := toml.Load(contents) - if err != nil { - return nil, err - } - - d := Config{} - if err := reloaded.Unmarshal(&d); err != nil { - return nil, fmt.Errorf("failed to unmarshal config: %v", err) - } - - // The default value for the accept-nvidia-visible-devices-envvar-when-unprivileged is non-standard. - // As such we explicitly handle it being set here. - if reloaded.Get("accept-nvidia-visible-devices-envvar-when-unprivileged") == nil { - d.AcceptEnvvarUnprivileged = true - } - // The default value for the nvidia-container-runtime.debug is non-standard. - // As such we explicitly handle it being set here. 
- if reloaded.Get("nvidia-container-runtime.debug") == nil { - d.NVIDIAContainerRuntimeConfig.DebugFilePath = "/dev/null" +// getDefault defines the default values for the config +func getDefault() (*Config, error) { + d := Config{ + AcceptEnvvarUnprivileged: true, + NVIDIAContainerCLIConfig: ContainerCLIConfig{ + LoadKmods: true, + Ldconfig: getLdConfigPath(), + }, + NVIDIACTKConfig: CTKConfig{ + Path: nvidiaCTKExecutable, + }, + NVIDIAContainerRuntimeConfig: RuntimeConfig{ + DebugFilePath: "/dev/null", + LogLevel: "info", + Runtimes: []string{"docker-runc", "runc"}, + Mode: "auto", + Modes: modesConfig{ + CSV: csvModeConfig{ + MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d", + }, + CDI: cdiModeConfig{ + DefaultKind: "nvidia.com/gpu", + AnnotationPrefixes: []string{cdi.AnnotationPrefix}, + }, + }, + }, + NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{ + Path: NVIDIAContainerRuntimeHookExecutable, + }, } return &d, nil } -// GetDefaultConfigToml returns the default config as a toml Tree. 
-func GetDefaultConfigToml() (*toml.Tree, error) { - tree, err := toml.TreeFromMap(nil) - if err != nil { - return nil, err - } - - tree.Set("disable-require", false) - tree.SetWithComment("swarm-resource", "", true, "DOCKER_RESOURCE_GPU") - tree.SetWithComment("accept-nvidia-visible-devices-envvar-when-unprivileged", "", true, true) - tree.SetWithComment("accept-nvidia-visible-devices-as-volume-mounts", "", true, false) - - // nvidia-container-cli - tree.SetWithComment("nvidia-container-cli.root", "", true, "/run/nvidia/driver") - tree.SetWithComment("nvidia-container-cli.path", "", true, "/usr/bin/nvidia-container-cli") - tree.Set("nvidia-container-cli.environment", []string{}) - tree.SetWithComment("nvidia-container-cli.debug", "", true, "/var/log/nvidia-container-toolkit.log") - tree.SetWithComment("nvidia-container-cli.ldcache", "", true, "/etc/ld.so.cache") - tree.Set("nvidia-container-cli.load-kmods", true) - tree.SetWithComment("nvidia-container-cli.no-cgroups", "", true, false) - - tree.SetWithComment("nvidia-container-cli.user", "", getCommentedUserGroup(), getUserGroup()) - tree.Set("nvidia-container-cli.ldconfig", getLdConfigPath()) - - // nvidia-container-runtime - tree.SetWithComment("nvidia-container-runtime.debug", "", true, "/var/log/nvidia-container-runtime.log") - tree.Set("nvidia-container-runtime.log-level", "info") - - commentLines := []string{ - "Specify the runtimes to consider. 
This list is processed in order and the PATH", - "searched for matching executables unless the entry is an absolute path.", - } - tree.SetWithComment("nvidia-container-runtime.runtimes", strings.Join(commentLines, "\n "), false, []string{"docker-runc", "runc"}) - - tree.Set("nvidia-container-runtime.mode", "auto") - - tree.Set("nvidia-container-runtime.modes.csv.mount-spec-path", "/etc/nvidia-container-runtime/host-files-for-container.d") - tree.Set("nvidia-container-runtime.modes.cdi.default-kind", "nvidia.com/gpu") - tree.Set("nvidia-container-runtime.modes.cdi.annotation-prefixes", []string{cdi.AnnotationPrefix}) - - // nvidia-ctk - tree.Set("nvidia-ctk.path", nvidiaCTKExecutable) - - // nvidia-container-runtime-hook - tree.Set("nvidia-container-runtime-hook.path", nvidiaContainerRuntimeHookExecutable) - - return tree, nil -} - func getLdConfigPath() string { if _, err := os.Stat("/sbin/ldconfig.real"); err == nil { return "@/sbin/ldconfig.real" @@ -205,11 +168,6 @@ func getLdConfigPath() string { return "@/sbin/ldconfig" } -// getUserGroup returns the user and group to use for the nvidia-container-cli and whether the config option should be commented. -func getUserGroup() string { - return "root:video" -} - // getCommentedUserGroup returns whether the nvidia-container-cli user and group config option should be commented. 
func getCommentedUserGroup() bool {
 	uncommentIf := map[string]bool{
@@ -295,3 +253,73 @@ func resolveWithDefault(logger logger.Interface, label string, path string, defa
 
 	return resolvedPath
 }
+
+// asCommentedToml converts the config to a toml Tree in which well-known
+// options that are unset or still at their default value are emitted as
+// commented-out entries.
+func (c Config) asCommentedToml() (*toml.Tree, error) {
+	contents, err := toml.Marshal(c)
+	if err != nil {
+		return nil, err
+	}
+	asToml, err := toml.LoadBytes(contents)
+	if err != nil {
+		return nil, err
+	}
+
+	commentedDefaults := map[string]interface{}{
+		"swarm-resource": "DOCKER_RESOURCE_GPU",
+		"accept-nvidia-visible-devices-envvar-when-unprivileged": true,
+		"accept-nvidia-visible-devices-as-volume-mounts":         false,
+		"nvidia-container-cli.root":                              "/run/nvidia/driver",
+		"nvidia-container-cli.path":                              "/usr/bin/nvidia-container-cli",
+		"nvidia-container-cli.debug":                             "/var/log/nvidia-container-toolkit.log",
+		"nvidia-container-cli.ldcache":                           "/etc/ld.so.cache",
+		"nvidia-container-cli.no-cgroups":                        false,
+		"nvidia-container-cli.user":                              "root:video",
+		"nvidia-container-runtime.debug":                         "/var/log/nvidia-container-runtime.log",
+	}
+	for k, v := range commentedDefaults {
+		set := asToml.Get(k)
+		if shouldComment(k, v, set) {
+			asToml.SetWithComment(k, "", true, v)
+			continue
+		}
+		// The user option must stay active on systems where it is not
+		// commented; emit the default uncommented if it is not set.
+		if k == "nvidia-container-cli.user" && set == nil {
+			asToml.Set(k, v)
+		}
+	}
+
+	return asToml, nil
+}
+
+// shouldComment reports whether the option key, whose default is value and
+// whose value in the loaded config is set (nil if absent), should be written
+// as a commented-out entry.
+func shouldComment(key string, value interface{}, set interface{}) bool {
+	if key == "nvidia-container-cli.user" && !getCommentedUserGroup() {
+		return false
+	}
+	// "/dev/null" is the in-memory default for the runtime debug path; treat it
+	// as unset so that the default log path is shown commented out.
+	if key == "nvidia-container-runtime.debug" && set == "/dev/null" {
+		return true
+	}
+	return set == nil || value == set
+}
+
+// Save writes the config to the specified writer.
+func (c Config) Save(w io.Writer) (int64, error) { + asToml, err := c.asCommentedToml() + if err != nil { + return 0, err + } + + enc := toml.NewEncoder(w).Indentation("") + if err := enc.Encode(asToml); err != nil { + return 0, fmt.Errorf("invalid config: %v", err) + } + return 0, nil +} diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 867edf79..0c60ca99 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -49,17 +49,21 @@ func TestGetConfigWithCustomConfig(t *testing.T) { func TestGetConfig(t *testing.T) { testCases := []struct { - description string - contents []string - expectedError error - expectedConfig *Config + description string + contents []string + expectedError error + inspectLdconfig bool + expectedConfig *Config }{ { - description: "empty config is default", + description: "empty config is default", + inspectLdconfig: true, expectedConfig: &Config{ AcceptEnvvarUnprivileged: true, NVIDIAContainerCLIConfig: ContainerCLIConfig{ - Root: "", + Root: "", + LoadKmods: true, + Ldconfig: "WAS_CHECKED", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/dev/null", @@ -89,6 +93,8 @@ func TestGetConfig(t *testing.T) { contents: []string{ "accept-nvidia-visible-devices-envvar-when-unprivileged = false", "nvidia-container-cli.root = \"/bar/baz\"", + "nvidia-container-cli.load-kmods = false", + "nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"", "nvidia-container-runtime.debug = \"/foo/bar\"", "nvidia-container-runtime.experimental = true", "nvidia-container-runtime.discover-mode = \"not-legacy\"", @@ -104,7 +110,9 @@ func TestGetConfig(t *testing.T) { expectedConfig: &Config{ AcceptEnvvarUnprivileged: false, NVIDIAContainerCLIConfig: ContainerCLIConfig{ - Root: "/bar/baz", + Root: "/bar/baz", + LoadKmods: false, + Ldconfig: "/foo/bar/ldconfig", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", @@ -138,6 +146,8 @@ func TestGetConfig(t *testing.T) { 
"accept-nvidia-visible-devices-envvar-when-unprivileged = false", "[nvidia-container-cli]", "root = \"/bar/baz\"", + "load-kmods = false", + "ldconfig = \"/foo/bar/ldconfig\"", "[nvidia-container-runtime]", "debug = \"/foo/bar\"", "experimental = true", @@ -158,7 +168,9 @@ func TestGetConfig(t *testing.T) { expectedConfig: &Config{ AcceptEnvvarUnprivileged: false, NVIDIAContainerCLIConfig: ContainerCLIConfig{ - Root: "/bar/baz", + Root: "/bar/baz", + LoadKmods: false, + Ldconfig: "/foo/bar/ldconfig", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", @@ -192,13 +204,23 @@ func TestGetConfig(t *testing.T) { t.Run(tc.description, func(t *testing.T) { reader := strings.NewReader(strings.Join(tc.contents, "\n")) - cfg, err := loadConfigFrom(reader) + cfg, err := LoadFrom(reader) if tc.expectedError != nil { require.Error(t, err) } else { require.NoError(t, err) } + // We first handle the ldconfig path since this is currently system-dependent. + if tc.inspectLdconfig { + ldconfig := cfg.NVIDIAContainerCLIConfig.Ldconfig + require.True(t, strings.HasPrefix(ldconfig, "@/sbin/ldconfig")) + remaining := strings.TrimPrefix(ldconfig, "@/sbin/ldconfig") + require.True(t, remaining == ".real" || remaining == "") + + cfg.NVIDIAContainerCLIConfig.Ldconfig = "WAS_CHECKED" + } + require.EqualValues(t, tc.expectedConfig, cfg) }) } diff --git a/internal/config/hook.go b/internal/config/hook.go index 1222a4bb..99c52061 100644 --- a/internal/config/hook.go +++ b/internal/config/hook.go @@ -27,7 +27,7 @@ type RuntimeHookConfig struct { // GetDefaultRuntimeHookConfig defines the default values for the config func GetDefaultRuntimeHookConfig() (*RuntimeHookConfig, error) { - cfg, err := getDefaultConfig() + cfg, err := getDefault() if err != nil { return nil, err } diff --git a/internal/config/runtime.go b/internal/config/runtime.go index 4dc89e2d..ba9fc83c 100644 --- a/internal/config/runtime.go +++ b/internal/config/runtime.go @@ -48,7 +48,7 @@ type csvModeConfig 
struct { // GetDefaultRuntimeConfig defines the default values for the config func GetDefaultRuntimeConfig() (*RuntimeConfig, error) { - cfg, err := getDefaultConfig() + cfg, err := getDefault() if err != nil { return nil, err } diff --git a/packaging/debian/nvidia-container-toolkit-base.postinst b/packaging/debian/nvidia-container-toolkit-base.postinst new file mode 100644 index 00000000..311af07c --- /dev/null +++ b/packaging/debian/nvidia-container-toolkit-base.postinst @@ -0,0 +1,21 @@ +#!/bin/sh + +set -e + +case "$1" in + configure) + /usr/bin/nvidia-ctk --quiet config default --in-place --config=/etc/nvidia-container-runtime/config.toml + ;; + + abort-upgrade|abort-remove|abort-deconfigure) + ;; + + *) + echo "postinst called with unknown argument \`$1'" >&2 + exit 1 + ;; +esac + +#DEBHELPER# + +exit 0 diff --git a/packaging/rpm/SPECS/nvidia-container-toolkit.spec b/packaging/rpm/SPECS/nvidia-container-toolkit.spec index 608d3403..e7f1536f 100644 --- a/packaging/rpm/SPECS/nvidia-container-toolkit.spec +++ b/packaging/rpm/SPECS/nvidia-container-toolkit.spec @@ -66,6 +66,9 @@ fi rm -rf %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit ln -sf %{_bindir}/nvidia-container-runtime-hook %{_bindir}/nvidia-container-toolkit +# Generate the default config; if this file already exists it is loaded and rewritten in place. +%{_bindir}/nvidia-ctk --quiet config default --in-place --config=%{_sysconfdir}/nvidia-container-runtime/config.toml + %postun if [ "$1" = 0 ]; then # package is uninstalled, not upgraded if [ -L %{_bindir}/nvidia-container-toolkit ]; then rm -f %{_bindir}/nvidia-container-toolkit; fi