From eb5d50abc499c8e3d36c47083d93922de73935a5 Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Wed, 22 Nov 2023 19:47:52 +0000 Subject: [PATCH 01/11] Merge branch 'include-nvoptix' into 'main' Update list of graphics mounts See merge request nvidia/container-toolkit/container-toolkit!501 --- CHANGELOG.md | 4 ++++ internal/discover/graphics.go | 2 ++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5aa69dc9..bde84c17 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # NVIDIA Container Toolkit Changelog +## v1.14.4 +* Include `nvidia/nvoptix.bin` in list of graphics mounts. +* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts. + ## v1.14.3 * [toolkit-container] Bump CUDA base image version to 12.2.2. diff --git a/internal/discover/graphics.go b/internal/discover/graphics.go index 0bc9451e..b4f99c5a 100644 --- a/internal/discover/graphics.go +++ b/internal/discover/graphics.go @@ -78,9 +78,11 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driverRoot string, nvi []string{ "glvnd/egl_vendor.d/10_nvidia.json", "vulkan/icd.d/nvidia_icd.json", + "vulkan/icd.d/nvidia_layers.json", "vulkan/implicit_layer.d/nvidia_layers.json", "egl/egl_external_platform.d/15_nvidia_gbm.json", "egl/egl_external_platform.d/10_nvidia_wayland.json", + "nvidia/nvoptix.bin", }, ) From 7566eb124a7dcb89128d511148b18b9839c9f542 Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Thu, 23 Nov 2023 12:35:09 +0000 Subject: [PATCH 02/11] Merge branch 'fix-config-update-command' into 'main' Switch to reflect package for config updates See merge request nvidia/container-toolkit/container-toolkit!500 --- CHANGELOG.md | 1 + cmd/nvidia-ctk/config/config.go | 88 +++++++++++++----- cmd/nvidia-ctk/config/config_test.go | 133 +++++++++------------------ 3 files changed, 109 insertions(+), 113 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 
bde84c17..dc74bf23 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## v1.14.4 * Include `nvidia/nvoptix.bin` in list of graphics mounts. * Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts. +* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly. ## v1.14.3 * [toolkit-container] Bump CUDA base image version to 12.2.2. diff --git a/cmd/nvidia-ctk/config/config.go b/cmd/nvidia-ctk/config/config.go index ee5832b6..fc73b97d 100644 --- a/cmd/nvidia-ctk/config/config.go +++ b/cmd/nvidia-ctk/config/config.go @@ -19,14 +19,16 @@ package config import ( "errors" "fmt" + "reflect" "strconv" "strings" + "github.com/urfave/cli/v2" + createdefault "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/create-default" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" - "github.com/urfave/cli/v2" ) type command struct { @@ -103,7 +105,7 @@ func run(c *cli.Context, opts *options) error { } for _, set := range opts.sets.Value() { - key, value, err := (*configToml)(cfgToml).setFlagToKeyValue(set) + key, value, err := setFlagToKeyValue(set) if err != nil { return fmt.Errorf("invalid --set option %v: %w", set, err) } @@ -126,50 +128,86 @@ func run(c *cli.Context, opts *options) error { return nil } -type configToml config.Toml - var errInvalidConfigOption = errors.New("invalid config option") +var errUndefinedField = errors.New("undefined field") var errInvalidFormat = errors.New("invalid format") // setFlagToKeyValue converts a --set flag to a key-value pair. // The set flag is of the form key[=value], with the value being optional if key refers to a // boolean config option. 
-func (c *configToml) setFlagToKeyValue(setFlag string) (string, interface{}, error) { - if c == nil { - return "", nil, errInvalidConfigOption - } - +func setFlagToKeyValue(setFlag string) (string, interface{}, error) { setParts := strings.SplitN(setFlag, "=", 2) key := setParts[0] - v := (*config.Toml)(c).Get(key) - if v == nil { - return key, nil, errInvalidConfigOption - } - switch v.(type) { - case bool: - if len(setParts) == 1 { - return key, true, nil - } + field, err := getField(key) + if err != nil { + return key, nil, fmt.Errorf("%w: %w", errInvalidConfigOption, err) } + kind := field.Kind() if len(setParts) != 2 { + if kind == reflect.Bool { + return key, true, nil + } return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag) } value := setParts[1] - switch vt := v.(type) { - case bool: + switch kind { + case reflect.Bool: b, err := strconv.ParseBool(value) if err != nil { return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err) } return key, b, err - case string: + case reflect.String: return key, value, nil - case []string: - return key, strings.Split(value, ","), nil - default: - return key, nil, fmt.Errorf("unsupported type for %v (%v)", setParts, vt) + case reflect.Slice: + valueParts := strings.Split(value, ",") + switch field.Elem().Kind() { + case reflect.String: + return key, valueParts, nil + case reflect.Int: + var output []int64 + for _, v := range valueParts { + vi, err := strconv.ParseInt(v, 10, 0) + if err != nil { + return key, nil, fmt.Errorf("%w: %w", errInvalidFormat, err) + } + output = append(output, vi) + } + return key, output, nil + } } + return key, nil, fmt.Errorf("unsupported type for %v (%v)", setParts, kind) +} + +func getField(key string) (reflect.Type, error) { + s, err := getStruct(reflect.TypeOf(config.Config{}), strings.Split(key, ".")...) 
+ if err != nil { + return nil, err + } + return s.Type, err +} + +func getStruct(current reflect.Type, paths ...string) (reflect.StructField, error) { + if len(paths) < 1 { + return reflect.StructField{}, fmt.Errorf("%w: no fields selected", errUndefinedField) + } + tomlField := paths[0] + for i := 0; i < current.NumField(); i++ { + f := current.Field(i) + v, ok := f.Tag.Lookup("toml") + if !ok { + continue + } + if v != tomlField { + continue + } + if len(paths) == 1 { + return f, nil + } + return getStruct(f.Type, paths[1:]...) + } + return reflect.StructField{}, fmt.Errorf("%w: %q", errUndefinedField, tomlField) } diff --git a/cmd/nvidia-ctk/config/config_test.go b/cmd/nvidia-ctk/config/config_test.go index bab1cb4d..eca474e9 100644 --- a/cmd/nvidia-ctk/config/config_test.go +++ b/cmd/nvidia-ctk/config/config_test.go @@ -19,152 +19,109 @@ package config import ( "testing" - "github.com/NVIDIA/nvidia-container-toolkit/internal/config" - "github.com/pelletier/go-toml" "github.com/stretchr/testify/require" ) func TestSetFlagToKeyValue(t *testing.T) { + // TODO: We need to enable this test again since switching to reflect. 
testCases := []struct { description string - config map[string]interface{} setFlag string expectedKey string expectedValue interface{} expectedError error }{ { - description: "empty config returns an error", - setFlag: "anykey=value", - expectedKey: "anykey", - expectedError: errInvalidConfigOption, - }, - { - description: "option not present returns an error", - config: map[string]interface{}{ - "defined": "defined-value", - }, + description: "option not present returns an error", setFlag: "undefined=new-value", expectedKey: "undefined", expectedError: errInvalidConfigOption, }, { - description: "boolean option assumes true", - config: map[string]interface{}{ - "boolean": false, - }, - setFlag: "boolean", - expectedKey: "boolean", + description: "undefined nexted option returns error", + setFlag: "nvidia-container-cli.undefined", + expectedKey: "nvidia-container-cli.undefined", + expectedError: errInvalidConfigOption, + }, + { + description: "boolean option assumes true", + setFlag: "disable-require", + expectedKey: "disable-require", expectedValue: true, }, { - description: "boolean option returns true", - config: map[string]interface{}{ - "boolean": false, - }, - setFlag: "boolean=true", - expectedKey: "boolean", + description: "boolean option returns true", + setFlag: "disable-require=true", + expectedKey: "disable-require", expectedValue: true, }, { - description: "boolean option returns false", - config: map[string]interface{}{ - "boolean": false, - }, - setFlag: "boolean=false", - expectedKey: "boolean", + description: "boolean option returns false", + setFlag: "disable-require=false", + expectedKey: "disable-require", expectedValue: false, }, { - description: "invalid boolean option returns error", - config: map[string]interface{}{ - "boolean": false, - }, - setFlag: "boolean=something", - expectedKey: "boolean", + description: "invalid boolean option returns error", + setFlag: "disable-require=something", + expectedKey: "disable-require", expectedValue: 
"something", expectedError: errInvalidFormat, }, { - description: "string option requires value", - config: map[string]interface{}{ - "string": "value", - }, - setFlag: "string", - expectedKey: "string", + description: "string option requires value", + setFlag: "swarm-resource", + expectedKey: "swarm-resource", expectedValue: nil, expectedError: errInvalidFormat, }, { - description: "string option returns value", - config: map[string]interface{}{ - "string": "value", - }, - setFlag: "string=string-value", - expectedKey: "string", + description: "string option returns value", + setFlag: "swarm-resource=string-value", + expectedKey: "swarm-resource", expectedValue: "string-value", }, { - description: "string option returns value with equals", - config: map[string]interface{}{ - "string": "value", - }, - setFlag: "string=string-value=more", - expectedKey: "string", + description: "string option returns value with equals", + setFlag: "swarm-resource=string-value=more", + expectedKey: "swarm-resource", expectedValue: "string-value=more", }, { - description: "string option treats bool value as string", - config: map[string]interface{}{ - "string": "value", - }, - setFlag: "string=true", - expectedKey: "string", + description: "string option treats bool value as string", + setFlag: "swarm-resource=true", + expectedKey: "swarm-resource", expectedValue: "true", }, { - description: "string option treats int value as string", - config: map[string]interface{}{ - "string": "value", - }, - setFlag: "string=5", - expectedKey: "string", + description: "string option treats int value as string", + setFlag: "swarm-resource=5", + expectedKey: "swarm-resource", expectedValue: "5", }, { - description: "[]string option returns single value", - config: map[string]interface{}{ - "string": []string{"value"}, - }, - setFlag: "string=string-value", - expectedKey: "string", + description: "[]string option returns single value", + setFlag: "nvidia-container-cli.environment=string-value", + 
expectedKey: "nvidia-container-cli.environment", expectedValue: []string{"string-value"}, }, { - description: "[]string option returns multiple values", - config: map[string]interface{}{ - "string": []string{"value"}, - }, - setFlag: "string=first,second", - expectedKey: "string", + description: "[]string option returns multiple values", + setFlag: "nvidia-container-cli.environment=first,second", + expectedKey: "nvidia-container-cli.environment", expectedValue: []string{"first", "second"}, }, { - description: "[]string option returns values with equals", - config: map[string]interface{}{ - "string": []string{"value"}, - }, - setFlag: "string=first=1,second=2", - expectedKey: "string", + description: "[]string option returns values with equals", + setFlag: "nvidia-container-cli.environment=first=1,second=2", + expectedKey: "nvidia-container-cli.environment", expectedValue: []string{"first=1", "second=2"}, }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - tree, _ := toml.TreeFromMap(tc.config) - cfgToml := (*config.Toml)(tree) - k, v, err := (*configToml)(cfgToml).setFlagToKeyValue(tc.setFlag) + k, v, err := setFlagToKeyValue(tc.setFlag) require.ErrorIs(t, err, tc.expectedError) require.EqualValues(t, tc.expectedKey, k) require.EqualValues(t, tc.expectedValue, v) From cc688f7c75602b52fb8cda94fb24d800d8576b40 Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Mon, 8 Jan 2024 11:42:42 +0000 Subject: [PATCH 03/11] Merge branch 'log-requested-mode' into 'main' Log explicitly requested runtime mode See merge request nvidia/container-toolkit/container-toolkit!527 --- CHANGELOG.md | 1 + internal/info/auto.go | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dc74bf23..ee60beb9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * Include `nvidia/nvoptix.bin` in list of graphics mounts. * Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts. 
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly. +* Log explicitly requested runtime mode. ## v1.14.3 * [toolkit-container] Bump CUDA base image version to 12.2.2. diff --git a/internal/info/auto.go b/internal/info/auto.go index 760d33d9..a99b35b9 100644 --- a/internal/info/auto.go +++ b/internal/info/auto.go @@ -63,6 +63,7 @@ func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rm // resolveMode determines the correct mode for the platform if set to "auto" func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) { if mode != "auto" { + r.logger.Infof("Using requested mode '%s'", mode) return mode } defer func() { From 68f0203a49635fb061e9bbc8f84c67ac6524bedc Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Tue, 9 Jan 2024 09:44:23 +0000 Subject: [PATCH 04/11] Merge branch 'remove-libseccomp-dependency' into 'main' Remove libseccomp package dependency See merge request nvidia/container-toolkit/container-toolkit!531 --- CHANGELOG.md | 1 + packaging/debian/control | 2 +- packaging/rpm/SPECS/nvidia-container-toolkit.spec | 7 ------- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ee60beb9..a5a3e77f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts. * Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly. * Log explicitly requested runtime mode. +* Remove package dependency on libseccomp. ## v1.14.3 * [toolkit-container] Bump CUDA base image version to 12.2.2. 
diff --git a/packaging/debian/control b/packaging/debian/control index c43cbcbd..01abcea4 100644 --- a/packaging/debian/control +++ b/packaging/debian/control @@ -10,7 +10,7 @@ Build-Depends: debhelper (>= 9) Package: nvidia-container-toolkit Architecture: any -Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0), libseccomp2 +Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0) Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook Description: NVIDIA Container toolkit diff --git a/packaging/rpm/SPECS/nvidia-container-toolkit.spec b/packaging/rpm/SPECS/nvidia-container-toolkit.spec index a3613537..79ef91b2 100644 --- a/packaging/rpm/SPECS/nvidia-container-toolkit.spec +++ b/packaging/rpm/SPECS/nvidia-container-toolkit.spec @@ -23,13 +23,6 @@ Provides: nvidia-container-runtime-hook Requires: libnvidia-container-tools >= %{libnvidia_container_tools_version}, libnvidia-container-tools < 2.0.0 Requires: nvidia-container-toolkit-base == %{version}-%{release} -%if 0%{?suse_version} -Requires: libseccomp2 -Requires: libapparmor1 -%else -Requires: libseccomp -%endif - %description Provides tools and utilities to enable GPU support in containers. 
From c1eae0dedab70c5aecc183aaba075cdbb41b7f39 Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Tue, 9 Jan 2024 14:37:08 +0000 Subject: [PATCH 05/11] Merge branch 'libnvdxgdmal' into 'main' Add libnvdxgdmal library See merge request nvidia/container-toolkit/container-toolkit!529 --- CHANGELOG.md | 1 + pkg/nvcdi/driver-wsl.go | 1 + 2 files changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a5a3e77f..54ec6dc4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly. * Log explicitly requested runtime mode. * Remove package dependency on libseccomp. +* Added detection of libnvdxgdmal.so.1 on WSL2. ## v1.14.3 * [toolkit-container] Bump CUDA base image version to 12.2.2. diff --git a/pkg/nvcdi/driver-wsl.go b/pkg/nvcdi/driver-wsl.go index 1aa02b8e..cda50226 100644 --- a/pkg/nvcdi/driver-wsl.go +++ b/pkg/nvcdi/driver-wsl.go @@ -33,6 +33,7 @@ var requiredDriverStoreFiles = []string{ "libnvidia-ml.so.1", /* Core library for nvml */ "libnvidia-ml_loader.so", /* Core library for nvml on WSL */ "libdxcore.so", /* Core library for dxcore support */ + "libnvdxgdmal.so.1", /* dxgdmal library for cuda */ "nvcubins.bin", /* Binary containing GPU code for cuda */ "nvidia-smi", /* nvidia-smi binary*/ } From e0e22fdcebc83a508d1310a346242bdb3ffe690e Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Thu, 11 Jan 2024 13:19:25 +0000 Subject: [PATCH 06/11] Merge branch 'fix-user-group' into 'main' Fix bug in determining CLI user on SUSE systems See merge request nvidia/container-toolkit/container-toolkit!532 --- CHANGELOG.md | 1 + internal/config/config.go | 23 +++++--- internal/config/config_test.go | 99 ++++++++++++++++++++++++++++++++++ internal/config/toml.go | 2 +- 4 files changed, 117 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md 
index 54ec6dc4..2988f06f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Log explicitly requested runtime mode. * Remove package dependency on libseccomp. * Added detection of libnvdxgdmal.so.1 on WSL2. +* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems. ## v1.14.3 * [toolkit-container] Bump CUDA base image version to 12.2.2. diff --git a/internal/config/config.go b/internal/config/config.go index 7393f7ad..1d0f5abd 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -94,6 +94,7 @@ func GetDefault() (*Config, error) { NVIDIAContainerCLIConfig: ContainerCLIConfig{ LoadKmods: true, Ldconfig: getLdConfigPath(), + User: getUserGroup(), }, NVIDIACTKConfig: CTKConfig{ Path: nvidiaCTKExecutable, @@ -128,24 +129,32 @@ func getLdConfigPath() string { return "@/sbin/ldconfig" } -// getCommentedUserGroup returns whether the nvidia-container-cli user and group config option should be commented. -func getCommentedUserGroup() bool { - uncommentIf := map[string]bool{ +func getUserGroup() string { + if isSuse() { + return "root:video" + } + return "" +} + +// isSuse returns whether a SUSE-based distribution was detected. +func isSuse() bool { + suseDists := map[string]bool{ "suse": true, "opensuse": true, } idsLike := getDistIDLike() for _, id := range idsLike { - if uncommentIf[id] { - return false + if suseDists[id] { + return true } } - return true + return false } // getDistIDLike returns the ID_LIKE field from /etc/os-release. -func getDistIDLike() []string { +// We can override this for testing. 
+var getDistIDLike = func() []string { releaseFile, err := os.Open("/etc/os-release") if err != nil { return nil diff --git a/internal/config/config_test.go b/internal/config/config_test.go index f842bbb3..5cb6cbc2 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -48,6 +48,7 @@ func TestGetConfig(t *testing.T) { contents []string expectedError error inspectLdconfig bool + distIdsLike []string expectedConfig *Config }{ { @@ -93,6 +94,7 @@ func TestGetConfig(t *testing.T) { "nvidia-container-cli.root = \"/bar/baz\"", "nvidia-container-cli.load-kmods = false", "nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"", + "nvidia-container-cli.user = \"foo:bar\"", "nvidia-container-runtime.debug = \"/foo/bar\"", "nvidia-container-runtime.discover-mode = \"not-legacy\"", "nvidia-container-runtime.log-level = \"debug\"", @@ -112,6 +114,7 @@ func TestGetConfig(t *testing.T) { Root: "/bar/baz", LoadKmods: false, Ldconfig: "/foo/bar/ldconfig", + User: "foo:bar", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", @@ -152,6 +155,7 @@ func TestGetConfig(t *testing.T) { "root = \"/bar/baz\"", "load-kmods = false", "ldconfig = \"/foo/bar/ldconfig\"", + "user = \"foo:bar\"", "[nvidia-container-runtime]", "debug = \"/foo/bar\"", "discover-mode = \"not-legacy\"", @@ -176,6 +180,7 @@ func TestGetConfig(t *testing.T) { Root: "/bar/baz", LoadKmods: false, Ldconfig: "/foo/bar/ldconfig", + User: "foo:bar", }, NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/foo/bar", @@ -207,10 +212,88 @@ func TestGetConfig(t *testing.T) { }, }, }, + { + description: "suse config", + distIdsLike: []string{"suse", "opensuse"}, + inspectLdconfig: true, + expectedConfig: &Config{ + AcceptEnvvarUnprivileged: true, + SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", + NVIDIAContainerCLIConfig: ContainerCLIConfig{ + Root: "", + LoadKmods: true, + Ldconfig: "WAS_CHECKED", + User: "root:video", + }, + 
NVIDIAContainerRuntimeConfig: RuntimeConfig{ + DebugFilePath: "/dev/null", + LogLevel: "info", + Runtimes: []string{"docker-runc", "runc"}, + Mode: "auto", + Modes: modesConfig{ + CSV: csvModeConfig{ + MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d", + }, + CDI: cdiModeConfig{ + DefaultKind: "nvidia.com/gpu", + AnnotationPrefixes: []string{"cdi.k8s.io/"}, + SpecDirs: []string{"/etc/cdi", "/var/run/cdi"}, + }, + }, + }, + NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{ + Path: "nvidia-container-runtime-hook", + }, + NVIDIACTKConfig: CTKConfig{ + Path: "nvidia-ctk", + }, + }, + }, + { + description: "suse config overrides user", + distIdsLike: []string{"suse", "opensuse"}, + inspectLdconfig: true, + contents: []string{ + "nvidia-container-cli.user = \"foo:bar\"", + }, + expectedConfig: &Config{ + AcceptEnvvarUnprivileged: true, + SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video", + NVIDIAContainerCLIConfig: ContainerCLIConfig{ + Root: "", + LoadKmods: true, + Ldconfig: "WAS_CHECKED", + User: "foo:bar", + }, + NVIDIAContainerRuntimeConfig: RuntimeConfig{ + DebugFilePath: "/dev/null", + LogLevel: "info", + Runtimes: []string{"docker-runc", "runc"}, + Mode: "auto", + Modes: modesConfig{ + CSV: csvModeConfig{ + MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d", + }, + CDI: cdiModeConfig{ + DefaultKind: "nvidia.com/gpu", + AnnotationPrefixes: []string{"cdi.k8s.io/"}, + SpecDirs: []string{"/etc/cdi", "/var/run/cdi"}, + }, + }, + }, + NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{ + Path: "nvidia-container-runtime-hook", + }, + NVIDIACTKConfig: CTKConfig{ + Path: "nvidia-ctk", + }, + }, + }, } for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { + defer setGetDistIDLikeForTest(tc.distIdsLike)() reader := strings.NewReader(strings.Join(tc.contents, "\n")) tomlCfg, err := loadConfigTomlFrom(reader) @@ -236,3 +319,19 @@ func TestGetConfig(t *testing.T) { }) } } + 
+// setGetDistIDLikeForTest overrides the distribution IDs that would normally be read from the /etc/os-release file. +func setGetDistIDLikeForTest(ids []string) func() { + if ids == nil { + return func() {} + } + original := getDistIDLike + + getDistIDLike = func() []string { + return ids + } + + return func() { + getDistIDLike = original + } +} diff --git a/internal/config/toml.go b/internal/config/toml.go index 8c931675..aca024ce 100644 --- a/internal/config/toml.go +++ b/internal/config/toml.go @@ -204,7 +204,7 @@ func (t *Toml) commentDefaults() *Toml { } func shouldComment(key string, defaultValue interface{}, setTo interface{}) bool { - if key == "nvidia-container-cli.user" && !getCommentedUserGroup() { + if key == "nvidia-container-cli.user" && defaultValue == setTo && isSuse() { return false } if key == "nvidia-container-runtime.debug" && setTo == "/dev/null" { From 27d0fa4ee2cb79d202dcbcd47d6411b1d31ed727 Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Thu, 11 Jan 2024 14:03:32 +0000 Subject: [PATCH 07/11] Merge branch 'bump-cuda-12.3.1' into 'main' Bump CUDA base image to 12.3.1 See merge request nvidia/container-toolkit/container-toolkit!535 --- CHANGELOG.md | 2 ++ versions.mk | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2988f06f..06e56784 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,8 @@ * Added detection of libnvdxgdmal.so.1 on WSL2. * Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems. +* [toolkit-container] Bump CUDA base image version to 12.3.1. + ## v1.14.3 * [toolkit-container] Bump CUDA base image version to 12.2.2. diff --git a/versions.mk b/versions.mk index 201f2ad6..59633236 100644 --- a/versions.mk +++ b/versions.mk @@ -30,7 +30,7 @@ NVIDIA_CONTAINER_RUNTIME_VERSION := 3.14.0 # Specify the expected libnvidia-container0 version for arm64-based ubuntu builds. 
LIBNVIDIA_CONTAINER0_VERSION := 0.10.0+jetpack -CUDA_VERSION := 12.2.2 +CUDA_VERSION := 12.3.1 GOLANG_VERSION := 1.20.5 GIT_COMMIT ?= $(shell git describe --match="" --dirty --long --always --abbrev=40 2> /dev/null || echo "") From c050bcf081dd60a1b332e97c14dcaa4c631dae65 Mon Sep 17 00:00:00 2001 From: Evan Lezar <7723350-elezar@users.noreply.gitlab.com> Date: Wed, 17 Jan 2024 21:28:14 +0000 Subject: [PATCH 08/11] Merge branch 'add-crun-as-configured-runtime' into 'main' Set default low-level runtimes to runc, crun See merge request nvidia/container-toolkit/container-toolkit!536 --- CHANGELOG.md | 1 + internal/config/config.go | 2 +- internal/config/config_test.go | 6 +++--- internal/config/toml_test.go | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 06e56784..5c6cedb0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * Remove package dependency on libseccomp. * Added detection of libnvdxgdmal.so.1 on WSL2. * Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems. +* Add `crun` to the list of configured low-level runtimes. * [toolkit-container] Bump CUDA base image version to 12.3.1. 
diff --git a/internal/config/config.go b/internal/config/config.go index 1d0f5abd..3e3eed49 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -102,7 +102,7 @@ func GetDefault() (*Config, error) { NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/dev/null", LogLevel: "info", - Runtimes: []string{"docker-runc", "runc"}, + Runtimes: []string{"docker-runc", "runc", "crun"}, Mode: "auto", Modes: modesConfig{ CSV: csvModeConfig{ diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 5cb6cbc2..caa36800 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -65,7 +65,7 @@ func TestGetConfig(t *testing.T) { NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/dev/null", LogLevel: "info", - Runtimes: []string{"docker-runc", "runc"}, + Runtimes: []string{"docker-runc", "runc", "crun"}, Mode: "auto", Modes: modesConfig{ CSV: csvModeConfig{ @@ -228,7 +228,7 @@ func TestGetConfig(t *testing.T) { NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/dev/null", LogLevel: "info", - Runtimes: []string{"docker-runc", "runc"}, + Runtimes: []string{"docker-runc", "runc", "crun"}, Mode: "auto", Modes: modesConfig{ CSV: csvModeConfig{ @@ -268,7 +268,7 @@ func TestGetConfig(t *testing.T) { NVIDIAContainerRuntimeConfig: RuntimeConfig{ DebugFilePath: "/dev/null", LogLevel: "info", - Runtimes: []string{"docker-runc", "runc"}, + Runtimes: []string{"docker-runc", "runc", "crun"}, Mode: "auto", Modes: modesConfig{ CSV: csvModeConfig{ diff --git a/internal/config/toml_test.go b/internal/config/toml_test.go index 710b5f76..e017db15 100644 --- a/internal/config/toml_test.go +++ b/internal/config/toml_test.go @@ -62,7 +62,7 @@ load-kmods = true #debug = "/var/log/nvidia-container-runtime.log" log-level = "info" mode = "auto" -runtimes = ["docker-runc", "runc"] +runtimes = ["docker-runc", "runc", "crun"] [nvidia-container-runtime.modes] From 9d2e4b48bc23a44af2cdd884b13a088c0719839f Mon Sep 
17 00:00:00 2001 From: Evan Lezar Date: Wed, 17 Jan 2024 22:32:19 +0100 Subject: [PATCH 09/11] Update libnvidia-container to 1.14.4 Signed-off-by: Evan Lezar --- CHANGELOG.md | 1 + third_party/libnvidia-container | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c6cedb0..3a5fab66 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Add `crun` to the list of configured low-level runtimes. * [toolkit-container] Bump CUDA base image version to 12.3.1. +* [libnvidia-container] Added detection of libnvdxgdmal.so.1 on WSL2. ## v1.14.3 * [toolkit-container] Bump CUDA base image version to 12.2.2. diff --git a/third_party/libnvidia-container b/third_party/libnvidia-container index 1eb5a30a..870d7c5d 160000 --- a/third_party/libnvidia-container +++ b/third_party/libnvidia-container @@ -1 +1 @@ -Subproject commit 1eb5a30a6ad0415550a9df632ac8832bf7e2bbba +Subproject commit 870d7c5d957f5780b8afa57c4d5cc924d4d9ed26 From 9ab640b2be1d59a0442deae3d6d6d9afb50705b4 Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 17 Jan 2024 22:46:13 +0100 Subject: [PATCH 10/11] Set libnvidia-container branch Signed-off-by: Evan Lezar --- .gitmodules | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitmodules b/.gitmodules index f417da1c..2d262889 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,4 +1,4 @@ [submodule "third_party/libnvidia-container"] path = third_party/libnvidia-container url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git - branch = main + branch = release-1.14 From cfe0d5d07ea8655b0cb4bb69a7709f8760cd9add Mon Sep 17 00:00:00 2001 From: Evan Lezar Date: Wed, 17 Jan 2024 23:06:49 +0100 Subject: [PATCH 11/11] Skip component updates Signed-off-by: Evan Lezar --- scripts/build-packages.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts/build-packages.sh b/scripts/build-packages.sh index 2e91877b..70f71f04 100755 --- a/scripts/build-packages.sh +++ 
b/scripts/build-packages.sh @@ -31,6 +31,8 @@ else targets=${all[@]} fi +# Skip component updates on release branches +SKIP_UPDATE_COMPONENTS=yes if [[ x"${SKIP_UPDATE_COMPONENTS}" != x"yes" ]]; then echo "Updating components" "${SCRIPTS_DIR}/update-components.sh"