Merge branch 'cherry-pick-1.14.4' into 'release-1.14'

Cherry pick changes for v1.14.4 release

See merge request nvidia/container-toolkit/container-toolkit!534
This commit is contained in:
Evan Lezar 2024-01-17 22:51:28 +00:00
commit 29fd206f3a
16 changed files with 250 additions and 135 deletions

2
.gitmodules vendored
View File

@ -1,4 +1,4 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = main
branch = release-1.14

View File

@ -1,5 +1,18 @@
# NVIDIA Container Toolkit Changelog
## v1.14.4
* Include `nvidia/nvoptix.bin` in list of graphics mounts.
* Include `vulkan/icd.d/nvidia_layers.json` in list of graphics mounts.
* Fixed bug in `nvidia-ctk config` command when using `--set`. The types of applied config options are now applied correctly.
* Log explicitly requested runtime mode.
* Remove package dependency on libseccomp.
* Added detection of libnvdxgdmal.so.1 on WSL2.
* Fix bug in determining default nvidia-container-runtime.user config value on SUSE-based systems.
* Add `crun` to the list of configured low-level runtimes.
* [toolkit-container] Bump CUDA base image version to 12.3.1.
* [libnvidia-container] Added detection of libnvdxgdmal.so.1 on WSL2.
## v1.14.3
* [toolkit-container] Bump CUDA base image version to 12.2.2.

View File

@ -19,14 +19,16 @@ package config
import (
"errors"
"fmt"
"reflect"
"strconv"
"strings"
"github.com/urfave/cli/v2"
createdefault "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/create-default"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/config/flags"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/urfave/cli/v2"
)
type command struct {
@ -103,7 +105,7 @@ func run(c *cli.Context, opts *options) error {
}
for _, set := range opts.sets.Value() {
key, value, err := (*configToml)(cfgToml).setFlagToKeyValue(set)
key, value, err := setFlagToKeyValue(set)
if err != nil {
return fmt.Errorf("invalid --set option %v: %w", set, err)
}
@ -126,50 +128,86 @@ func run(c *cli.Context, opts *options) error {
return nil
}
type configToml config.Toml
var errInvalidConfigOption = errors.New("invalid config option")
var errUndefinedField = errors.New("undefined field")
var errInvalidFormat = errors.New("invalid format")
// setFlagToKeyValue converts a --set flag to a key-value pair.
// The set flag is of the form key[=value], with the value being optional if key refers to a
// boolean config option.
func (c *configToml) setFlagToKeyValue(setFlag string) (string, interface{}, error) {
if c == nil {
return "", nil, errInvalidConfigOption
}
func setFlagToKeyValue(setFlag string) (string, interface{}, error) {
setParts := strings.SplitN(setFlag, "=", 2)
key := setParts[0]
v := (*config.Toml)(c).Get(key)
if v == nil {
return key, nil, errInvalidConfigOption
}
switch v.(type) {
case bool:
if len(setParts) == 1 {
return key, true, nil
}
field, err := getField(key)
if err != nil {
return key, nil, fmt.Errorf("%w: %w", errInvalidConfigOption, err)
}
kind := field.Kind()
if len(setParts) != 2 {
if kind == reflect.Bool {
return key, true, nil
}
return key, nil, fmt.Errorf("%w: expected key=value; got %v", errInvalidFormat, setFlag)
}
value := setParts[1]
switch vt := v.(type) {
case bool:
switch kind {
case reflect.Bool:
b, err := strconv.ParseBool(value)
if err != nil {
return key, value, fmt.Errorf("%w: %w", errInvalidFormat, err)
}
return key, b, err
case string:
case reflect.String:
return key, value, nil
case []string:
return key, strings.Split(value, ","), nil
default:
return key, nil, fmt.Errorf("unsupported type for %v (%v)", setParts, vt)
case reflect.Slice:
valueParts := strings.Split(value, ",")
switch field.Elem().Kind() {
case reflect.String:
return key, valueParts, nil
case reflect.Int:
var output []int64
for _, v := range valueParts {
vi, err := strconv.ParseInt(v, 10, 0)
if err != nil {
return key, nil, fmt.Errorf("%w: %w", errInvalidFormat, err)
}
output = append(output, vi)
}
return key, output, nil
}
}
return key, nil, fmt.Errorf("unsupported type for %v (%v)", setParts, kind)
}
func getField(key string) (reflect.Type, error) {
s, err := getStruct(reflect.TypeOf(config.Config{}), strings.Split(key, ".")...)
if err != nil {
return nil, err
}
return s.Type, err
}
func getStruct(current reflect.Type, paths ...string) (reflect.StructField, error) {
if len(paths) < 1 {
return reflect.StructField{}, fmt.Errorf("%w: no fields selected", errUndefinedField)
}
tomlField := paths[0]
for i := 0; i < current.NumField(); i++ {
f := current.Field(i)
v, ok := f.Tag.Lookup("toml")
if !ok {
continue
}
if v != tomlField {
continue
}
if len(paths) == 1 {
return f, nil
}
return getStruct(f.Type, paths[1:]...)
}
return reflect.StructField{}, fmt.Errorf("%w: %q", errUndefinedField, tomlField)
}

View File

@ -19,152 +19,109 @@ package config
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/pelletier/go-toml"
"github.com/stretchr/testify/require"
)
func TestSetFlagToKeyValue(t *testing.T) {
// TODO: We need to enable this test again since switching to reflect.
testCases := []struct {
description string
config map[string]interface{}
setFlag string
expectedKey string
expectedValue interface{}
expectedError error
}{
{
description: "empty config returns an error",
setFlag: "anykey=value",
expectedKey: "anykey",
expectedError: errInvalidConfigOption,
},
{
description: "option not present returns an error",
config: map[string]interface{}{
"defined": "defined-value",
},
description: "option not present returns an error",
setFlag: "undefined=new-value",
expectedKey: "undefined",
expectedError: errInvalidConfigOption,
},
{
description: "boolean option assumes true",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean",
expectedKey: "boolean",
description: "undefined nexted option returns error",
setFlag: "nvidia-container-cli.undefined",
expectedKey: "nvidia-container-cli.undefined",
expectedError: errInvalidConfigOption,
},
{
description: "boolean option assumes true",
setFlag: "disable-require",
expectedKey: "disable-require",
expectedValue: true,
},
{
description: "boolean option returns true",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=true",
expectedKey: "boolean",
description: "boolean option returns true",
setFlag: "disable-require=true",
expectedKey: "disable-require",
expectedValue: true,
},
{
description: "boolean option returns false",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=false",
expectedKey: "boolean",
description: "boolean option returns false",
setFlag: "disable-require=false",
expectedKey: "disable-require",
expectedValue: false,
},
{
description: "invalid boolean option returns error",
config: map[string]interface{}{
"boolean": false,
},
setFlag: "boolean=something",
expectedKey: "boolean",
description: "invalid boolean option returns error",
setFlag: "disable-require=something",
expectedKey: "disable-require",
expectedValue: "something",
expectedError: errInvalidFormat,
},
{
description: "string option requires value",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string",
expectedKey: "string",
description: "string option requires value",
setFlag: "swarm-resource",
expectedKey: "swarm-resource",
expectedValue: nil,
expectedError: errInvalidFormat,
},
{
description: "string option returns value",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=string-value",
expectedKey: "string",
description: "string option returns value",
setFlag: "swarm-resource=string-value",
expectedKey: "swarm-resource",
expectedValue: "string-value",
},
{
description: "string option returns value with equals",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=string-value=more",
expectedKey: "string",
description: "string option returns value with equals",
setFlag: "swarm-resource=string-value=more",
expectedKey: "swarm-resource",
expectedValue: "string-value=more",
},
{
description: "string option treats bool value as string",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=true",
expectedKey: "string",
description: "string option treats bool value as string",
setFlag: "swarm-resource=true",
expectedKey: "swarm-resource",
expectedValue: "true",
},
{
description: "string option treats int value as string",
config: map[string]interface{}{
"string": "value",
},
setFlag: "string=5",
expectedKey: "string",
description: "string option treats int value as string",
setFlag: "swarm-resource=5",
expectedKey: "swarm-resource",
expectedValue: "5",
},
{
description: "[]string option returns single value",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=string-value",
expectedKey: "string",
description: "[]string option returns single value",
setFlag: "nvidia-container-cli.environment=string-value",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"string-value"},
},
{
description: "[]string option returns multiple values",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=first,second",
expectedKey: "string",
description: "[]string option returns multiple values",
setFlag: "nvidia-container-cli.environment=first,second",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first", "second"},
},
{
description: "[]string option returns values with equals",
config: map[string]interface{}{
"string": []string{"value"},
},
setFlag: "string=first=1,second=2",
expectedKey: "string",
description: "[]string option returns values with equals",
setFlag: "nvidia-container-cli.environment=first=1,second=2",
expectedKey: "nvidia-container-cli.environment",
expectedValue: []string{"first=1", "second=2"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
tree, _ := toml.TreeFromMap(tc.config)
cfgToml := (*config.Toml)(tree)
k, v, err := (*configToml)(cfgToml).setFlagToKeyValue(tc.setFlag)
k, v, err := setFlagToKeyValue(tc.setFlag)
require.ErrorIs(t, err, tc.expectedError)
require.EqualValues(t, tc.expectedKey, k)
require.EqualValues(t, tc.expectedValue, v)

View File

@ -94,6 +94,7 @@ func GetDefault() (*Config, error) {
NVIDIAContainerCLIConfig: ContainerCLIConfig{
LoadKmods: true,
Ldconfig: getLdConfigPath(),
User: getUserGroup(),
},
NVIDIACTKConfig: CTKConfig{
Path: nvidiaCTKExecutable,
@ -101,7 +102,7 @@ func GetDefault() (*Config, error) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc"},
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
@ -128,24 +129,32 @@ func getLdConfigPath() string {
return "@/sbin/ldconfig"
}
// getCommentedUserGroup returns whether the nvidia-container-cli user and group config option should be commented.
func getCommentedUserGroup() bool {
uncommentIf := map[string]bool{
func getUserGroup() string {
if isSuse() {
return "root:video"
}
return ""
}
// isSuse returns whether a SUSE-based distribution was detected.
func isSuse() bool {
suseDists := map[string]bool{
"suse": true,
"opensuse": true,
}
idsLike := getDistIDLike()
for _, id := range idsLike {
if uncommentIf[id] {
return false
if suseDists[id] {
return true
}
}
return true
return false
}
// getDistIDLike returns the ID_LIKE field from /etc/os-release.
func getDistIDLike() []string {
// We can override this for testing.
var getDistIDLike = func() []string {
releaseFile, err := os.Open("/etc/os-release")
if err != nil {
return nil

View File

@ -48,6 +48,7 @@ func TestGetConfig(t *testing.T) {
contents []string
expectedError error
inspectLdconfig bool
distIdsLike []string
expectedConfig *Config
}{
{
@ -64,7 +65,7 @@ func TestGetConfig(t *testing.T) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc"},
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
@ -93,6 +94,7 @@ func TestGetConfig(t *testing.T) {
"nvidia-container-cli.root = \"/bar/baz\"",
"nvidia-container-cli.load-kmods = false",
"nvidia-container-cli.ldconfig = \"/foo/bar/ldconfig\"",
"nvidia-container-cli.user = \"foo:bar\"",
"nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
"nvidia-container-runtime.log-level = \"debug\"",
@ -112,6 +114,7 @@ func TestGetConfig(t *testing.T) {
Root: "/bar/baz",
LoadKmods: false,
Ldconfig: "/foo/bar/ldconfig",
User: "foo:bar",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
@ -152,6 +155,7 @@ func TestGetConfig(t *testing.T) {
"root = \"/bar/baz\"",
"load-kmods = false",
"ldconfig = \"/foo/bar/ldconfig\"",
"user = \"foo:bar\"",
"[nvidia-container-runtime]",
"debug = \"/foo/bar\"",
"discover-mode = \"not-legacy\"",
@ -176,6 +180,7 @@ func TestGetConfig(t *testing.T) {
Root: "/bar/baz",
LoadKmods: false,
Ldconfig: "/foo/bar/ldconfig",
User: "foo:bar",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
@ -207,10 +212,88 @@ func TestGetConfig(t *testing.T) {
},
},
},
{
description: "suse config",
distIdsLike: []string{"suse", "opensuse"},
inspectLdconfig: true,
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
LoadKmods: true,
Ldconfig: "WAS_CHECKED",
User: "root:video",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
},
},
{
description: "suse config overrides user",
distIdsLike: []string{"suse", "opensuse"},
inspectLdconfig: true,
contents: []string{
"nvidia-container-cli.user = \"foo:bar\"",
},
expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
SupportedDriverCapabilities: "compat32,compute,display,graphics,ngx,utility,video",
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
LoadKmods: true,
Ldconfig: "WAS_CHECKED",
User: "foo:bar",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
SpecDirs: []string{"/etc/cdi", "/var/run/cdi"},
},
},
},
NVIDIAContainerRuntimeHookConfig: RuntimeHookConfig{
Path: "nvidia-container-runtime-hook",
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
defer setGetDistIDLikeForTest(tc.distIdsLike)()
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
tomlCfg, err := loadConfigTomlFrom(reader)
@ -236,3 +319,19 @@ func TestGetConfig(t *testing.T) {
})
}
}
// setGetDistIDsLikeForTest overrides the distribution IDs that would normally be read from the /etc/os-release file.
func setGetDistIDLikeForTest(ids []string) func() {
if ids == nil {
return func() {}
}
original := getDistIDLike
getDistIDLike = func() []string {
return ids
}
return func() {
getDistIDLike = original
}
}

View File

@ -204,7 +204,7 @@ func (t *Toml) commentDefaults() *Toml {
}
func shouldComment(key string, defaultValue interface{}, setTo interface{}) bool {
if key == "nvidia-container-cli.user" && !getCommentedUserGroup() {
if key == "nvidia-container-cli.user" && defaultValue == setTo && isSuse() {
return false
}
if key == "nvidia-container-runtime.debug" && setTo == "/dev/null" {

View File

@ -62,7 +62,7 @@ load-kmods = true
#debug = "/var/log/nvidia-container-runtime.log"
log-level = "info"
mode = "auto"
runtimes = ["docker-runc", "runc"]
runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes]

View File

@ -78,9 +78,11 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driverRoot string, nvi
[]string{
"glvnd/egl_vendor.d/10_nvidia.json",
"vulkan/icd.d/nvidia_icd.json",
"vulkan/icd.d/nvidia_layers.json",
"vulkan/implicit_layer.d/nvidia_layers.json",
"egl/egl_external_platform.d/15_nvidia_gbm.json",
"egl/egl_external_platform.d/10_nvidia_wayland.json",
"nvidia/nvoptix.bin",
},
)

View File

@ -63,6 +63,7 @@ func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rm
// resolveMode determines the correct mode for the platform if set to "auto"
func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) {
if mode != "auto" {
r.logger.Infof("Using requested mode '%s'", mode)
return mode
}
defer func() {

View File

@ -10,7 +10,7 @@ Build-Depends: debhelper (>= 9)
Package: nvidia-container-toolkit
Architecture: any
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0), libseccomp2
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0)
Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
Description: NVIDIA Container toolkit

View File

@ -23,13 +23,6 @@ Provides: nvidia-container-runtime-hook
Requires: libnvidia-container-tools >= %{libnvidia_container_tools_version}, libnvidia-container-tools < 2.0.0
Requires: nvidia-container-toolkit-base == %{version}-%{release}
%if 0%{?suse_version}
Requires: libseccomp2
Requires: libapparmor1
%else
Requires: libseccomp
%endif
%description
Provides tools and utilities to enable GPU support in containers.

View File

@ -33,6 +33,7 @@ var requiredDriverStoreFiles = []string{
"libnvidia-ml.so.1", /* Core library for nvml */
"libnvidia-ml_loader.so", /* Core library for nvml on WSL */
"libdxcore.so", /* Core library for dxcore support */
"libnvdxgdmal.so.1", /* dxgdmal library for cuda */
"nvcubins.bin", /* Binary containing GPU code for cuda */
"nvidia-smi", /* nvidia-smi binary*/
}

View File

@ -31,6 +31,8 @@ else
targets=${all[@]}
fi
# Skip component updates on release branches
SKIP_UPDATE_COMPONENTS=yes
if [[ x"${SKIP_UPDATE_COMPONENTS}" != x"yes" ]]; then
echo "Updating components"
"${SCRIPTS_DIR}/update-components.sh"

@ -1 +1 @@
Subproject commit 1eb5a30a6ad0415550a9df632ac8832bf7e2bbba
Subproject commit 870d7c5d957f5780b8afa57c4d5cc924d4d9ed26

View File

@ -30,7 +30,7 @@ NVIDIA_CONTAINER_RUNTIME_VERSION := 3.14.0
# Specify the expected libnvidia-container0 version for arm64-based ubuntu builds.
LIBNVIDIA_CONTAINER0_VERSION := 0.10.0+jetpack
CUDA_VERSION := 12.2.2
CUDA_VERSION := 12.3.1
GOLANG_VERSION := 1.20.5
GIT_COMMIT ?= $(shell git describe --match="" --dirty --long --always --abbrev=40 2> /dev/null || echo "")