Merge branch 'CNT-4056/add-cdi-annotations' into 'main'

Add nvidia-container-runtime.modes.cdi.annotation-prefixes config option.

See merge request nvidia/container-toolkit/container-toolkit!356
This commit is contained in:
Christopher Desiniotis 2023-03-28 16:47:51 +00:00
commit ee5be5e3f2
13 changed files with 339 additions and 68 deletions

View File

@ -9,6 +9,7 @@
* Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions. * Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions.
* Also return an error from the nvcdi.New constructor instead of panicking. * Also return an error from the nvcdi.New constructor instead of panicking.
* Detect XOrg libraries for injection and CDI spec generation. * Detect XOrg libraries for injection and CDI spec generation.
* Add `nvidia-container-runtime.modes.cdi.annotation-prefixes` config option that allows the CDI annotation prefixes that are read to be overridden.
* [libnvidia-container] Fix segmentation fault when RPC initialization fails. * [libnvidia-container] Fix segmentation fault when RPC initialization fails.
* [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2. * [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2.

View File

@ -71,7 +71,8 @@ func TestGetConfig(t *testing.T) {
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d", MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
}, },
CDI: cdiModeConfig{ CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu", DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{"cdi.k8s.io/"},
}, },
}, },
}, },
@ -92,6 +93,7 @@ func TestGetConfig(t *testing.T) {
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]", "nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
"nvidia-container-runtime.mode = \"not-auto\"", "nvidia-container-runtime.mode = \"not-auto\"",
"nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"", "nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"",
"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"", "nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"", "nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
}, },
@ -111,6 +113,10 @@ func TestGetConfig(t *testing.T) {
}, },
CDI: cdiModeConfig{ CDI: cdiModeConfig{
DefaultKind: "example.vendor.com/device", DefaultKind: "example.vendor.com/device",
AnnotationPrefixes: []string{
"cdi.k8s.io/",
"example.vendor.com/",
},
}, },
}, },
}, },
@ -134,6 +140,7 @@ func TestGetConfig(t *testing.T) {
"mode = \"not-auto\"", "mode = \"not-auto\"",
"[nvidia-container-runtime.modes.cdi]", "[nvidia-container-runtime.modes.cdi]",
"default-kind = \"example.vendor.com/device\"", "default-kind = \"example.vendor.com/device\"",
"annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
"[nvidia-container-runtime.modes.csv]", "[nvidia-container-runtime.modes.csv]",
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"", "mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"[nvidia-ctk]", "[nvidia-ctk]",
@ -155,6 +162,10 @@ func TestGetConfig(t *testing.T) {
}, },
CDI: cdiModeConfig{ CDI: cdiModeConfig{
DefaultKind: "example.vendor.com/device", DefaultKind: "example.vendor.com/device",
AnnotationPrefixes: []string{
"cdi.k8s.io/",
"example.vendor.com/",
},
}, },
}, },
}, },

View File

@ -50,12 +50,15 @@ func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_engine"}, "") config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_engine"}, "")
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false) config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
} }
cdiAnnotations := []interface{}{"cdi.k8s.io/*"}
containerAnnotations, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}).([]interface{}) if len(c.ContainerAnnotations) > 0 {
if ok && containerAnnotations != nil { annotations, err := (*Config)(c).getRuntimeAnnotations([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"})
cdiAnnotations = append(containerAnnotations, cdiAnnotations...) if err != nil {
return err
}
annotations = append(c.ContainerAnnotations, annotations...)
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
} }
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, cdiAnnotations)
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}, path) config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}, path) config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}, path)

View File

@ -45,12 +45,14 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false) config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
} }
cdiAnnotations := []interface{}{"cdi.k8s.io/*"} if len(c.ContainerAnnotations) > 0 {
containerAnnotations, ok := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}).([]interface{}) annotations, err := c.getRuntimeAnnotations([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"})
if ok && containerAnnotations != nil { if err != nil {
cdiAnnotations = append(containerAnnotations, cdiAnnotations...) return err
}
annotations = append(c.ContainerAnnotations, annotations...)
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
} }
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, cdiAnnotations)
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "options", "BinaryName"}, path) config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
@ -62,6 +64,32 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
return nil return nil
} }
// getRuntimeAnnotations returns the container annotations stored at the
// specified path of the config tree.
//
// A nil config, a nil tree, or a path that is not present yields (nil, nil).
// The value at the path may either be a []string (the type that AddRuntime
// passes to SetPath for this key) or a []interface{} whose elements are all
// strings. Any other value, or any non-string element, results in an error
// that includes the offending value.
func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) {
	if c == nil || c.Tree == nil {
		return nil, nil
	}

	if !c.Tree.HasPath(path) {
		return nil, nil
	}

	// Hold on to the raw value: the result of a failed type assertion is the
	// zero value, which would make the error below uninformative.
	raw := c.Tree.GetPath(path)

	switch values := raw.(type) {
	case []string:
		// Return a copy so that callers appending to the result cannot
		// modify the slice held by the tree.
		// NOTE(review): assumes the tree hands back a []string unchanged
		// after SetPath was called with one — confirm against go-toml.
		return append([]string(nil), values...), nil
	case []interface{}:
		var annotations []string
		for _, value := range values {
			annotation, ok := value.(string)
			if !ok {
				return nil, fmt.Errorf("invalid annotation: %v", value)
			}
			annotations = append(annotations, annotation)
		}
		return annotations, nil
	default:
		return nil, fmt.Errorf("invalid annotations: %v", raw)
	}
}
// DefaultRuntime returns the default runtime for the containerd config // DefaultRuntime returns the default runtime for the containerd config
func (c Config) DefaultRuntime() string { func (c Config) DefaultRuntime() string {
if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok { if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok {

View File

@ -26,6 +26,7 @@ type Config struct {
*toml.Tree *toml.Tree
RuntimeType string RuntimeType string
UseDefaultRuntimeName bool UseDefaultRuntimeName bool
ContainerAnnotations []string
} }
// New creates a containerd config with the specified options // New creates a containerd config with the specified options

View File

@ -30,9 +30,10 @@ const (
) )
type builder struct { type builder struct {
path string path string
runtimeType string runtimeType string
useLegacyConfig bool useLegacyConfig bool
containerAnnotations []string
} }
// Option defines a function that can be used to configure the config builder // Option defines a function that can be used to configure the config builder
@ -59,6 +60,13 @@ func WithUseLegacyConfig(useLegacyConfig bool) Option {
} }
} }
// WithContainerAnnotations returns an Option that records the supplied
// container annotations on the config builder.
func WithContainerAnnotations(containerAnnotations ...string) Option {
	apply := func(b *builder) {
		b.containerAnnotations = containerAnnotations
	}
	return apply
}
func (b *builder) build() (engine.Interface, error) { func (b *builder) build() (engine.Interface, error) {
if b.path == "" { if b.path == "" {
return nil, fmt.Errorf("config path is empty") return nil, fmt.Errorf("config path is empty")
@ -74,6 +82,7 @@ func (b *builder) build() (engine.Interface, error) {
} }
config.RuntimeType = b.runtimeType config.RuntimeType = b.runtimeType
config.UseDefaultRuntimeName = !b.useLegacyConfig config.UseDefaultRuntimeName = !b.useLegacyConfig
config.ContainerAnnotations = b.containerAnnotations
version, err := config.parseVersion(b.useLegacyConfig) version, err := config.parseVersion(b.useLegacyConfig)
if err != nil { if err != nil {

View File

@ -19,6 +19,7 @@ package config
import ( import (
"fmt" "fmt"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/pelletier/go-toml" "github.com/pelletier/go-toml"
"github.com/sirupsen/logrus" "github.com/sirupsen/logrus"
) )
@ -52,6 +53,8 @@ type cdiModeConfig struct {
SpecDirs []string `toml:"spec-dirs"` SpecDirs []string `toml:"spec-dirs"`
// DefaultKind sets the default kind to be used when constructing fully-qualified CDI device names // DefaultKind sets the default kind to be used when constructing fully-qualified CDI device names
DefaultKind string `toml:"default-kind"` DefaultKind string `toml:"default-kind"`
// AnnotationPrefixes sets the allowed prefixes for CDI annotation-based device injection
AnnotationPrefixes []string `toml:"annotation-prefixes"`
} }
type csvModeConfig struct { type csvModeConfig struct {
@ -98,6 +101,9 @@ func GetDefaultRuntimeConfig() *RuntimeConfig {
}, },
CDI: cdiModeConfig{ CDI: cdiModeConfig{
DefaultKind: "nvidia.com/gpu", DefaultKind: "nvidia.com/gpu",
AnnotationPrefixes: []string{
cdi.AnnotationPrefix,
},
}, },
}, },
} }

View File

@ -18,6 +18,7 @@ package modifier
import ( import (
"fmt" "fmt"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
@ -67,7 +68,7 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Con
return nil, fmt.Errorf("failed to load OCI spec: %v", err) return nil, fmt.Errorf("failed to load OCI spec: %v", err)
} }
_, annotationDevices, err := cdi.ParseAnnotations(rawSpec.Annotations) annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to parse container annotations: %v", err) return nil, fmt.Errorf("failed to parse container annotations: %v", err)
} }
@ -107,6 +108,38 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Con
return nil, nil return nil, nil
} }
// getAnnotationDevices extracts the CDI devices requested through container annotations.
// Only annotations whose key starts with one of the specified prefixes are considered. The value of
// each such annotation is split on commas and every element must be a fully-qualified CDI device name;
// if an element is not fully-qualified an error is returned and no devices are returned.
// Devices are deduplicated across all matching annotations. Since the annotations are iterated as a
// map, the order of the returned devices is not specified.
func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]string, error) {
	// hasAllowedPrefix reports whether the annotation key starts with any of the prefixes.
	hasAllowedPrefix := func(key string) bool {
		for _, p := range prefixes {
			if strings.HasPrefix(key, p) {
				return true
			}
		}
		return false
	}

	// Gather the candidate device names for every annotation with a matching key.
	matched := make(map[string][]string)
	for k, v := range annotations {
		if hasAllowedPrefix(k) {
			matched[k] = strings.Split(v, ",")
		}
	}

	var result []string
	visited := make(map[string]bool)
	for k, names := range matched {
		for _, name := range names {
			if !cdi.IsQualifiedName(name) {
				return nil, fmt.Errorf("invalid device name %q in annotation %q", name, k)
			}
			if visited[name] {
				continue
			}
			visited[name] = true
			result = append(result, name)
		}
	}

	return result, nil
}
// Modify loads the CDI registry and injects the specified CDI devices into the OCI runtime specification. // Modify loads the CDI registry and injects the specified CDI devices into the OCI runtime specification.
func (m cdiModifier) Modify(spec *specs.Spec) error { func (m cdiModifier) Modify(spec *specs.Spec) error {
registry := cdi.GetRegistry( registry := cdi.GetRegistry(

View File

@ -0,0 +1,92 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
// TestGetAnnotationDevices checks that getAnnotationDevices only considers
// annotations with a matching prefix, deduplicates the devices that it
// returns, and rejects device names that are not fully-qualified.
func TestGetAnnotationDevices(t *testing.T) {
	testCases := []struct {
		description     string
		prefixes        []string
		annotations     map[string]string
		expectedDevices []string
		expectedError   error
	}{
		{
			description: "no annotations",
		},
		{
			description: "no matching annotations",
			prefixes:    []string{"not-prefix/"},
			annotations: map[string]string{
				"prefix/foo": "example.com/device=bar",
			},
		},
		{
			description: "single matching annotation",
			prefixes:    []string{"prefix/"},
			annotations: map[string]string{
				"prefix/foo": "example.com/device=bar",
			},
			expectedDevices: []string{"example.com/device=bar"},
		},
		{
			description: "multiple matching annotations",
			prefixes:    []string{"prefix/", "another-prefix/"},
			annotations: map[string]string{
				"prefix/foo":         "example.com/device=bar",
				"another-prefix/bar": "example.com/device=baz",
			},
			expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
		},
		{
			description: "multiple matching annotations with duplicate devices",
			prefixes:    []string{"prefix/", "another-prefix/"},
			annotations: map[string]string{
				"prefix/foo":         "example.com/device=bar",
				"another-prefix/bar": "example.com/device=bar",
			},
			expectedDevices: []string{"example.com/device=bar"},
		},
		{
			description: "invalid devices",
			prefixes:    []string{"prefix/"},
			annotations: map[string]string{
				"prefix/foo": "example.com/device",
			},
			// Only a single annotation is specified, so the key reported in
			// the error message is deterministic.
			expectedError: fmt.Errorf("invalid device name %q in annotation %q", "example.com/device", "prefix/foo"),
		},
	}

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			devices, err := getAnnotationDevices(tc.prefixes, tc.annotations)
			if tc.expectedError != nil {
				// Compare the message too, so that the expectation cannot
				// silently drift from the error that is actually returned.
				require.EqualError(t, err, tc.expectedError.Error())
				require.Nil(t, devices)
				return
			}

			require.NoError(t, err)
			// The order of the returned devices is not checked.
			require.ElementsMatch(t, tc.expectedDevices, devices)
		})
	}
}

View File

@ -332,6 +332,7 @@ func TestUpdateV1Config(t *testing.T) {
Tree: config, Tree: config,
UseDefaultRuntimeName: true, UseDefaultRuntimeName: true,
RuntimeType: runtimeType, RuntimeType: runtimeType,
ContainerAnnotations: []string{"cdi.k8s.io/*"},
} }
err = UpdateConfig(v1, o) err = UpdateConfig(v1, o)
@ -585,6 +586,7 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) {
Tree: config, Tree: config,
UseDefaultRuntimeName: true, UseDefaultRuntimeName: true,
RuntimeType: runtimeType, RuntimeType: runtimeType,
ContainerAnnotations: []string{"cdi.k8s.io/*"},
} }
err = UpdateConfig(v1, o) err = UpdateConfig(v1, o)

View File

@ -279,8 +279,9 @@ func TestUpdateV2Config(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
v2 := &containerd.Config{ v2 := &containerd.Config{
Tree: config, Tree: config,
RuntimeType: runtimeType, RuntimeType: runtimeType,
ContainerAnnotations: []string{"cdi.k8s.io/*"},
} }
err = UpdateConfig(v2, o) err = UpdateConfig(v2, o)
@ -520,8 +521,9 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) {
require.NoError(t, err) require.NoError(t, err)
v2 := &containerd.Config{ v2 := &containerd.Config{
Tree: config, Tree: config,
RuntimeType: runtimeType, RuntimeType: runtimeType,
ContainerAnnotations: []string{"cdi.k8s.io/*"},
} }
err = UpdateConfig(v2, o) err = UpdateConfig(v2, o)

View File

@ -72,6 +72,8 @@ type options struct {
hostRootMount string hostRootMount string
runtimeDir string runtimeDir string
useLegacyConfig bool useLegacyConfig bool
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
} }
func main() { func main() {
@ -173,6 +175,11 @@ func main() {
Destination: &options.useLegacyConfig, Destination: &options.useLegacyConfig,
EnvVars: []string{"CONTAINERD_USE_LEGACY_CONFIG"}, EnvVars: []string{"CONTAINERD_USE_LEGACY_CONFIG"},
}, },
&cli.StringSliceFlag{
Name: "nvidia-container-runtime-modes.cdi.annotation-prefixes",
Destination: &options.ContainerRuntimeModesCDIAnnotationPrefixes,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
},
} }
// Update the subcommand flags with the common subcommand flags // Update the subcommand flags with the common subcommand flags
@ -199,6 +206,7 @@ func Setup(c *cli.Context, o *options) error {
containerd.WithPath(o.config), containerd.WithPath(o.config),
containerd.WithRuntimeType(o.runtimeType), containerd.WithRuntimeType(o.runtimeType),
containerd.WithUseLegacyConfig(o.useLegacyConfig), containerd.WithUseLegacyConfig(o.useLegacyConfig),
containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...),
) )
if err != nil { if err != nil {
return fmt.Errorf("unable to load config: %v", err) return fmt.Errorf("unable to load config: %v", err)
@ -241,6 +249,7 @@ func Cleanup(c *cli.Context, o *options) error {
containerd.WithPath(o.config), containerd.WithPath(o.config),
containerd.WithRuntimeType(o.runtimeType), containerd.WithRuntimeType(o.runtimeType),
containerd.WithUseLegacyConfig(o.useLegacyConfig), containerd.WithUseLegacyConfig(o.useLegacyConfig),
containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...),
) )
if err != nil { if err != nil {
return fmt.Errorf("unable to load config: %v", err) return fmt.Errorf("unable to load config: %v", err)
@ -434,3 +443,13 @@ func RestartContainerdSystemd(hostRootMount string) error {
return nil return nil
} }
// containerAnnotationsFromCDIPrefixes builds the container annotations for the configured CDI
// annotation prefixes by appending "*" to each prefix.
func (o *options) containerAnnotationsFromCDIPrefixes() []string {
	prefixes := o.ContainerRuntimeModesCDIAnnotationPrefixes.Value()

	var patterns []string
	for i := range prefixes {
		patterns = append(patterns, prefixes[i]+"*")
	}

	return patterns
}

View File

@ -47,10 +47,12 @@ type options struct {
DriverRoot string DriverRoot string
DriverRootCtrPath string DriverRootCtrPath string
ContainerRuntimeMode string ContainerRuntimeMode string
ContainerRuntimeModesCdiDefaultKind string ContainerRuntimeDebug string
ContainerRuntimeDebug string ContainerRuntimeLogLevel string
ContainerRuntimeLogLevel string
ContainerRuntimeModesCdiDefaultKind string
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
ContainerRuntimeHookSkipModeDetection bool ContainerRuntimeHookSkipModeDetection bool
@ -65,6 +67,8 @@ type options struct {
acceptNVIDIAVisibleDevicesWhenUnprivileged bool acceptNVIDIAVisibleDevicesWhenUnprivileged bool
acceptNVIDIAVisibleDevicesAsVolumeMounts bool acceptNVIDIAVisibleDevicesAsVolumeMounts bool
ignoreErrors bool
} }
func main() { func main() {
@ -121,26 +125,34 @@ func main() {
EnvVars: []string{"DRIVER_ROOT_CTR_PATH"}, EnvVars: []string{"DRIVER_ROOT_CTR_PATH"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "nvidia-container-runtime-debug", Name: "nvidia-container-runtime.debug",
Aliases: []string{"nvidia-container-runtime-debug"},
Usage: "Specify the location of the debug log file for the NVIDIA Container Runtime", Usage: "Specify the location of the debug log file for the NVIDIA Container Runtime",
Destination: &opts.ContainerRuntimeDebug, Destination: &opts.ContainerRuntimeDebug,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"}, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "nvidia-container-runtime-debug-log-level", Name: "nvidia-container-runtime.log-level",
Aliases: []string{"nvidia-container-runtime-debug-log-level"},
Destination: &opts.ContainerRuntimeLogLevel, Destination: &opts.ContainerRuntimeLogLevel,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"}, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "nvidia-container-runtime-mode", Name: "nvidia-container-runtime.mode",
Aliases: []string{"nvidia-container-runtime-mode"},
Destination: &opts.ContainerRuntimeMode, Destination: &opts.ContainerRuntimeMode,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODE"}, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODE"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "nvidia-container-runtime-modes.cdi.default-kind", Name: "nvidia-container-runtime.modes.cdi.default-kind",
Destination: &opts.ContainerRuntimeModesCdiDefaultKind, Destination: &opts.ContainerRuntimeModesCdiDefaultKind,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND"}, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND"},
}, },
&cli.StringSliceFlag{
Name: "nvidia-container-runtime.modes.cdi.annotation-prefixes",
Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes,
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
},
&cli.BoolFlag{ &cli.BoolFlag{
Name: "nvidia-container-runtime-hook.skip-mode-detection", Name: "nvidia-container-runtime-hook.skip-mode-detection",
Value: true, Value: true,
@ -148,7 +160,8 @@ func main() {
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION"}, EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "nvidia-container-cli-debug", Name: "nvidia-container-cli.debug",
Aliases: []string{"nvidia-container-cli-debug"},
Usage: "Specify the location of the debug log file for the NVIDIA Container CLI", Usage: "Specify the location of the debug log file for the NVIDIA Container CLI",
Destination: &opts.ContainerCLIDebug, Destination: &opts.ContainerCLIDebug,
EnvVars: []string{"NVIDIA_CONTAINER_CLI_DEBUG"}, EnvVars: []string{"NVIDIA_CONTAINER_CLI_DEBUG"},
@ -194,6 +207,12 @@ func main() {
Destination: &opts.cdiKind, Destination: &opts.cdiKind,
EnvVars: []string{"CDI_KIND"}, EnvVars: []string{"CDI_KIND"},
}, },
&cli.BoolFlag{
Name: "ignore-errors",
Usage: "ignore errors when installing the NVIDIA Container toolkit. This is used for testing purposes only.",
Hidden: true,
Destination: &opts.ignoreErrors,
},
} }
// Update the subcommand flags with the common subcommand flags // Update the subcommand flags with the common subcommand flags
@ -242,46 +261,62 @@ func Install(cli *cli.Context, opts *options) error {
log.Infof("Removing existing NVIDIA container toolkit installation") log.Infof("Removing existing NVIDIA container toolkit installation")
err := os.RemoveAll(opts.toolkitRoot) err := os.RemoveAll(opts.toolkitRoot)
if err != nil { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error removing toolkit directory: %v", err) return fmt.Errorf("error removing toolkit directory: %v", err)
} else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err))
} }
toolkitConfigDir := filepath.Join(opts.toolkitRoot, ".config", "nvidia-container-runtime") toolkitConfigDir := filepath.Join(opts.toolkitRoot, ".config", "nvidia-container-runtime")
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename) toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
err = createDirectories(opts.toolkitRoot, toolkitConfigDir) err = createDirectories(opts.toolkitRoot, toolkitConfigDir)
if err != nil { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("could not create required directories: %v", err) return fmt.Errorf("could not create required directories: %v", err)
} else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err))
} }
err = installContainerLibraries(opts.toolkitRoot) err = installContainerLibraries(opts.toolkitRoot)
if err != nil { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container library: %v", err) return fmt.Errorf("error installing NVIDIA container library: %v", err)
} else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err))
} }
err = installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot) err = installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot)
if err != nil { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container runtime: %v", err) return fmt.Errorf("error installing NVIDIA container runtime: %v", err)
} else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err))
} }
nvidiaContainerCliExecutable, err := installContainerCLI(opts.toolkitRoot) nvidiaContainerCliExecutable, err := installContainerCLI(opts.toolkitRoot)
if err != nil { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container CLI: %v", err) return fmt.Errorf("error installing NVIDIA container CLI: %v", err)
} else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err))
} }
_, err = installRuntimeHook(opts.toolkitRoot, toolkitConfigPath) _, err = installRuntimeHook(opts.toolkitRoot, toolkitConfigPath)
if err != nil { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
} else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err))
} }
nvidiaCTKPath, err := installContainerToolkitCLI(opts.toolkitRoot) nvidiaCTKPath, err := installContainerToolkitCLI(opts.toolkitRoot)
if err != nil { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err) return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)
} else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err))
} }
err = installToolkitConfig(toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts) err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts)
if err != nil { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
} else if err != nil {
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err))
} }
return generateCDISpec(opts, nvidiaCTKPath) return generateCDISpec(opts, nvidiaCTKPath)
@ -337,10 +372,10 @@ func installLibrary(libName string, toolkitRoot string) error {
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
// that the settings are updated to match the desired install and nvidia driver directories. // that the settings are updated to match the desired install and nvidia driver directories.
func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error { func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error {
log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
config, err := toml.LoadFile(nvidiaContainerToolkitConfigSource) config, err := loadConfig(nvidiaContainerToolkitConfigSource)
if err != nil { if err != nil {
return fmt.Errorf("could not open source config file: %v", err) return fmt.Errorf("could not open source config file: %v", err)
} }
@ -351,45 +386,64 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable
} }
defer targetConfig.Close() defer targetConfig.Close()
// Set the options in the root toml table
config.Set("accept-nvidia-visible-devices-envvar-when-unprivileged", opts.acceptNVIDIAVisibleDevicesWhenUnprivileged)
config.Set("accept-nvidia-visible-devices-as-volume-mounts", opts.acceptNVIDIAVisibleDevicesAsVolumeMounts)
nvidiaContainerCliKey := func(p string) []string {
return []string{"nvidia-container-cli", p}
}
// Read the ldconfig path from the config as this may differ per platform // Read the ldconfig path from the config as this may differ per platform
// On ubuntu-based systems this ends in `.real` // On ubuntu-based systems this ends in `.real`
ldconfigPath := fmt.Sprintf("%s", config.GetPath(nvidiaContainerCliKey("ldconfig"))) ldconfigPath := fmt.Sprintf("%s", config.GetDefault("nvidia-container-cli.ldconfig", "/sbin/ldconfig"))
// Use the driver run root as the root: // Use the driver run root as the root:
driverLdconfigPath := "@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/")) driverLdconfigPath := "@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/"))
config.SetPath(nvidiaContainerCliKey("root"), opts.DriverRoot) configValues := map[string]interface{}{
config.SetPath(nvidiaContainerCliKey("path"), nvidiaContainerCliExecutablePath) // Set the options in the root toml table
config.SetPath(nvidiaContainerCliKey("ldconfig"), driverLdconfigPath) "accept-nvidia-visible-devices-envvar-when-unprivileged": opts.acceptNVIDIAVisibleDevicesWhenUnprivileged,
"accept-nvidia-visible-devices-as-volume-mounts": opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
// Set the debug options if selected // Set the nvidia-container-cli options
debugOptions := map[string]string{ "nvidia-container-cli.root": opts.DriverRoot,
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug, "nvidia-container-cli.path": nvidiaContainerCliExecutablePath,
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel, "nvidia-container-cli.ldconfig": driverLdconfigPath,
"nvidia-container-runtime.mode": opts.ContainerRuntimeMode, // Set nvidia-ctk options
"nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind, "nvidia-ctk.path": nvidiaCTKPath,
"nvidia-container-cli.debug": opts.ContainerCLIDebug, // Set the nvidia-container-runtime-hook options
"nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection,
} }
for key, value := range debugOptions { for key, value := range configValues {
if value == "" {
continue
}
config.Set(key, value) config.Set(key, value)
} }
// Set nvidia-ctk options // Set the optional config options
config.Set("nvidia-ctk.path", nvidiaCTKPath) optionalConfigValues := map[string]interface{}{
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug,
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel,
"nvidia-container-runtime.mode": opts.ContainerRuntimeMode,
"nvidia-container-runtime.modes.cdi.annotation-prefixes": opts.ContainerRuntimeModesCDIAnnotationPrefixes,
"nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind,
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
}
for key, value := range optionalConfigValues {
if !c.IsSet(key) {
log.Infof("Skipping unset option: %v", key)
continue
}
if value == nil {
log.Infof("Skipping option with nil value: %v", key)
continue
}
// Set the nvidia-container-runtime-hook options switch v := value.(type) {
config.Set("nvidia-container-runtime-hook.skip-mode-detection", opts.ContainerRuntimeHookSkipModeDetection) case string:
if v == "" {
continue
}
case cli.StringSlice:
if len(v.Value()) == 0 {
continue
}
value = v.Value()
default:
log.Warnf("Unexpected type for option %v=%v: %T", key, value, v)
}
config.Set(key, value)
}
_, err = config.WriteTo(targetConfig) _, err = config.WriteTo(targetConfig)
if err != nil { if err != nil {
@ -402,6 +456,16 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable
return nil return nil
} }
// loadConfig returns the TOML tree for the config file at the specified path.
// If the file does not exist, a tree constructed from a nil map is returned
// instead. Any other error from inspecting the path is returned to the caller.
func loadConfig(path string) (*toml.Tree, error) {
	switch _, err := os.Stat(path); {
	case err == nil:
		return toml.LoadFile(path)
	case os.IsNotExist(err):
		return toml.TreeFromMap(nil)
	default:
		return nil, err
	}
}
// installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper. // installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper.
func installContainerToolkitCLI(toolkitDir string) (string, error) { func installContainerToolkitCLI(toolkitDir string) (string, error) {
e := executable{ e := executable{