mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 00:08:11 +00:00
Merge branch 'CNT-4056/add-cdi-annotations' into 'main'
Add nvidia-container-runtime.modes.cdi.annotation-prefixes config option. See merge request nvidia/container-toolkit/container-toolkit!356
This commit is contained in:
commit
ee5be5e3f2
@ -9,6 +9,7 @@
|
||||
* Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions.
|
||||
* Also return an error from the nvcdi.New constructor instead of panicing.
|
||||
* Detect XOrg libraries for injection and CDI spec generation.
|
||||
* Add `nvidia-container-runtime.modes.cdi.annotation-prefixes` config option that allows the CDI annotation prefixes that are read to be overridden.
|
||||
|
||||
* [libnvidia-container] Fix segmentation fault when RPC initialization fails.
|
||||
* [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2.
|
||||
|
@ -72,6 +72,7 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "nvidia.com/gpu",
|
||||
AnnotationPrefixes: []string{"cdi.k8s.io/"},
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -92,6 +93,7 @@ func TestGetConfig(t *testing.T) {
|
||||
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
|
||||
"nvidia-container-runtime.mode = \"not-auto\"",
|
||||
"nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"",
|
||||
"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
|
||||
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
@ -111,6 +113,10 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "example.vendor.com/device",
|
||||
AnnotationPrefixes: []string{
|
||||
"cdi.k8s.io/",
|
||||
"example.vendor.com/",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
@ -134,6 +140,7 @@ func TestGetConfig(t *testing.T) {
|
||||
"mode = \"not-auto\"",
|
||||
"[nvidia-container-runtime.modes.cdi]",
|
||||
"default-kind = \"example.vendor.com/device\"",
|
||||
"annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]",
|
||||
"[nvidia-container-runtime.modes.csv]",
|
||||
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"[nvidia-ctk]",
|
||||
@ -155,6 +162,10 @@ func TestGetConfig(t *testing.T) {
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "example.vendor.com/device",
|
||||
AnnotationPrefixes: []string{
|
||||
"cdi.k8s.io/",
|
||||
"example.vendor.com/",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
|
@ -50,12 +50,15 @@ func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_engine"}, "")
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
|
||||
}
|
||||
cdiAnnotations := []interface{}{"cdi.k8s.io/*"}
|
||||
containerAnnotations, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}).([]interface{})
|
||||
if ok && containerAnnotations != nil {
|
||||
cdiAnnotations = append(containerAnnotations, cdiAnnotations...)
|
||||
|
||||
if len(c.ContainerAnnotations) > 0 {
|
||||
annotations, err := (*Config)(c).getRuntimeAnnotations([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
annotations = append(c.ContainerAnnotations, annotations...)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
|
||||
}
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, cdiAnnotations)
|
||||
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
|
||||
config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}, path)
|
||||
|
@ -45,12 +45,14 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false)
|
||||
}
|
||||
|
||||
cdiAnnotations := []interface{}{"cdi.k8s.io/*"}
|
||||
containerAnnotations, ok := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}).([]interface{})
|
||||
if ok && containerAnnotations != nil {
|
||||
cdiAnnotations = append(containerAnnotations, cdiAnnotations...)
|
||||
if len(c.ContainerAnnotations) > 0 {
|
||||
annotations, err := c.getRuntimeAnnotations([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
annotations = append(c.ContainerAnnotations, annotations...)
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, annotations)
|
||||
}
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, cdiAnnotations)
|
||||
|
||||
config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "options", "BinaryName"}, path)
|
||||
|
||||
@ -62,6 +64,32 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) {
|
||||
if c == nil || c.Tree == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
config := *c.Tree
|
||||
if !config.HasPath(path) {
|
||||
return nil, nil
|
||||
}
|
||||
annotationsI, ok := config.GetPath(path).([]interface{})
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid annotations: %v", annotationsI)
|
||||
}
|
||||
|
||||
var annotations []string
|
||||
for _, annotation := range annotationsI {
|
||||
a, ok := annotation.(string)
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("invalid annotation: %v", annotation)
|
||||
}
|
||||
annotations = append(annotations, a)
|
||||
}
|
||||
|
||||
return annotations, nil
|
||||
}
|
||||
|
||||
// DefaultRuntime returns the default runtime for the cri-o config
|
||||
func (c Config) DefaultRuntime() string {
|
||||
if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok {
|
||||
|
@ -26,6 +26,7 @@ type Config struct {
|
||||
*toml.Tree
|
||||
RuntimeType string
|
||||
UseDefaultRuntimeName bool
|
||||
ContainerAnnotations []string
|
||||
}
|
||||
|
||||
// New creates a containerd config with the specified options
|
||||
|
@ -33,6 +33,7 @@ type builder struct {
|
||||
path string
|
||||
runtimeType string
|
||||
useLegacyConfig bool
|
||||
containerAnnotations []string
|
||||
}
|
||||
|
||||
// Option defines a function that can be used to configure the config builder
|
||||
@ -59,6 +60,13 @@ func WithUseLegacyConfig(useLegacyConfig bool) Option {
|
||||
}
|
||||
}
|
||||
|
||||
// WithContainerAnnotations sets the container annotations for the config builder
|
||||
func WithContainerAnnotations(containerAnnotations ...string) Option {
|
||||
return func(b *builder) {
|
||||
b.containerAnnotations = containerAnnotations
|
||||
}
|
||||
}
|
||||
|
||||
func (b *builder) build() (engine.Interface, error) {
|
||||
if b.path == "" {
|
||||
return nil, fmt.Errorf("config path is empty")
|
||||
@ -74,6 +82,7 @@ func (b *builder) build() (engine.Interface, error) {
|
||||
}
|
||||
config.RuntimeType = b.runtimeType
|
||||
config.UseDefaultRuntimeName = !b.useLegacyConfig
|
||||
config.ContainerAnnotations = b.containerAnnotations
|
||||
|
||||
version, err := config.parseVersion(b.useLegacyConfig)
|
||||
if err != nil {
|
||||
|
@ -19,6 +19,7 @@ package config
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
@ -52,6 +53,8 @@ type cdiModeConfig struct {
|
||||
SpecDirs []string `toml:"spec-dirs"`
|
||||
// DefaultKind sets the default kind to be used when constructing fully-qualified CDI device names
|
||||
DefaultKind string `toml:"default-kind"`
|
||||
// AnnotationPrefixes sets the allowed prefixes for CDI annotation-based device injection
|
||||
AnnotationPrefixes []string `toml:"annotation-prefixes"`
|
||||
}
|
||||
|
||||
type csvModeConfig struct {
|
||||
@ -98,6 +101,9 @@ func GetDefaultRuntimeConfig() *RuntimeConfig {
|
||||
},
|
||||
CDI: cdiModeConfig{
|
||||
DefaultKind: "nvidia.com/gpu",
|
||||
AnnotationPrefixes: []string{
|
||||
cdi.AnnotationPrefix,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
@ -18,6 +18,7 @@ package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
@ -67,7 +68,7 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Con
|
||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
}
|
||||
|
||||
_, annotationDevices, err := cdi.ParseAnnotations(rawSpec.Annotations)
|
||||
annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse container annotations: %v", err)
|
||||
}
|
||||
@ -107,6 +108,38 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Con
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// getAnnotationDevices returns a list of devices specified in the annotations.
|
||||
// Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of
|
||||
// fully-qualified CDI devices names. If any device name is not fully-quality an error is returned.
|
||||
// The list of returned devices is deduplicated.
|
||||
func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]string, error) {
|
||||
devicesByKey := make(map[string][]string)
|
||||
for key, value := range annotations {
|
||||
for _, prefix := range prefixes {
|
||||
if strings.HasPrefix(key, prefix) {
|
||||
devicesByKey[key] = strings.Split(value, ",")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
seen := make(map[string]bool)
|
||||
var annotationDevices []string
|
||||
for key, devices := range devicesByKey {
|
||||
for _, device := range devices {
|
||||
if !cdi.IsQualifiedName(device) {
|
||||
return nil, fmt.Errorf("invalid device name %q in annotation %q", device, key)
|
||||
}
|
||||
if seen[device] {
|
||||
continue
|
||||
}
|
||||
annotationDevices = append(annotationDevices, device)
|
||||
seen[device] = true
|
||||
}
|
||||
}
|
||||
|
||||
return annotationDevices, nil
|
||||
}
|
||||
|
||||
// Modify loads the CDI registry and injects the specified CDI devices into the OCI runtime specification.
|
||||
func (m cdiModifier) Modify(spec *specs.Spec) error {
|
||||
registry := cdi.GetRegistry(
|
||||
|
92
internal/modifier/cdi_test.go
Normal file
92
internal/modifier/cdi_test.go
Normal file
@ -0,0 +1,92 @@
|
||||
/**
|
||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetAnnotationDevices(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
prefixes []string
|
||||
annotations map[string]string
|
||||
expectedDevices []string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "no annotations",
|
||||
},
|
||||
{
|
||||
description: "no matching annotations",
|
||||
prefixes: []string{"not-prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device=bar",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "single matching annotation",
|
||||
prefixes: []string{"prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device=bar",
|
||||
},
|
||||
expectedDevices: []string{"example.com/device=bar"},
|
||||
},
|
||||
{
|
||||
description: "multiple matching annotations",
|
||||
prefixes: []string{"prefix/", "another-prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device=bar",
|
||||
"another-prefix/bar": "example.com/device=baz",
|
||||
},
|
||||
expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
|
||||
},
|
||||
{
|
||||
description: "multiple matching annotations with duplicate devices",
|
||||
prefixes: []string{"prefix/", "another-prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device=bar",
|
||||
"another-prefix/bar": "example.com/device=bar",
|
||||
},
|
||||
expectedDevices: []string{"example.com/device=bar"},
|
||||
},
|
||||
{
|
||||
description: "invalid devices",
|
||||
prefixes: []string{"prefix/"},
|
||||
annotations: map[string]string{
|
||||
"prefix/foo": "example.com/device",
|
||||
},
|
||||
expectedError: fmt.Errorf("invalid device %q", "example.com/device"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
devices, err := getAnnotationDevices(tc.prefixes, tc.annotations)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||
})
|
||||
}
|
||||
}
|
@ -332,6 +332,7 @@ func TestUpdateV1Config(t *testing.T) {
|
||||
Tree: config,
|
||||
UseDefaultRuntimeName: true,
|
||||
RuntimeType: runtimeType,
|
||||
ContainerAnnotations: []string{"cdi.k8s.io/*"},
|
||||
}
|
||||
|
||||
err = UpdateConfig(v1, o)
|
||||
@ -585,6 +586,7 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) {
|
||||
Tree: config,
|
||||
UseDefaultRuntimeName: true,
|
||||
RuntimeType: runtimeType,
|
||||
ContainerAnnotations: []string{"cdi.k8s.io/*"},
|
||||
}
|
||||
|
||||
err = UpdateConfig(v1, o)
|
||||
|
@ -281,6 +281,7 @@ func TestUpdateV2Config(t *testing.T) {
|
||||
v2 := &containerd.Config{
|
||||
Tree: config,
|
||||
RuntimeType: runtimeType,
|
||||
ContainerAnnotations: []string{"cdi.k8s.io/*"},
|
||||
}
|
||||
|
||||
err = UpdateConfig(v2, o)
|
||||
@ -522,6 +523,7 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) {
|
||||
v2 := &containerd.Config{
|
||||
Tree: config,
|
||||
RuntimeType: runtimeType,
|
||||
ContainerAnnotations: []string{"cdi.k8s.io/*"},
|
||||
}
|
||||
|
||||
err = UpdateConfig(v2, o)
|
||||
|
@ -72,6 +72,8 @@ type options struct {
|
||||
hostRootMount string
|
||||
runtimeDir string
|
||||
useLegacyConfig bool
|
||||
|
||||
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
|
||||
}
|
||||
|
||||
func main() {
|
||||
@ -173,6 +175,11 @@ func main() {
|
||||
Destination: &options.useLegacyConfig,
|
||||
EnvVars: []string{"CONTAINERD_USE_LEGACY_CONFIG"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "nvidia-container-runtime-modes.cdi.annotation-prefixes",
|
||||
Destination: &options.ContainerRuntimeModesCDIAnnotationPrefixes,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
|
||||
},
|
||||
}
|
||||
|
||||
// Update the subcommand flags with the common subcommand flags
|
||||
@ -199,6 +206,7 @@ func Setup(c *cli.Context, o *options) error {
|
||||
containerd.WithPath(o.config),
|
||||
containerd.WithRuntimeType(o.runtimeType),
|
||||
containerd.WithUseLegacyConfig(o.useLegacyConfig),
|
||||
containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
@ -241,6 +249,7 @@ func Cleanup(c *cli.Context, o *options) error {
|
||||
containerd.WithPath(o.config),
|
||||
containerd.WithRuntimeType(o.runtimeType),
|
||||
containerd.WithUseLegacyConfig(o.useLegacyConfig),
|
||||
containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...),
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
@ -434,3 +443,13 @@ func RestartContainerdSystemd(hostRootMount string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
|
||||
func (o *options) containerAnnotationsFromCDIPrefixes() []string {
|
||||
var annotations []string
|
||||
for _, prefix := range o.ContainerRuntimeModesCDIAnnotationPrefixes.Value() {
|
||||
annotations = append(annotations, prefix+"*")
|
||||
}
|
||||
|
||||
return annotations
|
||||
}
|
||||
|
@ -48,10 +48,12 @@ type options struct {
|
||||
DriverRootCtrPath string
|
||||
|
||||
ContainerRuntimeMode string
|
||||
ContainerRuntimeModesCdiDefaultKind string
|
||||
ContainerRuntimeDebug string
|
||||
ContainerRuntimeLogLevel string
|
||||
|
||||
ContainerRuntimeModesCdiDefaultKind string
|
||||
ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice
|
||||
|
||||
ContainerRuntimeHookSkipModeDetection bool
|
||||
|
||||
ContainerCLIDebug string
|
||||
@ -65,6 +67,8 @@ type options struct {
|
||||
|
||||
acceptNVIDIAVisibleDevicesWhenUnprivileged bool
|
||||
acceptNVIDIAVisibleDevicesAsVolumeMounts bool
|
||||
|
||||
ignoreErrors bool
|
||||
}
|
||||
|
||||
func main() {
|
||||
@ -121,26 +125,34 @@ func main() {
|
||||
EnvVars: []string{"DRIVER_ROOT_CTR_PATH"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-debug",
|
||||
Name: "nvidia-container-runtime.debug",
|
||||
Aliases: []string{"nvidia-container-runtime-debug"},
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container Runtime",
|
||||
Destination: &opts.ContainerRuntimeDebug,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-debug-log-level",
|
||||
Name: "nvidia-container-runtime.log-level",
|
||||
Aliases: []string{"nvidia-container-runtime-debug-log-level"},
|
||||
Destination: &opts.ContainerRuntimeLogLevel,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-mode",
|
||||
Name: "nvidia-container-runtime.mode",
|
||||
Aliases: []string{"nvidia-container-runtime-mode"},
|
||||
Destination: &opts.ContainerRuntimeMode,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODE"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-runtime-modes.cdi.default-kind",
|
||||
Name: "nvidia-container-runtime.modes.cdi.default-kind",
|
||||
Destination: &opts.ContainerRuntimeModesCdiDefaultKind,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND"},
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "nvidia-container-runtime.modes.cdi.annotation-prefixes",
|
||||
Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "nvidia-container-runtime-hook.skip-mode-detection",
|
||||
Value: true,
|
||||
@ -148,7 +160,8 @@ func main() {
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-container-cli-debug",
|
||||
Name: "nvidia-container-cli.debug",
|
||||
Aliases: []string{"nvidia-container-cli-debug"},
|
||||
Usage: "Specify the location of the debug log file for the NVIDIA Container CLI",
|
||||
Destination: &opts.ContainerCLIDebug,
|
||||
EnvVars: []string{"NVIDIA_CONTAINER_CLI_DEBUG"},
|
||||
@ -194,6 +207,12 @@ func main() {
|
||||
Destination: &opts.cdiKind,
|
||||
EnvVars: []string{"CDI_KIND"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "ignore-errors",
|
||||
Usage: "ignore errors when installing the NVIDIA Container toolkit. This is used for testing purposes only.",
|
||||
Hidden: true,
|
||||
Destination: &opts.ignoreErrors,
|
||||
},
|
||||
}
|
||||
|
||||
// Update the subcommand flags with the common subcommand flags
|
||||
@ -242,46 +261,62 @@ func Install(cli *cli.Context, opts *options) error {
|
||||
|
||||
log.Infof("Removing existing NVIDIA container toolkit installation")
|
||||
err := os.RemoveAll(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error removing toolkit directory: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err))
|
||||
}
|
||||
|
||||
toolkitConfigDir := filepath.Join(opts.toolkitRoot, ".config", "nvidia-container-runtime")
|
||||
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
|
||||
|
||||
err = createDirectories(opts.toolkitRoot, toolkitConfigDir)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("could not create required directories: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err))
|
||||
}
|
||||
|
||||
err = installContainerLibraries(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container library: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err))
|
||||
}
|
||||
|
||||
err = installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container runtime: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err))
|
||||
}
|
||||
|
||||
nvidiaContainerCliExecutable, err := installContainerCLI(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container CLI: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err))
|
||||
}
|
||||
|
||||
_, err = installRuntimeHook(opts.toolkitRoot, toolkitConfigPath)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err))
|
||||
}
|
||||
|
||||
nvidiaCTKPath, err := installContainerToolkitCLI(opts.toolkitRoot)
|
||||
if err != nil {
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err))
|
||||
}
|
||||
|
||||
err = installToolkitConfig(toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts)
|
||||
if err != nil {
|
||||
err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts)
|
||||
if err != nil && !opts.ignoreErrors {
|
||||
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
|
||||
} else if err != nil {
|
||||
log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err))
|
||||
}
|
||||
|
||||
return generateCDISpec(opts, nvidiaCTKPath)
|
||||
@ -337,10 +372,10 @@ func installLibrary(libName string, toolkitRoot string) error {
|
||||
|
||||
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
|
||||
// that the settings are updated to match the desired install and nvidia driver directories.
|
||||
func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error {
|
||||
func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error {
|
||||
log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
|
||||
|
||||
config, err := toml.LoadFile(nvidiaContainerToolkitConfigSource)
|
||||
config, err := loadConfig(nvidiaContainerToolkitConfigSource)
|
||||
if err != nil {
|
||||
return fmt.Errorf("could not open source config file: %v", err)
|
||||
}
|
||||
@ -351,45 +386,64 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable
|
||||
}
|
||||
defer targetConfig.Close()
|
||||
|
||||
// Set the options in the root toml table
|
||||
config.Set("accept-nvidia-visible-devices-envvar-when-unprivileged", opts.acceptNVIDIAVisibleDevicesWhenUnprivileged)
|
||||
config.Set("accept-nvidia-visible-devices-as-volume-mounts", opts.acceptNVIDIAVisibleDevicesAsVolumeMounts)
|
||||
|
||||
nvidiaContainerCliKey := func(p string) []string {
|
||||
return []string{"nvidia-container-cli", p}
|
||||
}
|
||||
|
||||
// Read the ldconfig path from the config as this may differ per platform
|
||||
// On ubuntu-based systems this ends in `.real`
|
||||
ldconfigPath := fmt.Sprintf("%s", config.GetPath(nvidiaContainerCliKey("ldconfig")))
|
||||
|
||||
ldconfigPath := fmt.Sprintf("%s", config.GetDefault("nvidia-container-cli.ldconfig", "/sbin/ldconfig"))
|
||||
// Use the driver run root as the root:
|
||||
driverLdconfigPath := "@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/"))
|
||||
|
||||
config.SetPath(nvidiaContainerCliKey("root"), opts.DriverRoot)
|
||||
config.SetPath(nvidiaContainerCliKey("path"), nvidiaContainerCliExecutablePath)
|
||||
config.SetPath(nvidiaContainerCliKey("ldconfig"), driverLdconfigPath)
|
||||
|
||||
// Set the debug options if selected
|
||||
debugOptions := map[string]string{
|
||||
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug,
|
||||
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel,
|
||||
"nvidia-container-runtime.mode": opts.ContainerRuntimeMode,
|
||||
"nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind,
|
||||
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
|
||||
}
|
||||
for key, value := range debugOptions {
|
||||
if value == "" {
|
||||
continue
|
||||
configValues := map[string]interface{}{
|
||||
// Set the options in the root toml table
|
||||
"accept-nvidia-visible-devices-envvar-when-unprivileged": opts.acceptNVIDIAVisibleDevicesWhenUnprivileged,
|
||||
"accept-nvidia-visible-devices-as-volume-mounts": opts.acceptNVIDIAVisibleDevicesAsVolumeMounts,
|
||||
// Set the nvidia-container-cli options
|
||||
"nvidia-container-cli.root": opts.DriverRoot,
|
||||
"nvidia-container-cli.path": nvidiaContainerCliExecutablePath,
|
||||
"nvidia-container-cli.ldconfig": driverLdconfigPath,
|
||||
// Set nvidia-ctk options
|
||||
"nvidia-ctk.path": nvidiaCTKPath,
|
||||
// Set the nvidia-container-runtime-hook options
|
||||
"nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection,
|
||||
}
|
||||
for key, value := range configValues {
|
||||
config.Set(key, value)
|
||||
}
|
||||
|
||||
// Set nvidia-ctk options
|
||||
config.Set("nvidia-ctk.path", nvidiaCTKPath)
|
||||
// Set the optional config options
|
||||
optionalConfigValues := map[string]interface{}{
|
||||
"nvidia-container-runtime.debug": opts.ContainerRuntimeDebug,
|
||||
"nvidia-container-runtime.log-level": opts.ContainerRuntimeLogLevel,
|
||||
"nvidia-container-runtime.mode": opts.ContainerRuntimeMode,
|
||||
"nvidia-container-runtime.modes.cdi.annotation-prefixes": opts.ContainerRuntimeModesCDIAnnotationPrefixes,
|
||||
"nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind,
|
||||
"nvidia-container-cli.debug": opts.ContainerCLIDebug,
|
||||
}
|
||||
for key, value := range optionalConfigValues {
|
||||
if !c.IsSet(key) {
|
||||
log.Infof("Skipping unset option: %v", key)
|
||||
continue
|
||||
}
|
||||
if value == nil {
|
||||
log.Infof("Skipping option with nil value: %v", key)
|
||||
continue
|
||||
}
|
||||
|
||||
// Set the nvidia-container-runtime-hook options
|
||||
config.Set("nvidia-container-runtime-hook.skip-mode-detection", opts.ContainerRuntimeHookSkipModeDetection)
|
||||
switch v := value.(type) {
|
||||
case string:
|
||||
if v == "" {
|
||||
continue
|
||||
}
|
||||
case cli.StringSlice:
|
||||
if len(v.Value()) == 0 {
|
||||
continue
|
||||
}
|
||||
value = v.Value()
|
||||
default:
|
||||
log.Warnf("Unexpected type for option %v=%v: %T", key, value, v)
|
||||
}
|
||||
|
||||
config.Set(key, value)
|
||||
}
|
||||
|
||||
_, err = config.WriteTo(targetConfig)
|
||||
if err != nil {
|
||||
@ -402,6 +456,16 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable
|
||||
return nil
|
||||
}
|
||||
|
||||
func loadConfig(path string) (*toml.Tree, error) {
|
||||
_, err := os.Stat(path)
|
||||
if err == nil {
|
||||
return toml.LoadFile(path)
|
||||
} else if os.IsNotExist(err) {
|
||||
return toml.TreeFromMap(nil)
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper.
|
||||
func installContainerToolkitCLI(toolkitDir string) (string, error) {
|
||||
e := executable{
|
||||
|
Loading…
Reference in New Issue
Block a user