mirror of
				https://github.com/NVIDIA/nvidia-container-toolkit
				synced 2025-06-26 18:18:24 +00:00 
			
		
		
		
	Merge branch 'CNT-4056/add-cdi-annotations' into 'main'
Add nvidia-container-runtime.modes.cdi.annotation-prefixes config option. See merge request nvidia/container-toolkit/container-toolkit!356
This commit is contained in:
		
						commit
						ee5be5e3f2
					
				| @ -9,6 +9,7 @@ | ||||
| * Generate a simplified CDI specification by default. This means that entities in the common edits in a spec are not included in device definitions. | ||||
| * Also return an error from the nvcdi.New constructor instead of panicing. | ||||
| * Detect XOrg libraries for injection and CDI spec generation. | ||||
| * Add `nvidia-container-runtime.modes.cdi.annotation-prefixes` config option that allows the CDI annotation prefixes that are read to be overridden. | ||||
| 
 | ||||
| * [libnvidia-container] Fix segmentation fault when RPC initialization fails. | ||||
| * [libnvidia-container] Build centos variants of the NVIDIA Container Library with static libtirpc v1.3.2. | ||||
|  | ||||
| @ -71,7 +71,8 @@ func TestGetConfig(t *testing.T) { | ||||
| 							MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d", | ||||
| 						}, | ||||
| 						CDI: cdiModeConfig{ | ||||
| 							DefaultKind: "nvidia.com/gpu", | ||||
| 							DefaultKind:        "nvidia.com/gpu", | ||||
| 							AnnotationPrefixes: []string{"cdi.k8s.io/"}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				}, | ||||
| @ -92,6 +93,7 @@ func TestGetConfig(t *testing.T) { | ||||
| 				"nvidia-container-runtime.runtimes = [\"/some/runtime\",]", | ||||
| 				"nvidia-container-runtime.mode = \"not-auto\"", | ||||
| 				"nvidia-container-runtime.modes.cdi.default-kind = \"example.vendor.com/device\"", | ||||
| 				"nvidia-container-runtime.modes.cdi.annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]", | ||||
| 				"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"", | ||||
| 				"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"", | ||||
| 			}, | ||||
| @ -111,6 +113,10 @@ func TestGetConfig(t *testing.T) { | ||||
| 						}, | ||||
| 						CDI: cdiModeConfig{ | ||||
| 							DefaultKind: "example.vendor.com/device", | ||||
| 							AnnotationPrefixes: []string{ | ||||
| 								"cdi.k8s.io/", | ||||
| 								"example.vendor.com/", | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				}, | ||||
| @ -134,6 +140,7 @@ func TestGetConfig(t *testing.T) { | ||||
| 				"mode = \"not-auto\"", | ||||
| 				"[nvidia-container-runtime.modes.cdi]", | ||||
| 				"default-kind = \"example.vendor.com/device\"", | ||||
| 				"annotation-prefixes = [\"cdi.k8s.io/\", \"example.vendor.com/\",]", | ||||
| 				"[nvidia-container-runtime.modes.csv]", | ||||
| 				"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"", | ||||
| 				"[nvidia-ctk]", | ||||
| @ -155,6 +162,10 @@ func TestGetConfig(t *testing.T) { | ||||
| 						}, | ||||
| 						CDI: cdiModeConfig{ | ||||
| 							DefaultKind: "example.vendor.com/device", | ||||
| 							AnnotationPrefixes: []string{ | ||||
| 								"cdi.k8s.io/", | ||||
| 								"example.vendor.com/", | ||||
| 							}, | ||||
| 						}, | ||||
| 					}, | ||||
| 				}, | ||||
|  | ||||
| @ -50,12 +50,15 @@ func (c *ConfigV1) AddRuntime(name string, path string, setAsDefault bool) error | ||||
| 		config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "runtime_engine"}, "") | ||||
| 		config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false) | ||||
| 	} | ||||
| 	cdiAnnotations := []interface{}{"cdi.k8s.io/*"} | ||||
| 	containerAnnotations, ok := config.GetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}).([]interface{}) | ||||
| 	if ok && containerAnnotations != nil { | ||||
| 		cdiAnnotations = append(containerAnnotations, cdiAnnotations...) | ||||
| 
 | ||||
| 	if len(c.ContainerAnnotations) > 0 { | ||||
| 		annotations, err := (*Config)(c).getRuntimeAnnotations([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		annotations = append(c.ContainerAnnotations, annotations...) | ||||
| 		config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, annotations) | ||||
| 	} | ||||
| 	config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "container_annotations"}, cdiAnnotations) | ||||
| 
 | ||||
| 	config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "BinaryName"}, path) | ||||
| 	config.SetPath([]string{"plugins", "cri", "containerd", "runtimes", name, "options", "Runtime"}, path) | ||||
|  | ||||
| @ -45,12 +45,14 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { | ||||
| 		config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "privileged_without_host_devices"}, false) | ||||
| 	} | ||||
| 
 | ||||
| 	cdiAnnotations := []interface{}{"cdi.k8s.io/*"} | ||||
| 	containerAnnotations, ok := config.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}).([]interface{}) | ||||
| 	if ok && containerAnnotations != nil { | ||||
| 		cdiAnnotations = append(containerAnnotations, cdiAnnotations...) | ||||
| 	if len(c.ContainerAnnotations) > 0 { | ||||
| 		annotations, err := c.getRuntimeAnnotations([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}) | ||||
| 		if err != nil { | ||||
| 			return err | ||||
| 		} | ||||
| 		annotations = append(c.ContainerAnnotations, annotations...) | ||||
| 		config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, annotations) | ||||
| 	} | ||||
| 	config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "container_annotations"}, cdiAnnotations) | ||||
| 
 | ||||
| 	config.SetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "runtimes", name, "options", "BinaryName"}, path) | ||||
| 
 | ||||
| @ -62,6 +64,32 @@ func (c *Config) AddRuntime(name string, path string, setAsDefault bool) error { | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func (c *Config) getRuntimeAnnotations(path []string) ([]string, error) { | ||||
| 	if c == nil || c.Tree == nil { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	config := *c.Tree | ||||
| 	if !config.HasPath(path) { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 	annotationsI, ok := config.GetPath(path).([]interface{}) | ||||
| 	if !ok { | ||||
| 		return nil, fmt.Errorf("invalid annotations: %v", annotationsI) | ||||
| 	} | ||||
| 
 | ||||
| 	var annotations []string | ||||
| 	for _, annotation := range annotationsI { | ||||
| 		a, ok := annotation.(string) | ||||
| 		if !ok { | ||||
| 			return nil, fmt.Errorf("invalid annotation: %v", annotation) | ||||
| 		} | ||||
| 		annotations = append(annotations, a) | ||||
| 	} | ||||
| 
 | ||||
| 	return annotations, nil | ||||
| } | ||||
| 
 | ||||
| // DefaultRuntime returns the default runtime for the cri-o config
 | ||||
| func (c Config) DefaultRuntime() string { | ||||
| 	if runtime, ok := c.GetPath([]string{"plugins", "io.containerd.grpc.v1.cri", "containerd", "default_runtime_name"}).(string); ok { | ||||
|  | ||||
| @ -26,6 +26,7 @@ type Config struct { | ||||
| 	*toml.Tree | ||||
| 	RuntimeType           string | ||||
| 	UseDefaultRuntimeName bool | ||||
| 	ContainerAnnotations  []string | ||||
| } | ||||
| 
 | ||||
| // New creates a containerd config with the specified options
 | ||||
|  | ||||
| @ -30,9 +30,10 @@ const ( | ||||
| ) | ||||
| 
 | ||||
| type builder struct { | ||||
| 	path            string | ||||
| 	runtimeType     string | ||||
| 	useLegacyConfig bool | ||||
| 	path                 string | ||||
| 	runtimeType          string | ||||
| 	useLegacyConfig      bool | ||||
| 	containerAnnotations []string | ||||
| } | ||||
| 
 | ||||
| // Option defines a function that can be used to configure the config builder
 | ||||
| @ -59,6 +60,13 @@ func WithUseLegacyConfig(useLegacyConfig bool) Option { | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // WithContainerAnnotations sets the container annotations for the config builder
 | ||||
| func WithContainerAnnotations(containerAnnotations ...string) Option { | ||||
| 	return func(b *builder) { | ||||
| 		b.containerAnnotations = containerAnnotations | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func (b *builder) build() (engine.Interface, error) { | ||||
| 	if b.path == "" { | ||||
| 		return nil, fmt.Errorf("config path is empty") | ||||
| @ -74,6 +82,7 @@ func (b *builder) build() (engine.Interface, error) { | ||||
| 	} | ||||
| 	config.RuntimeType = b.runtimeType | ||||
| 	config.UseDefaultRuntimeName = !b.useLegacyConfig | ||||
| 	config.ContainerAnnotations = b.containerAnnotations | ||||
| 
 | ||||
| 	version, err := config.parseVersion(b.useLegacyConfig) | ||||
| 	if err != nil { | ||||
|  | ||||
| @ -19,6 +19,7 @@ package config | ||||
| import ( | ||||
| 	"fmt" | ||||
| 
 | ||||
| 	"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi" | ||||
| 	"github.com/pelletier/go-toml" | ||||
| 	"github.com/sirupsen/logrus" | ||||
| ) | ||||
| @ -52,6 +53,8 @@ type cdiModeConfig struct { | ||||
| 	SpecDirs []string `toml:"spec-dirs"` | ||||
| 	// DefaultKind sets the default kind to be used when constructing fully-qualified CDI device names
 | ||||
| 	DefaultKind string `toml:"default-kind"` | ||||
| 	// AnnotationPrefixes sets the allowed prefixes for CDI annotation-based device injection
 | ||||
| 	AnnotationPrefixes []string `toml:"annotation-prefixes"` | ||||
| } | ||||
| 
 | ||||
| type csvModeConfig struct { | ||||
| @ -98,6 +101,9 @@ func GetDefaultRuntimeConfig() *RuntimeConfig { | ||||
| 			}, | ||||
| 			CDI: cdiModeConfig{ | ||||
| 				DefaultKind: "nvidia.com/gpu", | ||||
| 				AnnotationPrefixes: []string{ | ||||
| 					cdi.AnnotationPrefix, | ||||
| 				}, | ||||
| 			}, | ||||
| 		}, | ||||
| 	} | ||||
|  | ||||
| @ -18,6 +18,7 @@ package modifier | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config" | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" | ||||
| @ -67,7 +68,7 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Con | ||||
| 		return nil, fmt.Errorf("failed to load OCI spec: %v", err) | ||||
| 	} | ||||
| 
 | ||||
| 	_, annotationDevices, err := cdi.ParseAnnotations(rawSpec.Annotations) | ||||
| 	annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations) | ||||
| 	if err != nil { | ||||
| 		return nil, fmt.Errorf("failed to parse container annotations: %v", err) | ||||
| 	} | ||||
| @ -107,6 +108,38 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Con | ||||
| 	return nil, nil | ||||
| } | ||||
| 
 | ||||
| // getAnnotationDevices returns a list of devices specified in the annotations.
 | ||||
| // Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of
 | ||||
| // fully-qualified CDI devices names. If any device name is not fully-quality an error is returned.
 | ||||
| // The list of returned devices is deduplicated.
 | ||||
| func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]string, error) { | ||||
| 	devicesByKey := make(map[string][]string) | ||||
| 	for key, value := range annotations { | ||||
| 		for _, prefix := range prefixes { | ||||
| 			if strings.HasPrefix(key, prefix) { | ||||
| 				devicesByKey[key] = strings.Split(value, ",") | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	seen := make(map[string]bool) | ||||
| 	var annotationDevices []string | ||||
| 	for key, devices := range devicesByKey { | ||||
| 		for _, device := range devices { | ||||
| 			if !cdi.IsQualifiedName(device) { | ||||
| 				return nil, fmt.Errorf("invalid device name %q in annotation %q", device, key) | ||||
| 			} | ||||
| 			if seen[device] { | ||||
| 				continue | ||||
| 			} | ||||
| 			annotationDevices = append(annotationDevices, device) | ||||
| 			seen[device] = true | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	return annotationDevices, nil | ||||
| } | ||||
| 
 | ||||
| // Modify loads the CDI registry and injects the specified CDI devices into the OCI runtime specification.
 | ||||
| func (m cdiModifier) Modify(spec *specs.Spec) error { | ||||
| 	registry := cdi.GetRegistry( | ||||
|  | ||||
							
								
								
									
										92
									
								
								internal/modifier/cdi_test.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										92
									
								
								internal/modifier/cdi_test.go
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,92 @@ | ||||
| /** | ||||
| # Copyright (c) NVIDIA CORPORATION.  All rights reserved. | ||||
| # | ||||
| # Licensed under the Apache License, Version 2.0 (the "License"); | ||||
| # you may not use this file except in compliance with the License. | ||||
| # You may obtain a copy of the License at | ||||
| # | ||||
| #     http://www.apache.org/licenses/LICENSE-2.0
 | ||||
| # | ||||
| # Unless required by applicable law or agreed to in writing, software | ||||
| # distributed under the License is distributed on an "AS IS" BASIS, | ||||
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||||
| # See the License for the specific language governing permissions and | ||||
| # limitations under the License. | ||||
| **/ | ||||
| 
 | ||||
| package modifier | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/stretchr/testify/require" | ||||
| ) | ||||
| 
 | ||||
| func TestGetAnnotationDevices(t *testing.T) { | ||||
| 	testCases := []struct { | ||||
| 		description     string | ||||
| 		prefixes        []string | ||||
| 		annotations     map[string]string | ||||
| 		expectedDevices []string | ||||
| 		expectedError   error | ||||
| 	}{ | ||||
| 		{ | ||||
| 			description: "no annotations", | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "no matching annotations", | ||||
| 			prefixes:    []string{"not-prefix/"}, | ||||
| 			annotations: map[string]string{ | ||||
| 				"prefix/foo": "example.com/device=bar", | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "single matching annotation", | ||||
| 			prefixes:    []string{"prefix/"}, | ||||
| 			annotations: map[string]string{ | ||||
| 				"prefix/foo": "example.com/device=bar", | ||||
| 			}, | ||||
| 			expectedDevices: []string{"example.com/device=bar"}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "multiple matching annotations", | ||||
| 			prefixes:    []string{"prefix/", "another-prefix/"}, | ||||
| 			annotations: map[string]string{ | ||||
| 				"prefix/foo":         "example.com/device=bar", | ||||
| 				"another-prefix/bar": "example.com/device=baz", | ||||
| 			}, | ||||
| 			expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "multiple matching annotations with duplicate devices", | ||||
| 			prefixes:    []string{"prefix/", "another-prefix/"}, | ||||
| 			annotations: map[string]string{ | ||||
| 				"prefix/foo":         "example.com/device=bar", | ||||
| 				"another-prefix/bar": "example.com/device=bar", | ||||
| 			}, | ||||
| 			expectedDevices: []string{"example.com/device=bar"}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "invalid devices", | ||||
| 			prefixes:    []string{"prefix/"}, | ||||
| 			annotations: map[string]string{ | ||||
| 				"prefix/foo": "example.com/device", | ||||
| 			}, | ||||
| 			expectedError: fmt.Errorf("invalid device %q", "example.com/device"), | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, tc := range testCases { | ||||
| 		t.Run(tc.description, func(t *testing.T) { | ||||
| 			devices, err := getAnnotationDevices(tc.prefixes, tc.annotations) | ||||
| 			if tc.expectedError != nil { | ||||
| 				require.Error(t, err) | ||||
| 				return | ||||
| 			} | ||||
| 
 | ||||
| 			require.NoError(t, err) | ||||
| 			require.ElementsMatch(t, tc.expectedDevices, devices) | ||||
| 		}) | ||||
| 	} | ||||
| } | ||||
| @ -332,6 +332,7 @@ func TestUpdateV1Config(t *testing.T) { | ||||
| 				Tree:                  config, | ||||
| 				UseDefaultRuntimeName: true, | ||||
| 				RuntimeType:           runtimeType, | ||||
| 				ContainerAnnotations:  []string{"cdi.k8s.io/*"}, | ||||
| 			} | ||||
| 
 | ||||
| 			err = UpdateConfig(v1, o) | ||||
| @ -585,6 +586,7 @@ func TestUpdateV1ConfigWithRuncPresent(t *testing.T) { | ||||
| 				Tree:                  config, | ||||
| 				UseDefaultRuntimeName: true, | ||||
| 				RuntimeType:           runtimeType, | ||||
| 				ContainerAnnotations:  []string{"cdi.k8s.io/*"}, | ||||
| 			} | ||||
| 
 | ||||
| 			err = UpdateConfig(v1, o) | ||||
|  | ||||
| @ -279,8 +279,9 @@ func TestUpdateV2Config(t *testing.T) { | ||||
| 			require.NoError(t, err) | ||||
| 
 | ||||
| 			v2 := &containerd.Config{ | ||||
| 				Tree:        config, | ||||
| 				RuntimeType: runtimeType, | ||||
| 				Tree:                 config, | ||||
| 				RuntimeType:          runtimeType, | ||||
| 				ContainerAnnotations: []string{"cdi.k8s.io/*"}, | ||||
| 			} | ||||
| 
 | ||||
| 			err = UpdateConfig(v2, o) | ||||
| @ -520,8 +521,9 @@ func TestUpdateV2ConfigWithRuncPresent(t *testing.T) { | ||||
| 			require.NoError(t, err) | ||||
| 
 | ||||
| 			v2 := &containerd.Config{ | ||||
| 				Tree:        config, | ||||
| 				RuntimeType: runtimeType, | ||||
| 				Tree:                 config, | ||||
| 				RuntimeType:          runtimeType, | ||||
| 				ContainerAnnotations: []string{"cdi.k8s.io/*"}, | ||||
| 			} | ||||
| 
 | ||||
| 			err = UpdateConfig(v2, o) | ||||
|  | ||||
| @ -72,6 +72,8 @@ type options struct { | ||||
| 	hostRootMount   string | ||||
| 	runtimeDir      string | ||||
| 	useLegacyConfig bool | ||||
| 
 | ||||
| 	ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice | ||||
| } | ||||
| 
 | ||||
| func main() { | ||||
| @ -173,6 +175,11 @@ func main() { | ||||
| 			Destination: &options.useLegacyConfig, | ||||
| 			EnvVars:     []string{"CONTAINERD_USE_LEGACY_CONFIG"}, | ||||
| 		}, | ||||
| 		&cli.StringSliceFlag{ | ||||
| 			Name:        "nvidia-container-runtime-modes.cdi.annotation-prefixes", | ||||
| 			Destination: &options.ContainerRuntimeModesCDIAnnotationPrefixes, | ||||
| 			EnvVars:     []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"}, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	// Update the subcommand flags with the common subcommand flags
 | ||||
| @ -199,6 +206,7 @@ func Setup(c *cli.Context, o *options) error { | ||||
| 		containerd.WithPath(o.config), | ||||
| 		containerd.WithRuntimeType(o.runtimeType), | ||||
| 		containerd.WithUseLegacyConfig(o.useLegacyConfig), | ||||
| 		containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...), | ||||
| 	) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("unable to load config: %v", err) | ||||
| @ -241,6 +249,7 @@ func Cleanup(c *cli.Context, o *options) error { | ||||
| 		containerd.WithPath(o.config), | ||||
| 		containerd.WithRuntimeType(o.runtimeType), | ||||
| 		containerd.WithUseLegacyConfig(o.useLegacyConfig), | ||||
| 		containerd.WithContainerAnnotations(o.containerAnnotationsFromCDIPrefixes()...), | ||||
| 	) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("unable to load config: %v", err) | ||||
| @ -434,3 +443,13 @@ func RestartContainerdSystemd(hostRootMount string) error { | ||||
| 
 | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| // containerAnnotationsFromCDIPrefixes returns the container annotations to set for the given CDI prefixes.
 | ||||
| func (o *options) containerAnnotationsFromCDIPrefixes() []string { | ||||
| 	var annotations []string | ||||
| 	for _, prefix := range o.ContainerRuntimeModesCDIAnnotationPrefixes.Value() { | ||||
| 		annotations = append(annotations, prefix+"*") | ||||
| 	} | ||||
| 
 | ||||
| 	return annotations | ||||
| } | ||||
|  | ||||
| @ -47,10 +47,12 @@ type options struct { | ||||
| 	DriverRoot        string | ||||
| 	DriverRootCtrPath string | ||||
| 
 | ||||
| 	ContainerRuntimeMode                string | ||||
| 	ContainerRuntimeModesCdiDefaultKind string | ||||
| 	ContainerRuntimeDebug               string | ||||
| 	ContainerRuntimeLogLevel            string | ||||
| 	ContainerRuntimeMode     string | ||||
| 	ContainerRuntimeDebug    string | ||||
| 	ContainerRuntimeLogLevel string | ||||
| 
 | ||||
| 	ContainerRuntimeModesCdiDefaultKind        string | ||||
| 	ContainerRuntimeModesCDIAnnotationPrefixes cli.StringSlice | ||||
| 
 | ||||
| 	ContainerRuntimeHookSkipModeDetection bool | ||||
| 
 | ||||
| @ -65,6 +67,8 @@ type options struct { | ||||
| 
 | ||||
| 	acceptNVIDIAVisibleDevicesWhenUnprivileged bool | ||||
| 	acceptNVIDIAVisibleDevicesAsVolumeMounts   bool | ||||
| 
 | ||||
| 	ignoreErrors bool | ||||
| } | ||||
| 
 | ||||
| func main() { | ||||
| @ -121,26 +125,34 @@ func main() { | ||||
| 			EnvVars:     []string{"DRIVER_ROOT_CTR_PATH"}, | ||||
| 		}, | ||||
| 		&cli.StringFlag{ | ||||
| 			Name:        "nvidia-container-runtime-debug", | ||||
| 			Name:        "nvidia-container-runtime.debug", | ||||
| 			Aliases:     []string{"nvidia-container-runtime-debug"}, | ||||
| 			Usage:       "Specify the location of the debug log file for the NVIDIA Container Runtime", | ||||
| 			Destination: &opts.ContainerRuntimeDebug, | ||||
| 			EnvVars:     []string{"NVIDIA_CONTAINER_RUNTIME_DEBUG"}, | ||||
| 		}, | ||||
| 		&cli.StringFlag{ | ||||
| 			Name:        "nvidia-container-runtime-debug-log-level", | ||||
| 			Name:        "nvidia-container-runtime.log-level", | ||||
| 			Aliases:     []string{"nvidia-container-runtime-debug-log-level"}, | ||||
| 			Destination: &opts.ContainerRuntimeLogLevel, | ||||
| 			EnvVars:     []string{"NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"}, | ||||
| 		}, | ||||
| 		&cli.StringFlag{ | ||||
| 			Name:        "nvidia-container-runtime-mode", | ||||
| 			Name:        "nvidia-container-runtime.mode", | ||||
| 			Aliases:     []string{"nvidia-container-runtime-mode"}, | ||||
| 			Destination: &opts.ContainerRuntimeMode, | ||||
| 			EnvVars:     []string{"NVIDIA_CONTAINER_RUNTIME_MODE"}, | ||||
| 		}, | ||||
| 		&cli.StringFlag{ | ||||
| 			Name:        "nvidia-container-runtime-modes.cdi.default-kind", | ||||
| 			Name:        "nvidia-container-runtime.modes.cdi.default-kind", | ||||
| 			Destination: &opts.ContainerRuntimeModesCdiDefaultKind, | ||||
| 			EnvVars:     []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_DEFAULT_KIND"}, | ||||
| 		}, | ||||
| 		&cli.StringSliceFlag{ | ||||
| 			Name:        "nvidia-container-runtime.modes.cdi.annotation-prefixes", | ||||
| 			Destination: &opts.ContainerRuntimeModesCDIAnnotationPrefixes, | ||||
| 			EnvVars:     []string{"NVIDIA_CONTAINER_RUNTIME_MODES_CDI_ANNOTATION_PREFIXES"}, | ||||
| 		}, | ||||
| 		&cli.BoolFlag{ | ||||
| 			Name:        "nvidia-container-runtime-hook.skip-mode-detection", | ||||
| 			Value:       true, | ||||
| @ -148,7 +160,8 @@ func main() { | ||||
| 			EnvVars:     []string{"NVIDIA_CONTAINER_RUNTIME_HOOK_SKIP_MODE_DETECTION"}, | ||||
| 		}, | ||||
| 		&cli.StringFlag{ | ||||
| 			Name:        "nvidia-container-cli-debug", | ||||
| 			Name:        "nvidia-container-cli.debug", | ||||
| 			Aliases:     []string{"nvidia-container-cli-debug"}, | ||||
| 			Usage:       "Specify the location of the debug log file for the NVIDIA Container CLI", | ||||
| 			Destination: &opts.ContainerCLIDebug, | ||||
| 			EnvVars:     []string{"NVIDIA_CONTAINER_CLI_DEBUG"}, | ||||
| @ -194,6 +207,12 @@ func main() { | ||||
| 			Destination: &opts.cdiKind, | ||||
| 			EnvVars:     []string{"CDI_KIND"}, | ||||
| 		}, | ||||
| 		&cli.BoolFlag{ | ||||
| 			Name:        "ignore-errors", | ||||
| 			Usage:       "ignore errors when installing the NVIDIA Container toolkit. This is used for testing purposes only.", | ||||
| 			Hidden:      true, | ||||
| 			Destination: &opts.ignoreErrors, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	// Update the subcommand flags with the common subcommand flags
 | ||||
| @ -242,46 +261,62 @@ func Install(cli *cli.Context, opts *options) error { | ||||
| 
 | ||||
| 	log.Infof("Removing existing NVIDIA container toolkit installation") | ||||
| 	err := os.RemoveAll(opts.toolkitRoot) | ||||
| 	if err != nil { | ||||
| 	if err != nil && !opts.ignoreErrors { | ||||
| 		return fmt.Errorf("error removing toolkit directory: %v", err) | ||||
| 	} else if err != nil { | ||||
| 		log.Errorf("Ignoring error: %v", fmt.Errorf("error removing toolkit directory: %v", err)) | ||||
| 	} | ||||
| 
 | ||||
| 	toolkitConfigDir := filepath.Join(opts.toolkitRoot, ".config", "nvidia-container-runtime") | ||||
| 	toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename) | ||||
| 
 | ||||
| 	err = createDirectories(opts.toolkitRoot, toolkitConfigDir) | ||||
| 	if err != nil { | ||||
| 	if err != nil && !opts.ignoreErrors { | ||||
| 		return fmt.Errorf("could not create required directories: %v", err) | ||||
| 	} else if err != nil { | ||||
| 		log.Errorf("Ignoring error: %v", fmt.Errorf("could not create required directories: %v", err)) | ||||
| 	} | ||||
| 
 | ||||
| 	err = installContainerLibraries(opts.toolkitRoot) | ||||
| 	if err != nil { | ||||
| 	if err != nil && !opts.ignoreErrors { | ||||
| 		return fmt.Errorf("error installing NVIDIA container library: %v", err) | ||||
| 	} else if err != nil { | ||||
| 		log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container library: %v", err)) | ||||
| 	} | ||||
| 
 | ||||
| 	err = installContainerRuntimes(opts.toolkitRoot, opts.DriverRoot) | ||||
| 	if err != nil { | ||||
| 	if err != nil && !opts.ignoreErrors { | ||||
| 		return fmt.Errorf("error installing NVIDIA container runtime: %v", err) | ||||
| 	} else if err != nil { | ||||
| 		log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime: %v", err)) | ||||
| 	} | ||||
| 
 | ||||
| 	nvidiaContainerCliExecutable, err := installContainerCLI(opts.toolkitRoot) | ||||
| 	if err != nil { | ||||
| 	if err != nil && !opts.ignoreErrors { | ||||
| 		return fmt.Errorf("error installing NVIDIA container CLI: %v", err) | ||||
| 	} else if err != nil { | ||||
| 		log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container CLI: %v", err)) | ||||
| 	} | ||||
| 
 | ||||
| 	_, err = installRuntimeHook(opts.toolkitRoot, toolkitConfigPath) | ||||
| 	if err != nil { | ||||
| 	if err != nil && !opts.ignoreErrors { | ||||
| 		return fmt.Errorf("error installing NVIDIA container runtime hook: %v", err) | ||||
| 	} else if err != nil { | ||||
| 		log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container runtime hook: %v", err)) | ||||
| 	} | ||||
| 
 | ||||
| 	nvidiaCTKPath, err := installContainerToolkitCLI(opts.toolkitRoot) | ||||
| 	if err != nil { | ||||
| 	if err != nil && !opts.ignoreErrors { | ||||
| 		return fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err) | ||||
| 	} else if err != nil { | ||||
| 		log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA Container Toolkit CLI: %v", err)) | ||||
| 	} | ||||
| 
 | ||||
| 	err = installToolkitConfig(toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts) | ||||
| 	if err != nil { | ||||
| 	err = installToolkitConfig(cli, toolkitConfigPath, nvidiaContainerCliExecutable, nvidiaCTKPath, opts) | ||||
| 	if err != nil && !opts.ignoreErrors { | ||||
| 		return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) | ||||
| 	} else if err != nil { | ||||
| 		log.Errorf("Ignoring error: %v", fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)) | ||||
| 	} | ||||
| 
 | ||||
| 	return generateCDISpec(opts, nvidiaCTKPath) | ||||
| @ -337,10 +372,10 @@ func installLibrary(libName string, toolkitRoot string) error { | ||||
| 
 | ||||
| // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
 | ||||
| // that the settings are updated to match the desired install and nvidia driver directories.
 | ||||
| func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error { | ||||
| func installToolkitConfig(c *cli.Context, toolkitConfigPath string, nvidiaContainerCliExecutablePath string, nvidiaCTKPath string, opts *options) error { | ||||
| 	log.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) | ||||
| 
 | ||||
| 	config, err := toml.LoadFile(nvidiaContainerToolkitConfigSource) | ||||
| 	config, err := loadConfig(nvidiaContainerToolkitConfigSource) | ||||
| 	if err != nil { | ||||
| 		return fmt.Errorf("could not open source config file: %v", err) | ||||
| 	} | ||||
| @ -351,45 +386,64 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable | ||||
| 	} | ||||
| 	defer targetConfig.Close() | ||||
| 
 | ||||
| 	// Set the options in the root toml table
 | ||||
| 	config.Set("accept-nvidia-visible-devices-envvar-when-unprivileged", opts.acceptNVIDIAVisibleDevicesWhenUnprivileged) | ||||
| 	config.Set("accept-nvidia-visible-devices-as-volume-mounts", opts.acceptNVIDIAVisibleDevicesAsVolumeMounts) | ||||
| 
 | ||||
| 	nvidiaContainerCliKey := func(p string) []string { | ||||
| 		return []string{"nvidia-container-cli", p} | ||||
| 	} | ||||
| 
 | ||||
| 	// Read the ldconfig path from the config as this may differ per platform
 | ||||
| 	// On ubuntu-based systems this ends in `.real`
 | ||||
| 	ldconfigPath := fmt.Sprintf("%s", config.GetPath(nvidiaContainerCliKey("ldconfig"))) | ||||
| 
 | ||||
| 	ldconfigPath := fmt.Sprintf("%s", config.GetDefault("nvidia-container-cli.ldconfig", "/sbin/ldconfig")) | ||||
| 	// Use the driver run root as the root:
 | ||||
| 	driverLdconfigPath := "@" + filepath.Join(opts.DriverRoot, strings.TrimPrefix(ldconfigPath, "@/")) | ||||
| 
 | ||||
| 	config.SetPath(nvidiaContainerCliKey("root"), opts.DriverRoot) | ||||
| 	config.SetPath(nvidiaContainerCliKey("path"), nvidiaContainerCliExecutablePath) | ||||
| 	config.SetPath(nvidiaContainerCliKey("ldconfig"), driverLdconfigPath) | ||||
| 
 | ||||
| 	// Set the debug options if selected
 | ||||
| 	debugOptions := map[string]string{ | ||||
| 		"nvidia-container-runtime.debug":                  opts.ContainerRuntimeDebug, | ||||
| 		"nvidia-container-runtime.log-level":              opts.ContainerRuntimeLogLevel, | ||||
| 		"nvidia-container-runtime.mode":                   opts.ContainerRuntimeMode, | ||||
| 		"nvidia-container-runtime.modes.cdi.default-kind": opts.ContainerRuntimeModesCdiDefaultKind, | ||||
| 		"nvidia-container-cli.debug":                      opts.ContainerCLIDebug, | ||||
| 	configValues := map[string]interface{}{ | ||||
| 		// Set the options in the root toml table
 | ||||
| 		"accept-nvidia-visible-devices-envvar-when-unprivileged": opts.acceptNVIDIAVisibleDevicesWhenUnprivileged, | ||||
| 		"accept-nvidia-visible-devices-as-volume-mounts":         opts.acceptNVIDIAVisibleDevicesAsVolumeMounts, | ||||
| 		// Set the nvidia-container-cli options
 | ||||
| 		"nvidia-container-cli.root":     opts.DriverRoot, | ||||
| 		"nvidia-container-cli.path":     nvidiaContainerCliExecutablePath, | ||||
| 		"nvidia-container-cli.ldconfig": driverLdconfigPath, | ||||
| 		// Set nvidia-ctk options
 | ||||
| 		"nvidia-ctk.path": nvidiaCTKPath, | ||||
| 		// Set the nvidia-container-runtime-hook options
 | ||||
| 		"nvidia-container-runtime-hook.skip-mode-detection": opts.ContainerRuntimeHookSkipModeDetection, | ||||
| 	} | ||||
| 	for key, value := range debugOptions { | ||||
| 		if value == "" { | ||||
| 			continue | ||||
| 		} | ||||
| 	for key, value := range configValues { | ||||
| 		config.Set(key, value) | ||||
| 	} | ||||
| 
 | ||||
| 	// Set nvidia-ctk options
 | ||||
| 	config.Set("nvidia-ctk.path", nvidiaCTKPath) | ||||
| 	// Set the optional config options
 | ||||
| 	optionalConfigValues := map[string]interface{}{ | ||||
| 		"nvidia-container-runtime.debug":                         opts.ContainerRuntimeDebug, | ||||
| 		"nvidia-container-runtime.log-level":                     opts.ContainerRuntimeLogLevel, | ||||
| 		"nvidia-container-runtime.mode":                          opts.ContainerRuntimeMode, | ||||
| 		"nvidia-container-runtime.modes.cdi.annotation-prefixes": opts.ContainerRuntimeModesCDIAnnotationPrefixes, | ||||
| 		"nvidia-container-runtime.modes.cdi.default-kind":        opts.ContainerRuntimeModesCdiDefaultKind, | ||||
| 		"nvidia-container-cli.debug":                             opts.ContainerCLIDebug, | ||||
| 	} | ||||
| 	for key, value := range optionalConfigValues { | ||||
| 		if !c.IsSet(key) { | ||||
| 			log.Infof("Skipping unset option: %v", key) | ||||
| 			continue | ||||
| 		} | ||||
| 		if value == nil { | ||||
| 			log.Infof("Skipping option with nil value: %v", key) | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 	// Set the nvidia-container-runtime-hook options
 | ||||
| 	config.Set("nvidia-container-runtime-hook.skip-mode-detection", opts.ContainerRuntimeHookSkipModeDetection) | ||||
| 		switch v := value.(type) { | ||||
| 		case string: | ||||
| 			if v == "" { | ||||
| 				continue | ||||
| 			} | ||||
| 		case cli.StringSlice: | ||||
| 			if len(v.Value()) == 0 { | ||||
| 				continue | ||||
| 			} | ||||
| 			value = v.Value() | ||||
| 		default: | ||||
| 			log.Warnf("Unexpected type for option %v=%v: %T", key, value, v) | ||||
| 		} | ||||
| 
 | ||||
| 		config.Set(key, value) | ||||
| 	} | ||||
| 
 | ||||
| 	_, err = config.WriteTo(targetConfig) | ||||
| 	if err != nil { | ||||
| @ -402,6 +456,16 @@ func installToolkitConfig(toolkitConfigPath string, nvidiaContainerCliExecutable | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func loadConfig(path string) (*toml.Tree, error) { | ||||
| 	_, err := os.Stat(path) | ||||
| 	if err == nil { | ||||
| 		return toml.LoadFile(path) | ||||
| 	} else if os.IsNotExist(err) { | ||||
| 		return toml.TreeFromMap(nil) | ||||
| 	} | ||||
| 	return nil, err | ||||
| } | ||||
| 
 | ||||
| // installContainerToolkitCLI installs the nvidia-ctk CLI executable and wrapper.
 | ||||
| func installContainerToolkitCLI(toolkitDir string) (string, error) { | ||||
| 	e := executable{ | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user