mirror of
				https://github.com/NVIDIA/nvidia-container-toolkit
				synced 2025-06-26 18:18:24 +00:00 
			
		
		
		
	Merge branch 'support-cdi-mount-devices' into 'main'
Support CDI devices as mounts See merge request nvidia/container-toolkit/container-toolkit!480
This commit is contained in:
		
						commit
						c5a9ed6594
					
				| @ -174,7 +174,7 @@ func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *stri | ||||
| 	// if specified.
 | ||||
| 	var hasSwarmEnvvar bool | ||||
| 	for _, envvar := range swarmResourceEnvvars { | ||||
| 		if _, exists := image[envvar]; exists { | ||||
| 		if image.HasEnvvar(envvar) { | ||||
| 			hasSwarmEnvvar = true | ||||
| 			break | ||||
| 		} | ||||
| @ -257,28 +257,31 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func getMigConfigDevices(env map[string]string) *string { | ||||
| 	if devices, ok := env[envNVMigConfigDevices]; ok { | ||||
| 		return &devices | ||||
| 	} | ||||
| 	return nil | ||||
| func getMigConfigDevices(image image.CUDA) *string { | ||||
| 	return getMigDevices(image, envNVMigConfigDevices) | ||||
| } | ||||
| 
 | ||||
| func getMigMonitorDevices(env map[string]string) *string { | ||||
| 	if devices, ok := env[envNVMigMonitorDevices]; ok { | ||||
| 		return &devices | ||||
| 	} | ||||
| 	return nil | ||||
| func getMigMonitorDevices(image image.CUDA) *string { | ||||
| 	return getMigDevices(image, envNVMigMonitorDevices) | ||||
| } | ||||
| 
 | ||||
| func (c *HookConfig) getDriverCapabilities(env map[string]string, legacyImage bool) image.DriverCapabilities { | ||||
| func getMigDevices(image image.CUDA, envvar string) *string { | ||||
| 	if !image.HasEnvvar(envvar) { | ||||
| 		return nil | ||||
| 	} | ||||
| 	devices := image.Getenv(envvar) | ||||
| 	return &devices | ||||
| } | ||||
| 
 | ||||
| func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage bool) image.DriverCapabilities { | ||||
| 	// We use the default driver capabilities by default. This is filtered to only include the
 | ||||
| 	// supported capabilities
 | ||||
| 	supportedDriverCapabilities := image.NewDriverCapabilities(c.SupportedDriverCapabilities) | ||||
| 
 | ||||
| 	capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities) | ||||
| 
 | ||||
| 	capsEnv, capsEnvSpecified := env[envNVDriverCapabilities] | ||||
| 	capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities) | ||||
| 	capsEnv := cudaImage.Getenv(envNVDriverCapabilities) | ||||
| 
 | ||||
| 	if !capsEnvSpecified && legacyImage { | ||||
| 		// Environment variable unset with legacy image: set all capabilities.
 | ||||
|  | ||||
| @ -465,6 +465,9 @@ func TestGetNvidiaConfig(t *testing.T) { | ||||
| 	} | ||||
| 	for _, tc := range tests { | ||||
| 		t.Run(tc.description, func(t *testing.T) { | ||||
| 			image, _ := image.New( | ||||
| 				image.WithEnvMap(tc.env), | ||||
| 			) | ||||
| 			// Wrap the call to getNvidiaConfig() in a closure.
 | ||||
| 			var config *nvidiaConfig | ||||
| 			getConfig := func() { | ||||
| @ -473,7 +476,7 @@ func TestGetNvidiaConfig(t *testing.T) { | ||||
| 					defaultConfig, _ := getDefaultHookConfig() | ||||
| 					hookConfig = &defaultConfig | ||||
| 				} | ||||
| 				config = getNvidiaConfig(hookConfig, tc.env, nil, tc.privileged) | ||||
| 				config = getNvidiaConfig(hookConfig, image, nil, tc.privileged) | ||||
| 			} | ||||
| 
 | ||||
| 			// For any tests that are expected to panic, make sure they do.
 | ||||
| @ -678,13 +681,17 @@ func TestDeviceListSourcePriority(t *testing.T) { | ||||
| 			// Wrap the call to getDevices() in a closure.
 | ||||
| 			var devices *string | ||||
| 			getDevices := func() { | ||||
| 				env := map[string]string{ | ||||
| 				image, _ := image.New( | ||||
| 					image.WithEnvMap( | ||||
| 						map[string]string{ | ||||
| 							envNVVisibleDevices: tc.envvarDevices, | ||||
| 				} | ||||
| 						}, | ||||
| 					), | ||||
| 				) | ||||
| 				hookConfig, _ := getDefaultHookConfig() | ||||
| 				hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged | ||||
| 				hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts | ||||
| 				devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged) | ||||
| 				devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged) | ||||
| 			} | ||||
| 
 | ||||
| 			// For all other tests, just grab the devices and check the results
 | ||||
| @ -905,7 +912,10 @@ func TestGetDevicesFromEnvvar(t *testing.T) { | ||||
| 
 | ||||
| 	for i, tc := range tests { | ||||
| 		t.Run(tc.description, func(t *testing.T) { | ||||
| 			devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars) | ||||
| 			image, _ := image.New( | ||||
| 				image.WithEnvMap(tc.env), | ||||
| 			) | ||||
| 			devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars) | ||||
| 			if tc.expectedDevices == nil { | ||||
| 				require.Nil(t, devices, "%d: %v", i, tc) | ||||
| 				return | ||||
| @ -1021,8 +1031,11 @@ func TestGetDriverCapabilities(t *testing.T) { | ||||
| 				SupportedDriverCapabilities: tc.supportedCapabilities, | ||||
| 			} | ||||
| 
 | ||||
| 			image, _ := image.New( | ||||
| 				image.WithEnvMap(tc.env), | ||||
| 			) | ||||
| 			getDriverCapabilities := func() { | ||||
| 				capabilities = c.getDriverCapabilities(tc.env, tc.legacyImage).String() | ||||
| 				capabilities = c.getDriverCapabilities(image, tc.legacyImage).String() | ||||
| 			} | ||||
| 
 | ||||
| 			if tc.expectedPanic { | ||||
|  | ||||
| @ -19,10 +19,13 @@ package image | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/opencontainers/runtime-spec/specs-go" | ||||
| ) | ||||
| 
 | ||||
| type builder struct { | ||||
| 	env            []string | ||||
| 	env            map[string]string | ||||
| 	mounts         []specs.Mount | ||||
| 	disableRequire bool | ||||
| } | ||||
| 
 | ||||
| @ -30,7 +33,12 @@ type builder struct { | ||||
| func New(opt ...Option) (CUDA, error) { | ||||
| 	b := &builder{} | ||||
| 	for _, o := range opt { | ||||
| 		o(b) | ||||
| 		if err := o(b); err != nil { | ||||
| 			return CUDA{}, err | ||||
| 		} | ||||
| 	} | ||||
| 	if b.env == nil { | ||||
| 		b.env = make(map[string]string) | ||||
| 	} | ||||
| 
 | ||||
| 	return b.build() | ||||
| @ -38,36 +46,57 @@ func New(opt ...Option) (CUDA, error) { | ||||
| 
 | ||||
| // build creates a CUDA image from the builder.
 | ||||
| func (b builder) build() (CUDA, error) { | ||||
| 	c := make(CUDA) | ||||
| 
 | ||||
| 	for _, e := range b.env { | ||||
| 		parts := strings.SplitN(e, "=", 2) | ||||
| 		if len(parts) != 2 { | ||||
| 			return nil, fmt.Errorf("invalid environment variable: %v", e) | ||||
| 		} | ||||
| 		c[parts[0]] = parts[1] | ||||
| 	} | ||||
| 
 | ||||
| 	if b.disableRequire { | ||||
| 		c[envNVDisableRequire] = "true" | ||||
| 		b.env[envNVDisableRequire] = "true" | ||||
| 	} | ||||
| 
 | ||||
| 	c := CUDA{ | ||||
| 		env:    b.env, | ||||
| 		mounts: b.mounts, | ||||
| 	} | ||||
| 	return c, nil | ||||
| } | ||||
| 
 | ||||
| // Option is a functional option for creating a CUDA image.
 | ||||
| type Option func(*builder) | ||||
| type Option func(*builder) error | ||||
| 
 | ||||
| // WithDisableRequire sets the disable require option.
 | ||||
| func WithDisableRequire(disableRequire bool) Option { | ||||
| 	return func(b *builder) { | ||||
| 	return func(b *builder) error { | ||||
| 		b.disableRequire = disableRequire | ||||
| 		return nil | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // WithEnv sets the environment variables to use when creating the CUDA image.
 | ||||
| // Note that this also overwrites the values set with WithEnvMap.
 | ||||
| func WithEnv(env []string) Option { | ||||
| 	return func(b *builder) { | ||||
| 		b.env = env | ||||
| 	return func(b *builder) error { | ||||
| 		envmap := make(map[string]string) | ||||
| 		for _, e := range env { | ||||
| 			parts := strings.SplitN(e, "=", 2) | ||||
| 			if len(parts) != 2 { | ||||
| 				return fmt.Errorf("invalid environment variable: %v", e) | ||||
| 			} | ||||
| 			envmap[parts[0]] = parts[1] | ||||
| 		} | ||||
| 		return WithEnvMap(envmap)(b) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // WithEnvMap sets the environment variable map to use when creating the CUDA image.
 | ||||
| // Note that this also overwrites the values set with WithEnv.
 | ||||
| func WithEnvMap(env map[string]string) Option { | ||||
| 	return func(b *builder) error { | ||||
| 		b.env = env | ||||
| 		return nil | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| // WithMounts sets the mounts associated with the CUDA image.
 | ||||
| func WithMounts(mounts []specs.Mount) Option { | ||||
| 	return func(b *builder) error { | ||||
| 		b.mounts = mounts | ||||
| 		return nil | ||||
| 	} | ||||
| } | ||||
|  | ||||
| @ -18,9 +18,11 @@ package image | ||||
| 
 | ||||
| import ( | ||||
| 	"fmt" | ||||
| 	"path/filepath" | ||||
| 	"strconv" | ||||
| 	"strings" | ||||
| 
 | ||||
| 	"github.com/container-orchestrated-devices/container-device-interface/pkg/parser" | ||||
| 	"github.com/opencontainers/runtime-spec/specs-go" | ||||
| 	"golang.org/x/mod/semver" | ||||
| ) | ||||
| @ -37,7 +39,10 @@ const ( | ||||
| // CUDA represents a CUDA image that can be used for GPU computing. This wraps
 | ||||
| // a map of environment variable to values that can be used to perform lookups
 | ||||
| // such as requirements.
 | ||||
| type CUDA map[string]string | ||||
| type CUDA struct { | ||||
| 	env    map[string]string | ||||
| 	mounts []specs.Mount | ||||
| } | ||||
| 
 | ||||
| // NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
 | ||||
| // The process environment is read (if present) to construc the CUDA Image.
 | ||||
| @ -47,7 +52,10 @@ func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) { | ||||
| 		env = spec.Process.Env | ||||
| 	} | ||||
| 
 | ||||
| 	return New(WithEnv(env)) | ||||
| 	return New( | ||||
| 		WithEnv(env), | ||||
| 		WithMounts(spec.Mounts), | ||||
| 	) | ||||
| } | ||||
| 
 | ||||
| // NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
 | ||||
| @ -56,12 +64,24 @@ func NewCUDAImageFromEnv(env []string) (CUDA, error) { | ||||
| 	return New(WithEnv(env)) | ||||
| } | ||||
| 
 | ||||
| // Getenv returns the value of the specified environment variable.
 | ||||
| // If the environment variable is not specified, an empty string is returned.
 | ||||
| func (i CUDA) Getenv(key string) string { | ||||
| 	return i.env[key] | ||||
| } | ||||
| 
 | ||||
| // HasEnvvar checks whether the specified envvar is defined in the image.
 | ||||
| func (i CUDA) HasEnvvar(key string) bool { | ||||
| 	_, exists := i.env[key] | ||||
| 	return exists | ||||
| } | ||||
| 
 | ||||
| // IsLegacy returns whether the associated CUDA image is a "legacy" image. An
 | ||||
| // image is considered legacy if it has a CUDA_VERSION environment variable defined
 | ||||
| // and no NVIDIA_REQUIRE_CUDA environment variable defined.
 | ||||
| func (i CUDA) IsLegacy() bool { | ||||
| 	legacyCudaVersion := i[envCUDAVersion] | ||||
| 	cudaRequire := i[envNVRequireCUDA] | ||||
| 	legacyCudaVersion := i.env[envCUDAVersion] | ||||
| 	cudaRequire := i.env[envNVRequireCUDA] | ||||
| 	return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 | ||||
| } | ||||
| 
 | ||||
| @ -74,7 +94,7 @@ func (i CUDA) GetRequirements() ([]string, error) { | ||||
| 
 | ||||
| 	// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
 | ||||
| 	var requirements []string | ||||
| 	for name, value := range i { | ||||
| 	for name, value := range i.env { | ||||
| 		if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) { | ||||
| 			requirements = append(requirements, value) | ||||
| 		} | ||||
| @ -93,7 +113,7 @@ func (i CUDA) GetRequirements() ([]string, error) { | ||||
| // HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
 | ||||
| // to a valid (true) boolean value this can be used to disable the requirement checks
 | ||||
| func (i CUDA) HasDisableRequire() bool { | ||||
| 	if disable, exists := i[envNVDisableRequire]; exists { | ||||
| 	if disable, exists := i.env[envNVDisableRequire]; exists { | ||||
| 		// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
 | ||||
| 		d, _ := strconv.ParseBool(disable) | ||||
| 		return d | ||||
| @ -104,12 +124,12 @@ func (i CUDA) HasDisableRequire() bool { | ||||
| 
 | ||||
| // DevicesFromEnvvars returns the devices requested by the image through environment variables
 | ||||
| func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices { | ||||
| 	// We concantenate all the devices from the specified envvars.
 | ||||
| 	// We concantenate all the devices from the specified env.
 | ||||
| 	var isSet bool | ||||
| 	var devices []string | ||||
| 	requested := make(map[string]bool) | ||||
| 	for _, envVar := range envVars { | ||||
| 		if devs, ok := i[envVar]; ok { | ||||
| 		if devs, ok := i.env[envVar]; ok { | ||||
| 			isSet = true | ||||
| 			for _, d := range strings.Split(devs, ",") { | ||||
| 				trimmed := strings.TrimSpace(d) | ||||
| @ -137,7 +157,7 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices { | ||||
| 
 | ||||
| // GetDriverCapabilities returns the requested driver capabilities.
 | ||||
| func (i CUDA) GetDriverCapabilities() DriverCapabilities { | ||||
| 	env := i[envNVDriverCapabilities] | ||||
| 	env := i.env[envNVDriverCapabilities] | ||||
| 
 | ||||
| 	capabilities := make(DriverCapabilities) | ||||
| 	for _, c := range strings.Split(env, ",") { | ||||
| @ -148,7 +168,7 @@ func (i CUDA) GetDriverCapabilities() DriverCapabilities { | ||||
| } | ||||
| 
 | ||||
| func (i CUDA) legacyVersion() (string, error) { | ||||
| 	cudaVersion := i[envCUDAVersion] | ||||
| 	cudaVersion := i.env[envCUDAVersion] | ||||
| 	majorMinor, err := parseMajorMinorVersion(cudaVersion) | ||||
| 	if err != nil { | ||||
| 		return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err) | ||||
| @ -178,3 +198,79 @@ func parseMajorMinorVersion(version string) (string, error) { | ||||
| 	} | ||||
| 	return majorMinor, nil | ||||
| } | ||||
| 
 | ||||
| // OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
 | ||||
| func (i CUDA) OnlyFullyQualifiedCDIDevices() bool { | ||||
| 	var hasCDIdevice bool | ||||
| 	for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() { | ||||
| 		if !parser.IsQualifiedName(device) { | ||||
| 			return false | ||||
| 		} | ||||
| 		hasCDIdevice = true | ||||
| 	} | ||||
| 
 | ||||
| 	for _, device := range i.DevicesFromMounts() { | ||||
| 		if !strings.HasPrefix(device, "cdi/") { | ||||
| 			return false | ||||
| 		} | ||||
| 		hasCDIdevice = true | ||||
| 	} | ||||
| 	return hasCDIdevice | ||||
| } | ||||
| 
 | ||||
| const ( | ||||
| 	deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices" | ||||
| ) | ||||
| 
 | ||||
| // DevicesFromMounts returns a list of device specified as mounts.
 | ||||
| // TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
 | ||||
| func (i CUDA) DevicesFromMounts() []string { | ||||
| 	root := filepath.Clean(deviceListAsVolumeMountsRoot) | ||||
| 	seen := make(map[string]bool) | ||||
| 	var devices []string | ||||
| 	for _, m := range i.mounts { | ||||
| 		source := filepath.Clean(m.Source) | ||||
| 		// Only consider mounts who's host volume is /dev/null
 | ||||
| 		if source != "/dev/null" { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		destination := filepath.Clean(m.Destination) | ||||
| 		if seen[destination] { | ||||
| 			continue | ||||
| 		} | ||||
| 		seen[destination] = true | ||||
| 
 | ||||
| 		// Only consider container mount points that begin with 'root'
 | ||||
| 		if !strings.HasPrefix(destination, root) { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		// Grab the full path beyond 'root' and add it to the list of devices
 | ||||
| 		device := strings.Trim(strings.TrimPrefix(destination, root), "/") | ||||
| 		if len(device) == 0 { | ||||
| 			continue | ||||
| 		} | ||||
| 		devices = append(devices, device) | ||||
| 	} | ||||
| 	return devices | ||||
| } | ||||
| 
 | ||||
| // CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
 | ||||
| func (i CUDA) CDIDevicesFromMounts() []string { | ||||
| 	var devices []string | ||||
| 	for _, mountDevice := range i.DevicesFromMounts() { | ||||
| 		if !strings.HasPrefix(mountDevice, "cdi/") { | ||||
| 			continue | ||||
| 		} | ||||
| 		parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3) | ||||
| 		if len(parts) != 3 { | ||||
| 			continue | ||||
| 		} | ||||
| 		vendor := parts[0] | ||||
| 		class := parts[1] | ||||
| 		device := parts[2] | ||||
| 		devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device)) | ||||
| 	} | ||||
| 	return devices | ||||
| } | ||||
|  | ||||
| @ -126,7 +126,6 @@ func TestGetRequirements(t *testing.T) { | ||||
| 			requirements, err := image.GetRequirements() | ||||
| 			require.NoError(t, err) | ||||
| 			require.ElementsMatch(t, tc.requirements, requirements) | ||||
| 
 | ||||
| 		}) | ||||
| 
 | ||||
| 	} | ||||
|  | ||||
| @ -19,7 +19,6 @@ package info | ||||
| import ( | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger" | ||||
| 	cdi "github.com/container-orchestrated-devices/container-device-interface/pkg/parser" | ||||
| 	"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device" | ||||
| 	"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/info" | ||||
| 	"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml" | ||||
| @ -69,7 +68,7 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) { | ||||
| 		r.logger.Infof("Auto-detected mode as '%v'", rmode) | ||||
| 	}() | ||||
| 
 | ||||
| 	if onlyFullyQualifiedCDIDevices(image) { | ||||
| 	if image.OnlyFullyQualifiedCDIDevices() { | ||||
| 		return "cdi" | ||||
| 	} | ||||
| 
 | ||||
| @ -88,14 +87,3 @@ func (r resolver) resolveMode(mode string, image image.CUDA) (rmode string) { | ||||
| 
 | ||||
| 	return "legacy" | ||||
| } | ||||
| 
 | ||||
| func onlyFullyQualifiedCDIDevices(image image.CUDA) bool { | ||||
| 	var hasCDIdevice bool | ||||
| 	for _, device := range image.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() { | ||||
| 		if !cdi.IsQualifiedName(device) { | ||||
| 			return false | ||||
| 		} | ||||
| 		hasCDIdevice = true | ||||
| 	} | ||||
| 	return hasCDIdevice | ||||
| } | ||||
|  | ||||
| @ -20,6 +20,7 @@ import ( | ||||
| 	"testing" | ||||
| 
 | ||||
| 	"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" | ||||
| 	"github.com/opencontainers/runtime-spec/specs-go" | ||||
| 	testlog "github.com/sirupsen/logrus/hooks/test" | ||||
| 	"github.com/stretchr/testify/require" | ||||
| ) | ||||
| @ -32,7 +33,8 @@ func TestResolveAutoMode(t *testing.T) { | ||||
| 		mode         string | ||||
| 		expectedMode string | ||||
| 		info         map[string]bool | ||||
| 		image        image.CUDA | ||||
| 		envmap       map[string]string | ||||
| 		mounts       []string | ||||
| 	}{ | ||||
| 		{ | ||||
| 			description:  "non-auto resolves to input", | ||||
| @ -119,7 +121,7 @@ func TestResolveAutoMode(t *testing.T) { | ||||
| 			description:  "cdi devices resolves to cdi", | ||||
| 			mode:         "auto", | ||||
| 			expectedMode: "cdi", | ||||
| 			image: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=all", | ||||
| 			}, | ||||
| 		}, | ||||
| @ -127,14 +129,14 @@ func TestResolveAutoMode(t *testing.T) { | ||||
| 			description:  "multiple cdi devices resolves to cdi", | ||||
| 			mode:         "auto", | ||||
| 			expectedMode: "cdi", | ||||
| 			image: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,nvidia.com/gpu=1", | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "at least one non-cdi device resolves to legacy", | ||||
| 			mode:        "auto", | ||||
| 			image: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0", | ||||
| 			}, | ||||
| 			info: map[string]bool{ | ||||
| @ -147,7 +149,7 @@ func TestResolveAutoMode(t *testing.T) { | ||||
| 		{ | ||||
| 			description: "at least one non-cdi device resolves to csv", | ||||
| 			mode:        "auto", | ||||
| 			image: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0", | ||||
| 			}, | ||||
| 			info: map[string]bool{ | ||||
| @ -157,6 +159,44 @@ func TestResolveAutoMode(t *testing.T) { | ||||
| 			}, | ||||
| 			expectedMode: "csv", | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "cdi mount devices resolves to CDI", | ||||
| 			mode:        "auto", | ||||
| 			mounts: []string{ | ||||
| 				"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0", | ||||
| 			}, | ||||
| 			expectedMode: "cdi", | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "cdi mount and non-CDI devices resolves to legacy", | ||||
| 			mode:        "auto", | ||||
| 			mounts: []string{ | ||||
| 				"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0", | ||||
| 				"/var/run/nvidia-container-devices/all", | ||||
| 			}, | ||||
| 			info: map[string]bool{ | ||||
| 				"nvml":  true, | ||||
| 				"tegra": false, | ||||
| 				"nvgpu": false, | ||||
| 			}, | ||||
| 			expectedMode: "legacy", | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "cdi mount and non-CDI envvar resolves to legacy", | ||||
| 			mode:        "auto", | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES": "0", | ||||
| 			}, | ||||
| 			mounts: []string{ | ||||
| 				"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0", | ||||
| 			}, | ||||
| 			info: map[string]bool{ | ||||
| 				"nvml":  true, | ||||
| 				"tegra": false, | ||||
| 				"nvgpu": false, | ||||
| 			}, | ||||
| 			expectedMode: "legacy", | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, tc := range testCases { | ||||
| @ -177,7 +217,20 @@ func TestResolveAutoMode(t *testing.T) { | ||||
| 				logger: logger, | ||||
| 				info:   info, | ||||
| 			} | ||||
| 			mode := r.resolveMode(tc.mode, tc.image) | ||||
| 
 | ||||
| 			var mounts []specs.Mount | ||||
| 			for _, d := range tc.mounts { | ||||
| 				mount := specs.Mount{ | ||||
| 					Source:      "/dev/null", | ||||
| 					Destination: d, | ||||
| 				} | ||||
| 				mounts = append(mounts, mount) | ||||
| 			} | ||||
| 			image, _ := image.New( | ||||
| 				image.WithEnvMap(tc.envmap), | ||||
| 				image.WithMounts(mounts), | ||||
| 			) | ||||
| 			mode := r.resolveMode(tc.mode, image) | ||||
| 			require.EqualValues(t, tc.expectedMode, mode) | ||||
| 		}) | ||||
| 	} | ||||
|  | ||||
| @ -67,6 +67,13 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C | ||||
| 	if err != nil { | ||||
| 		return nil, err | ||||
| 	} | ||||
| 	if cfg.AcceptDeviceListAsVolumeMounts { | ||||
| 		mountDevices := container.CDIDevicesFromMounts() | ||||
| 		if len(mountDevices) > 0 { | ||||
| 			return mountDevices, nil | ||||
| 		} | ||||
| 	} | ||||
| 
 | ||||
| 	envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar) | ||||
| 
 | ||||
| 	var devices []string | ||||
|  | ||||
| @ -55,7 +55,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUD | ||||
| 		return nil, fmt.Errorf("failed to get list of CSV files: %v", err) | ||||
| 	} | ||||
| 
 | ||||
| 	if nvidiaRequireJetpack := image[nvidiaRequireJetpackEnvvar]; nvidiaRequireJetpack != "csv-mounts=all" { | ||||
| 	if image.Getenv(nvidiaRequireJetpackEnvvar) != "csv-mounts=all" { | ||||
| 		csvFiles = csv.BaseFilesOnly(csvFiles) | ||||
| 	} | ||||
| 
 | ||||
|  | ||||
| @ -32,30 +32,33 @@ func TestNewCSVModifier(t *testing.T) { | ||||
| 	testCases := []struct { | ||||
| 		description   string | ||||
| 		cfg           *config.Config | ||||
| 		image         image.CUDA | ||||
| 		envmap        map[string]string | ||||
| 		expectedError error | ||||
| 		expectedNil   bool | ||||
| 	}{ | ||||
| 		{ | ||||
| 			description: "visible devices not set returns nil", | ||||
| 			image:       image.CUDA{}, | ||||
| 			envmap:      map[string]string{}, | ||||
| 			expectedNil: true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "visible devices empty returns nil", | ||||
| 			image:       image.CUDA{"NVIDIA_VISIBLE_DEVICES": ""}, | ||||
| 			envmap:      map[string]string{"NVIDIA_VISIBLE_DEVICES": ""}, | ||||
| 			expectedNil: true, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "visible devices 'void' returns nil", | ||||
| 			image:       image.CUDA{"NVIDIA_VISIBLE_DEVICES": "void"}, | ||||
| 			envmap:      map[string]string{"NVIDIA_VISIBLE_DEVICES": "void"}, | ||||
| 			expectedNil: true, | ||||
| 		}, | ||||
| 	} | ||||
| 
 | ||||
| 	for _, tc := range testCases { | ||||
| 		t.Run(tc.description, func(t *testing.T) { | ||||
| 			m, err := NewCSVModifier(logger, tc.cfg, tc.image) | ||||
| 			image, _ := image.New( | ||||
| 				image.WithEnvMap(tc.envmap), | ||||
| 			) | ||||
| 			m, err := NewCSVModifier(logger, tc.cfg, image) | ||||
| 			if tc.expectedError != nil { | ||||
| 				require.Error(t, err) | ||||
| 			} else { | ||||
|  | ||||
| @ -38,7 +38,7 @@ func NewGDSModifier(logger logger.Interface, cfg *config.Config, image image.CUD | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	if gds := image[nvidiaGDSEnvvar]; gds != "enabled" { | ||||
| 	if image.Getenv(nvidiaGDSEnvvar) != "enabled" { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
|  | ||||
| @ -26,7 +26,7 @@ import ( | ||||
| func TestGraphicsModifier(t *testing.T) { | ||||
| 	testCases := []struct { | ||||
| 		description      string | ||||
| 		cudaImage        image.CUDA | ||||
| 		envmap           map[string]string | ||||
| 		expectedRequired bool | ||||
| 	}{ | ||||
| 		{ | ||||
| @ -34,20 +34,20 @@ func TestGraphicsModifier(t *testing.T) { | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "devices with no capabilities does not create modifier", | ||||
| 			cudaImage: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES": "all", | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "devices with no non-graphics does not create modifier", | ||||
| 			cudaImage: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES":     "all", | ||||
| 				"NVIDIA_DRIVER_CAPABILITIES": "compute", | ||||
| 			}, | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "devices with all capabilities creates modifier", | ||||
| 			cudaImage: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES":     "all", | ||||
| 				"NVIDIA_DRIVER_CAPABILITIES": "all", | ||||
| 			}, | ||||
| @ -55,7 +55,7 @@ func TestGraphicsModifier(t *testing.T) { | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "devices with graphics capability creates modifier", | ||||
| 			cudaImage: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES":     "all", | ||||
| 				"NVIDIA_DRIVER_CAPABILITIES": "graphics", | ||||
| 			}, | ||||
| @ -63,7 +63,7 @@ func TestGraphicsModifier(t *testing.T) { | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "devices with compute,graphics capability creates modifier", | ||||
| 			cudaImage: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES":     "all", | ||||
| 				"NVIDIA_DRIVER_CAPABILITIES": "compute,graphics", | ||||
| 			}, | ||||
| @ -71,7 +71,7 @@ func TestGraphicsModifier(t *testing.T) { | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "devices with display capability creates modifier", | ||||
| 			cudaImage: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES":     "all", | ||||
| 				"NVIDIA_DRIVER_CAPABILITIES": "display", | ||||
| 			}, | ||||
| @ -79,7 +79,7 @@ func TestGraphicsModifier(t *testing.T) { | ||||
| 		}, | ||||
| 		{ | ||||
| 			description: "devices with display,graphics capability creates modifier", | ||||
| 			cudaImage: image.CUDA{ | ||||
| 			envmap: map[string]string{ | ||||
| 				"NVIDIA_VISIBLE_DEVICES":     "all", | ||||
| 				"NVIDIA_DRIVER_CAPABILITIES": "display,graphics", | ||||
| 			}, | ||||
| @ -89,7 +89,10 @@ func TestGraphicsModifier(t *testing.T) { | ||||
| 
 | ||||
| 	for _, tc := range testCases { | ||||
| 		t.Run(tc.description, func(t *testing.T) { | ||||
| 			required, _ := requiresGraphicsModifier(tc.cudaImage) | ||||
| 			image, _ := image.New( | ||||
| 				image.WithEnvMap(tc.envmap), | ||||
| 			) | ||||
| 			required, _ := requiresGraphicsModifier(image) | ||||
| 			require.EqualValues(t, tc.expectedRequired, required) | ||||
| 		}) | ||||
| 	} | ||||
|  | ||||
| @ -38,7 +38,7 @@ func NewMOFEDModifier(logger logger.Interface, cfg *config.Config, image image.C | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
| 	if mofed := image[nvidiaMOFEDEnvvar]; mofed != "enabled" { | ||||
| 	if image.Getenv(nvidiaMOFEDEnvvar) != "enabled" { | ||||
| 		return nil, nil | ||||
| 	} | ||||
| 
 | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user