This commit is contained in:
Carlos Eduardo Arango Gutierrez 2025-06-05 15:51:40 +02:00 committed by GitHub
commit 07ad80b910
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 334 additions and 42 deletions

View File

@ -212,7 +212,7 @@ func parseMajorMinorVersion(version string) (string, error) {
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
var hasCDIdevice bool
for _, device := range i.VisibleDevicesFromEnvVar() {
for _, device := range i.visibleDevicesFromEnvVar() {
if !parser.IsQualifiedName(device) {
return false
}
@ -228,6 +228,21 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
return hasCDIdevice
}
// visibleEnvVars returns the environment variables that are used to determine device visibility.
// It returns the preferred environment variables that are set, or NVIDIA_VISIBLE_DEVICES if none are set.
func (i CUDA) visibleEnvVars() []string {
var envVars []string
for _, envVar := range i.preferredVisibleDeviceEnvVars {
if i.HasEnvvar(envVar) {
envVars = append(envVars, envVar)
}
}
if len(envVars) == 0 {
envVars = append(envVars, EnvVarNvidiaVisibleDevices)
}
return envVars
}
// VisibleDevices returns a list of devices requested in the container image.
// If volume mount requests are enabled these are returned if requested,
// otherwise device requests through environment variables are considered.
@ -243,7 +258,7 @@ func (i CUDA) VisibleDevices() []string {
}
// Get the Fallback to reading from the environment variable if privileges are correct
envVarDeviceRequests := i.VisibleDevicesFromEnvVar()
envVarDeviceRequests := i.visibleDevicesFromEnvVar()
if len(envVarDeviceRequests) == 0 {
return nil
}
@ -255,22 +270,21 @@ func (i CUDA) VisibleDevices() []string {
}
// We log a warning if we are ignoring the environment variable requests.
i.logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES in unprivileged container")
envVars := i.visibleEnvVars()
if len(envVars) > 0 {
i.logger.Warningf("Ignoring devices requested by environment variable(s) in unprivileged container: %v", envVars)
}
return nil
}
// VisibleDevicesFromEnvVar returns the set of visible devices requested through environment variables.
// visibleDevicesFromEnvVar returns the set of visible devices requested through environment variables.
// If any of the preferredVisibleDeviceEnvVars are present in the image, they
// are used to determine the visible devices. If this is not the case, the
// NVIDIA_VISIBLE_DEVICES environment variable is used.
func (i CUDA) VisibleDevicesFromEnvVar() []string {
for _, envVar := range i.preferredVisibleDeviceEnvVars {
if i.HasEnvvar(envVar) {
return i.DevicesFromEnvvars(i.preferredVisibleDeviceEnvVars...).List()
}
}
return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
func (i CUDA) visibleDevicesFromEnvVar() []string {
envVars := i.visibleEnvVars()
return i.DevicesFromEnvvars(envVars...).List()
}
// visibleDevicesFromMounts returns the set of visible devices requested as mounts.

View File

@ -429,7 +429,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
)
require.NoError(t, err)
devices := image.VisibleDevicesFromEnvVar()
devices := image.visibleDevicesFromEnvVar()
require.EqualValues(t, tc.expectedDevices, devices)
})
}
@ -508,13 +508,15 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
func TestVisibleDevices(t *testing.T) {
var tests = []struct {
description string
mountDevices []specs.Mount
envvarDevices string
privileged bool
acceptUnprivileged bool
acceptMounts bool
expectedDevices []string
description string
mountDevices []specs.Mount
envvarDevices string
privileged bool
acceptUnprivileged bool
acceptMounts bool
preferredVisibleDeviceEnvVars []string
env map[string]string
expectedDevices []string
}{
{
description: "Mount devices, unprivileged, no accept unprivileged",
@ -597,20 +599,92 @@ func TestVisibleDevices(t *testing.T) {
acceptMounts: false,
expectedDevices: nil,
},
// New test cases for visibleEnvVars functionality
{
description: "preferred env var set and present in env, privileged",
mountDevices: nil,
envvarDevices: "",
privileged: true,
acceptUnprivileged: false,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
env: map[string]string{
"DOCKER_RESOURCE_GPUS": "GPU-12345",
},
expectedDevices: []string{"GPU-12345"},
},
{
description: "preferred env var set and present in env, unprivileged but accepted",
mountDevices: nil,
envvarDevices: "",
privileged: false,
acceptUnprivileged: true,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
env: map[string]string{
"DOCKER_RESOURCE_GPUS": "GPU-12345",
},
expectedDevices: []string{"GPU-12345"},
},
{
description: "preferred env var set and present in env, unprivileged and not accepted",
mountDevices: nil,
envvarDevices: "",
privileged: false,
acceptUnprivileged: false,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
env: map[string]string{
"DOCKER_RESOURCE_GPUS": "GPU-12345",
},
expectedDevices: nil,
},
{
description: "multiple preferred env vars, both present, privileged",
mountDevices: nil,
envvarDevices: "",
privileged: true,
acceptUnprivileged: false,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
"DOCKER_RESOURCE_GPUS": "GPU-12345",
"DOCKER_RESOURCE_GPUS_ADDITIONAL": "GPU-67890",
},
expectedDevices: []string{"GPU-12345", "GPU-67890"},
},
{
description: "preferred env var not present, fallback to NVIDIA_VISIBLE_DEVICES, privileged",
mountDevices: nil,
envvarDevices: "GPU-12345",
privileged: true,
acceptUnprivileged: false,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
env: map[string]string{
EnvVarNvidiaVisibleDevices: "GPU-12345",
},
expectedDevices: []string{"GPU-12345"},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
// Wrap the call to getDevices() in a closure.
// Create env map with both NVIDIA_VISIBLE_DEVICES and any additional env vars
env := make(map[string]string)
if tc.envvarDevices != "" {
env[EnvVarNvidiaVisibleDevices] = tc.envvarDevices
}
for k, v := range tc.env {
env[k] = v
}
image, err := New(
WithEnvMap(
map[string]string{
EnvVarNvidiaVisibleDevices: tc.envvarDevices,
},
),
WithEnvMap(env),
WithMounts(tc.mountDevices),
WithPrivileged(tc.privileged),
WithAcceptDeviceListAsVolumeMounts(tc.acceptMounts),
WithAcceptEnvvarUnprivileged(tc.acceptUnprivileged),
WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...),
)
require.NoError(t, err)
require.Equal(t, tc.expectedDevices, image.VisibleDevices())

View File

@ -86,37 +86,38 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
if err != nil {
return nil, err
}
if cfg.AcceptDeviceListAsVolumeMounts {
mountDevices := container.CDIDevicesFromMounts()
if len(mountDevices) > 0 {
return mountDevices, nil
}
}
var devices []string
seen := make(map[string]bool)
for _, name := range container.VisibleDevicesFromEnvVar() {
for _, name := range container.VisibleDevices() {
// Skip empty device names
if strings.TrimSpace(name) == "" {
continue
}
if !parser.IsQualifiedName(name) {
name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name)
}
// Skip devices that result in empty device IDs after qualification
if strings.HasSuffix(name, "=") {
logger.Debugf("Ignoring device with empty ID: %q", name)
continue
}
if seen[name] {
logger.Debugf("Ignoring duplicate device %q", name)
continue
}
devices = append(devices, name)
seen[name] = true
}
if len(devices) == 0 {
return nil, nil
}
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged((*image.OCISpec)(rawSpec)) {
return devices, nil
}
logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES: %v", devices)
return nil, nil
return devices, nil
}
// getAnnotationDevices returns a list of devices specified in the annotations.

View File

@ -20,7 +20,12 @@ import (
"fmt"
"testing"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
func TestGetAnnotationDevices(t *testing.T) {
@ -90,3 +95,201 @@ func TestGetAnnotationDevices(t *testing.T) {
})
}
}
func getTestConfig() *config.Config {
cfg, _ := config.GetDefault()
return cfg
}
func getTestConfigWithAnnotations() *config.Config {
cfg, _ := config.GetDefault()
cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes = []string{"cdi.k8s.io/"}
return cfg
}
func TestGetDevicesFromSpec(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
spec *specs.Spec
config *config.Config
loadError error
expectedDevices []string
expectedError string
}{
{
description: "empty spec, no devices specified",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{},
},
},
config: getTestConfig(),
expectedDevices: nil,
expectedError: "",
},
{
description: "NVIDIA_VISIBLE_DEVICES=all devices specified",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{
"NVIDIA_VISIBLE_DEVICES=all",
},
},
},
config: getTestConfig(),
expectedDevices: []string{"nvidia.com/gpu=all"},
expectedError: "",
},
{
description: "devices from annotations",
spec: &specs.Spec{
Annotations: map[string]string{
"cdi.k8s.io/test": "example.com/device=device1,example.com/device=device2",
},
Process: &specs.Process{
Env: []string{},
},
},
config: getTestConfigWithAnnotations(),
expectedDevices: []string{"example.com/device=device1", "example.com/device=device2"},
},
{
description: "devices from environment variables - single device",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=0"},
},
},
config: getTestConfig(),
expectedDevices: []string{"nvidia.com/gpu=0"},
},
{
description: "devices from environment variables - multiple unique devices",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=0,1,2"},
},
},
config: getTestConfig(),
expectedDevices: []string{"nvidia.com/gpu=0", "nvidia.com/gpu=1", "nvidia.com/gpu=2"},
},
{
description: "devices from environment variables - duplicate devices should be deduplicated",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=0,1,0,2,1"},
},
},
config: getTestConfig(),
expectedDevices: []string{"nvidia.com/gpu=0", "nvidia.com/gpu=1", "nvidia.com/gpu=2"},
},
{
description: "devices from environment variables - qualified device names",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=example.com/device=dev1,0,example.com/device=dev2"},
},
},
config: getTestConfig(),
expectedDevices: []string{"example.com/device=dev1", "nvidia.com/gpu=0", "example.com/device=dev2"},
},
{
description: "devices from environment variables - duplicate qualified device names should be deduplicated",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=example.com/device=dev1,0,example.com/device=dev1,nvidia.com/gpu=0"},
},
},
config: getTestConfig(),
expectedDevices: []string{"example.com/device=dev1", "nvidia.com/gpu=0"},
},
{
description: "annotation devices take precedence over environment variables",
spec: &specs.Spec{
Annotations: map[string]string{
"cdi.k8s.io/test": "example.com/device=annotation-device",
},
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=0,1"},
},
},
config: getTestConfigWithAnnotations(),
expectedDevices: []string{"example.com/device=annotation-device"},
},
{
description: "devices from environment variables - empty and whitespace devices should be filtered",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=0, ,1, \t ,2"},
},
},
config: getTestConfig(),
expectedDevices: []string{"nvidia.com/gpu=0", "nvidia.com/gpu=1", "nvidia.com/gpu=2"},
},
{
description: "devices from environment variables - void should return empty",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=void"},
},
},
config: getTestConfig(),
expectedDevices: nil,
},
{
description: "devices from environment variables - none should be filtered out",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=none"},
},
},
config: getTestConfig(),
expectedDevices: nil,
},
{
description: "devices from environment variables - all empty devices should result in no devices",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES= , , \t "},
},
},
config: getTestConfig(),
expectedDevices: nil,
},
{
description: "error loading OCI spec",
loadError: fmt.Errorf("failed to load spec"),
config: getTestConfig(),
expectedError: "failed to load OCI spec",
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
mockSpec := &oci.SpecMock{
LoadFunc: func() (*specs.Spec, error) {
if tc.loadError != nil {
return nil, tc.loadError
}
return tc.spec, nil
},
}
devices, err := getDevicesFromSpec(logger, mockSpec, tc.config)
if tc.expectedError != "" {
require.Error(t, err)
require.Nil(t, devices)
require.Contains(t, err.Error(), tc.expectedError)
} else {
require.NoError(t, err)
require.ElementsMatch(t, tc.expectedDevices, devices)
}
// Verify that Load was called exactly once
require.Len(t, mockSpec.LoadCalls(), 1)
})
}
}

View File

@ -33,7 +33,7 @@ import (
// NewCSVModifier creates a modifier that applies modications to an OCI spec if required by the runtime wrapper.
// The modifications are defined by CSV MountSpecs.
func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image.CUDA) (oci.SpecModifier, error) {
if devices := container.VisibleDevicesFromEnvVar(); len(devices) == 0 {
if devices := container.VisibleDevices(); len(devices) == 0 {
logger.Infof("No modification required; no devices requested")
return nil, nil
}

View File

@ -37,7 +37,7 @@ import (
//
// If not devices are selected, no changes are made.
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) {
if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 {
if devices := image.VisibleDevices(); len(devices) == 0 {
logger.Infof("No modification required; no devices requested")
return nil, nil
}

View File

@ -65,7 +65,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerI
// requiresGraphicsModifier determines whether a graphics modifier is required.
func requiresGraphicsModifier(cudaImage image.CUDA) (bool, string) {
if devices := cudaImage.VisibleDevicesFromEnvVar(); len(devices) == 0 {
if devices := cudaImage.VisibleDevices(); len(devices) == 0 {
return false, "no devices requested"
}