Add DevicesFromEnvvars function to CUDA image

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2022-06-29 17:33:05 +02:00
parent 25fd1aaf7e
commit 925c348565
3 changed files with 49 additions and 38 deletions

View File

@ -165,7 +165,7 @@ func isPrivileged(s *Spec) bool {
return false return false
} }
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string { func getDevicesFromEnvvar(image image.CUDA) *string {
// Build a list of envvars to consider. // Build a list of envvars to consider.
envVars := []string{envNVVisibleDevices} envVars := []string{envNVVisibleDevices}
if envSwarmGPU != nil { if envSwarmGPU != nil {
@ -173,35 +173,14 @@ func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
envVars = append([]string{*envSwarmGPU}, envVars...) envVars = append([]string{*envSwarmGPU}, envVars...)
} }
// Grab a reference to devices from the first envvar devices := image.DevicesFromEnvvars(envVars...)
// in the list that actually exists in the environment. if len(devices) == 0 {
var devices *string
for _, envVar := range envVars {
if devs, ok := env[envVar]; ok {
devices = &devs
break
}
}
// Environment variable unset with legacy image: default to "all".
if devices == nil && legacyImage {
all := "all"
return &all
}
// Environment variable unset or empty or "void": return nil
if devices == nil || len(*devices) == 0 || *devices == "void" {
return nil return nil
} }
// Environment variable set to "none": reset to "". devicesString := strings.Join(devices, ",")
if *devices == "none" {
empty := ""
return &empty
}
// Any other value. return &devicesString
return devices
} }
func getDevicesFromMounts(mounts []Mount) *string { func getDevicesFromMounts(mounts []Mount) *string {
@ -241,7 +220,7 @@ func getDevicesFromMounts(mounts []Mount) *string {
return &ret return &ret
} }
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string { func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *string {
// If enabled, try and get the device list from volume mounts first // If enabled, try and get the device list from volume mounts first
if hookConfig.AcceptDeviceListAsVolumeMounts { if hookConfig.AcceptDeviceListAsVolumeMounts {
devices := getDevicesFromMounts(mounts) devices := getDevicesFromMounts(mounts)
@ -251,7 +230,7 @@ func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, p
} }
// Fallback to reading from the environment variable if privileges are correct // Fallback to reading from the environment variable if privileges are correct
devices := getDevicesFromEnvvar(env, legacyImage) devices := getDevicesFromEnvvar(image)
if devices == nil { if devices == nil {
return nil return nil
} }
@ -307,7 +286,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
legacyImage := image.IsLegacy() legacyImage := image.IsLegacy()
var devices string var devices string
if d := getDevices(hookConfig, image, mounts, privileged, legacyImage); d != nil { if d := getDevices(hookConfig, image, mounts, privileged); d != nil {
devices = *d devices = *d
} else { } else {
// 'nil' devices means this is not a GPU container. // 'nil' devices means this is not a GPU container.

View File

@ -4,6 +4,7 @@ import (
"path/filepath" "path/filepath"
"testing" "testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
@ -671,7 +672,7 @@ func TestDeviceListSourcePriority(t *testing.T) {
hookConfig := getDefaultHookConfig() hookConfig := getDefaultHookConfig()
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false) devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
} }
// For all other tests, just grab the devices and check the results // For all other tests, just grab the devices and check the results
@ -693,7 +694,6 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description string description string
envSwarmGPU *string envSwarmGPU *string
env map[string]string env map[string]string
legacyImage bool
expectedDevices *string expectedDevices *string
}{ }{
{ {
@ -729,13 +729,15 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
env: map[string]string{ env: map[string]string{
envNVVisibleDevices: gpuID, envNVVisibleDevices: gpuID,
envCUDAVersion: "legacy",
}, },
legacyImage: true,
expectedDevices: &gpuID, expectedDevices: &gpuID,
}, },
{ {
description: "empty env returns all for legacy image", description: "empty env returns all for legacy image",
legacyImage: true, env: map[string]string{
envCUDAVersion: "legacy",
},
expectedDevices: &all, expectedDevices: &all,
}, },
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when // Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
@ -781,16 +783,16 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
env: map[string]string{ env: map[string]string{
envNVVisibleDevices: gpuID, envNVVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID, envDockerResourceGPUs: anotherGPUID,
envCUDAVersion: "legacy",
}, },
legacyImage: true,
expectedDevices: &gpuID, expectedDevices: &gpuID,
}, },
{ {
description: "empty env returns all for legacy image", description: "empty env returns all for legacy image",
env: map[string]string{ env: map[string]string{
envDockerResourceGPUs: anotherGPUID, envDockerResourceGPUs: anotherGPUID,
envCUDAVersion: "legacy",
}, },
legacyImage: true,
expectedDevices: &all, expectedDevices: &all,
}, },
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when // Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
@ -834,8 +836,8 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
envSwarmGPU: &envDockerResourceGPUs, envSwarmGPU: &envDockerResourceGPUs,
env: map[string]string{ env: map[string]string{
envDockerResourceGPUs: gpuID, envDockerResourceGPUs: gpuID,
envCUDAVersion: "legacy",
}, },
legacyImage: true,
expectedDevices: &gpuID, expectedDevices: &gpuID,
}, },
{ {
@ -860,7 +862,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
for i, tc := range tests { for i, tc := range tests {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
envSwarmGPU = tc.envSwarmGPU envSwarmGPU = tc.envSwarmGPU
devices := getDevicesFromEnvvar(tc.env, tc.legacyImage) devices := getDevicesFromEnvvar(image.CUDA(tc.env))
if tc.expectedDevices == nil { if tc.expectedDevices == nil {
require.Nil(t, devices, "%d: %v", i, tc) require.Nil(t, devices, "%d: %v", i, tc)
return return

View File

@ -112,6 +112,36 @@ func (i CUDA) HasDisableRequire() bool {
return false return false
} }
// DevicesFromEnvvars returns the devices requested by the image through
// environment variables.
//
// The names in envVars are checked in order and the first one that is present
// in the image environment (even with an empty value) supplies the request.
// The result is:
//   - {"all"} when none of the names is present and the image is legacy;
//   - nil when none of the names is present (non-legacy image), or the
//     selected value is empty or "void";
//   - a single empty string when the selected value is "none";
//   - the selected value split on "," otherwise.
func (i CUDA) DevicesFromEnvvars(envVars ...string) []string {
	var (
		requested string
		found     bool
	)
	// Stop at the first candidate that exists in the environment; later
	// names are ignored even if the selected value turns out to be empty.
	for _, name := range envVars {
		if requested, found = i[name]; found {
			break
		}
	}

	if !found {
		// Nothing was requested explicitly: only legacy images default to "all".
		if i.IsLegacy() {
			return []string{"all"}
		}
		return nil
	}

	switch requested {
	case "", "void":
		// Empty or "void" is treated the same as an unset variable.
		return nil
	case "none":
		// "none" is reset to an empty device name, which keeps the result
		// non-nil and therefore distinguishable from the unset case.
		return []string{""}
	default:
		return strings.Split(requested, ",")
	}
}
func (i CUDA) legacyVersion() (string, error) { func (i CUDA) legacyVersion() (string, error) {
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion]) majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
if err != nil { if err != nil {