mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-12-01 16:52:54 +00:00
Add DevicesFromEnvvars function to CUDA image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
25fd1aaf7e
commit
925c348565
@ -165,7 +165,7 @@ func isPrivileged(s *Spec) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
func getDevicesFromEnvvar(image image.CUDA) *string {
|
||||||
// Build a list of envvars to consider.
|
// Build a list of envvars to consider.
|
||||||
envVars := []string{envNVVisibleDevices}
|
envVars := []string{envNVVisibleDevices}
|
||||||
if envSwarmGPU != nil {
|
if envSwarmGPU != nil {
|
||||||
@ -173,35 +173,14 @@ func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
|||||||
envVars = append([]string{*envSwarmGPU}, envVars...)
|
envVars = append([]string{*envSwarmGPU}, envVars...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grab a reference to devices from the first envvar
|
devices := image.DevicesFromEnvvars(envVars...)
|
||||||
// in the list that actually exists in the environment.
|
if len(devices) == 0 {
|
||||||
var devices *string
|
|
||||||
for _, envVar := range envVars {
|
|
||||||
if devs, ok := env[envVar]; ok {
|
|
||||||
devices = &devs
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Environment variable unset with legacy image: default to "all".
|
|
||||||
if devices == nil && legacyImage {
|
|
||||||
all := "all"
|
|
||||||
return &all
|
|
||||||
}
|
|
||||||
|
|
||||||
// Environment variable unset or empty or "void": return nil
|
|
||||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Environment variable set to "none": reset to "".
|
devicesString := strings.Join(devices, ",")
|
||||||
if *devices == "none" {
|
|
||||||
empty := ""
|
|
||||||
return &empty
|
|
||||||
}
|
|
||||||
|
|
||||||
// Any other value.
|
return &devicesString
|
||||||
return devices
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevicesFromMounts(mounts []Mount) *string {
|
func getDevicesFromMounts(mounts []Mount) *string {
|
||||||
@ -241,7 +220,7 @@ func getDevicesFromMounts(mounts []Mount) *string {
|
|||||||
return &ret
|
return &ret
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
|
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *string {
|
||||||
// If enabled, try and get the device list from volume mounts first
|
// If enabled, try and get the device list from volume mounts first
|
||||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||||
devices := getDevicesFromMounts(mounts)
|
devices := getDevicesFromMounts(mounts)
|
||||||
@ -251,7 +230,7 @@ func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, p
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Fallback to reading from the environment variable if privileges are correct
|
// Fallback to reading from the environment variable if privileges are correct
|
||||||
devices := getDevicesFromEnvvar(env, legacyImage)
|
devices := getDevicesFromEnvvar(image)
|
||||||
if devices == nil {
|
if devices == nil {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@ -307,7 +286,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
|
|||||||
legacyImage := image.IsLegacy()
|
legacyImage := image.IsLegacy()
|
||||||
|
|
||||||
var devices string
|
var devices string
|
||||||
if d := getDevices(hookConfig, image, mounts, privileged, legacyImage); d != nil {
|
if d := getDevices(hookConfig, image, mounts, privileged); d != nil {
|
||||||
devices = *d
|
devices = *d
|
||||||
} else {
|
} else {
|
||||||
// 'nil' devices means this is not a GPU container.
|
// 'nil' devices means this is not a GPU container.
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -671,7 +672,7 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
|||||||
hookConfig := getDefaultHookConfig()
|
hookConfig := getDefaultHookConfig()
|
||||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false)
|
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
|
||||||
}
|
}
|
||||||
|
|
||||||
// For all other tests, just grab the devices and check the results
|
// For all other tests, just grab the devices and check the results
|
||||||
@ -693,7 +694,6 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
description string
|
description string
|
||||||
envSwarmGPU *string
|
envSwarmGPU *string
|
||||||
env map[string]string
|
env map[string]string
|
||||||
legacyImage bool
|
|
||||||
expectedDevices *string
|
expectedDevices *string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
@ -729,13 +729,15 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
envNVVisibleDevices: gpuID,
|
||||||
|
envCUDAVersion: "legacy",
|
||||||
},
|
},
|
||||||
legacyImage: true,
|
|
||||||
expectedDevices: &gpuID,
|
expectedDevices: &gpuID,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "empty env returns all for legacy image",
|
description: "empty env returns all for legacy image",
|
||||||
legacyImage: true,
|
env: map[string]string{
|
||||||
|
envCUDAVersion: "legacy",
|
||||||
|
},
|
||||||
expectedDevices: &all,
|
expectedDevices: &all,
|
||||||
},
|
},
|
||||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
||||||
@ -781,16 +783,16 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
envNVVisibleDevices: gpuID,
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
envCUDAVersion: "legacy",
|
||||||
},
|
},
|
||||||
legacyImage: true,
|
|
||||||
expectedDevices: &gpuID,
|
expectedDevices: &gpuID,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "empty env returns all for legacy image",
|
description: "empty env returns all for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
envCUDAVersion: "legacy",
|
||||||
},
|
},
|
||||||
legacyImage: true,
|
|
||||||
expectedDevices: &all,
|
expectedDevices: &all,
|
||||||
},
|
},
|
||||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
||||||
@ -834,8 +836,8 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
envSwarmGPU: &envDockerResourceGPUs,
|
envSwarmGPU: &envDockerResourceGPUs,
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envDockerResourceGPUs: gpuID,
|
envDockerResourceGPUs: gpuID,
|
||||||
|
envCUDAVersion: "legacy",
|
||||||
},
|
},
|
||||||
legacyImage: true,
|
|
||||||
expectedDevices: &gpuID,
|
expectedDevices: &gpuID,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
@ -860,7 +862,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
for i, tc := range tests {
|
for i, tc := range tests {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
envSwarmGPU = tc.envSwarmGPU
|
envSwarmGPU = tc.envSwarmGPU
|
||||||
devices := getDevicesFromEnvvar(tc.env, tc.legacyImage)
|
devices := getDevicesFromEnvvar(image.CUDA(tc.env))
|
||||||
if tc.expectedDevices == nil {
|
if tc.expectedDevices == nil {
|
||||||
require.Nil(t, devices, "%d: %v", i, tc)
|
require.Nil(t, devices, "%d: %v", i, tc)
|
||||||
return
|
return
|
||||||
|
@ -112,6 +112,36 @@ func (i CUDA) HasDisableRequire() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||||
|
func (i CUDA) DevicesFromEnvvars(envVars ...string) []string {
|
||||||
|
// Grab a reference to devices from the first envvar
|
||||||
|
// in the list that actually exists in the environment.
|
||||||
|
var devices *string
|
||||||
|
for _, envVar := range envVars {
|
||||||
|
if devs, ok := i[envVar]; ok {
|
||||||
|
devices = &devs
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Environment variable unset with legacy image: default to "all".
|
||||||
|
if devices == nil && i.IsLegacy() {
|
||||||
|
return []string{"all"}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Environment variable unset or empty or "void": return nil
|
||||||
|
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Environment variable set to "none": reset to "".
|
||||||
|
if *devices == "none" {
|
||||||
|
return []string{""}
|
||||||
|
}
|
||||||
|
|
||||||
|
return strings.Split(*devices, ",")
|
||||||
|
}
|
||||||
|
|
||||||
func (i CUDA) legacyVersion() (string, error) {
|
func (i CUDA) legacyVersion() (string, error) {
|
||||||
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
|
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
Loading…
Reference in New Issue
Block a user