Consider all Swarm resource envvars

This change extends the support for multiple envvars when
specifying swarm resources to consider ALL of the specified
environment variables instead of the first match.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar 2022-10-05 12:32:28 +02:00
parent b7ba96a72e
commit 877832da69
4 changed files with 51 additions and 25 deletions

View File

@ -164,10 +164,23 @@ func isPrivileged(s *Spec) bool {
} }
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string { func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
// Build a list of envvars to consider. Note that the Swarm Resource envvars have a higher precedence. // We check if the image has at least one of the Swarm resource envvars defined and use this
envVars := append(swarmResourceEnvvars, envNVVisibleDevices) // if specified.
var hasSwarmEnvvar bool
for _, envvar := range swarmResourceEnvvars {
if _, exists := image[envvar]; exists {
hasSwarmEnvvar = true
break
}
}
var devices []string
if hasSwarmEnvvar {
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
} else {
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
}
devices := image.DevicesFromEnvvars(envVars...).List()
if len(devices) == 0 { if len(devices) == 0 {
return nil return nil
} }

View File

@ -1,6 +1,7 @@
package main package main
import ( import (
"fmt"
"path/filepath" "path/filepath"
"testing" "testing"
@ -906,17 +907,20 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
expectedDevices: &anotherGPUID, expectedDevices: &anotherGPUID,
}, },
{ {
description: "First available swarm resource envvar is selected and overrides NVIDIA_VISIBLE_DEVICES if present", description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"}, swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{ env: map[string]string{
envNVVisibleDevices: gpuID, envNVVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS": thirdGPUID, "DOCKER_RESOURCE_GPUS": thirdGPUID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
}, },
expectedDevices: &thirdGPUID, expectedDevices: func() *string {
result := fmt.Sprintf("%s,%s", thirdGPUID, anotherGPUID)
return &result
}(),
}, },
{ {
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present", description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"}, swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{ env: map[string]string{
envNVVisibleDevices: gpuID, envNVVisibleDevices: gpuID,

View File

@ -115,28 +115,35 @@ func (i CUDA) HasDisableRequire() bool {
// DevicesFromEnvvars returns the devices requested by the image through environment variables // DevicesFromEnvvars returns the devices requested by the image through environment variables
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices { func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
// Grab a reference to devices from the first envvar // We concatenate all the devices from the specified envvars.
// in the list that actually exists in the environment. var isSet bool
var devices *string var devices []string
requested := make(map[string]bool)
for _, envVar := range envVars { for _, envVar := range envVars {
if devs, ok := i[envVar]; ok { if devs, ok := i[envVar]; ok {
devices = &devs isSet = true
break for _, d := range strings.Split(devs, ",") {
trimmed := strings.TrimSpace(d)
if len(trimmed) == 0 {
continue
}
devices = append(devices, trimmed)
requested[trimmed] = true
}
} }
} }
// Environment variable unset with legacy image: default to "all". // Environment variable unset with legacy image: default to "all".
if devices == nil && i.IsLegacy() { if !isSet && len(devices) == 0 && i.IsLegacy() {
return newVisibleDevices("all") return newVisibleDevices("all")
} }
// Environment variable unset or empty or "void": return nil // Environment variable unset or empty or "void": return nil
if devices == nil || len(*devices) == 0 || *devices == "void" { if len(devices) == 0 || requested["void"] {
return newVisibleDevices("void") return newVisibleDevices("void")
} }
// Environment variable set to "none": reset to "". return newVisibleDevices(devices...)
return newVisibleDevices(*devices)
} }
// GetDriverCapabilities returns the requested driver capabilities. // GetDriverCapabilities returns the requested driver capabilities.

View File

@ -33,7 +33,8 @@ var _ VisibleDevices = (*void)(nil)
var _ VisibleDevices = (*devices)(nil) var _ VisibleDevices = (*devices)(nil)
// newVisibleDevices creates a VisibleDevices based on the value of the specified envvar. // newVisibleDevices creates a VisibleDevices based on the value of the specified envvar.
func newVisibleDevices(envvar string) VisibleDevices { func newVisibleDevices(envvars ...string) VisibleDevices {
for _, envvar := range envvars {
if envvar == "all" { if envvar == "all" {
return all{} return all{}
} }
@ -43,8 +44,9 @@ func newVisibleDevices(envvar string) VisibleDevices {
if envvar == "" || envvar == "void" { if envvar == "" || envvar == "void" {
return void{} return void{}
} }
}
return newDevices(envvar) return newDevices(envvars...)
} }
type all struct{} type all struct{}