Consider all Swarm resource envvars

This change extends the support for multiple envvars when
specifying swarm resources to consider ALL of the specified
environment variables instead of the first match.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2022-10-05 12:32:28 +02:00
parent b7ba96a72e
commit 877832da69
4 changed files with 51 additions and 25 deletions

View File

@@ -164,10 +164,23 @@ func isPrivileged(s *Spec) bool {
}
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
// Build a list of envvars to consider. Note that the Swarm Resource envvars have a higher precedence.
envVars := append(swarmResourceEnvvars, envNVVisibleDevices)
// We check if the image has at least one of the Swarm resource envvars defined and use this
// if specified.
var hasSwarmEnvvar bool
for _, envvar := range swarmResourceEnvvars {
if _, exists := image[envvar]; exists {
hasSwarmEnvvar = true
break
}
}
var devices []string
if hasSwarmEnvvar {
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
} else {
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
}
devices := image.DevicesFromEnvvars(envVars...).List()
if len(devices) == 0 {
return nil
}

View File

@@ -1,6 +1,7 @@
package main
import (
"fmt"
"path/filepath"
"testing"
@@ -906,17 +907,20 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
expectedDevices: &anotherGPUID,
},
{
description: "First available swarm resource envvar is selected and overrides NVIDIA_VISIBLE_DEVICES if present",
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
envNVVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS": thirdGPUID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: &thirdGPUID,
expectedDevices: func() *string {
result := fmt.Sprintf("%s,%s", thirdGPUID, anotherGPUID)
return &result
}(),
},
{
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
envNVVisibleDevices: gpuID,