mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 08:18:32 +00:00
Merge branch 'multiple-docker-swarm' into 'main'
Consider all Swarm resource envvars See merge request nvidia/container-toolkit/container-toolkit!222
This commit is contained in:
commit
205ba098e9
@ -164,10 +164,23 @@ func isPrivileged(s *Spec) bool {
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
||||
// Build a list of envvars to consider. Note that the Swarm Resource envvars have a higher precedence.
|
||||
envVars := append(swarmResourceEnvvars, envNVVisibleDevices)
|
||||
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
||||
// if specified.
|
||||
var hasSwarmEnvvar bool
|
||||
for _, envvar := range swarmResourceEnvvars {
|
||||
if _, exists := image[envvar]; exists {
|
||||
hasSwarmEnvvar = true
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
var devices []string
|
||||
if hasSwarmEnvvar {
|
||||
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
||||
} else {
|
||||
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
|
||||
}
|
||||
|
||||
devices := image.DevicesFromEnvvars(envVars...).List()
|
||||
if len(devices) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
@ -1,6 +1,7 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
@ -906,17 +907,20 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "First available swarm resource envvar is selected and overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &thirdGPUID,
|
||||
expectedDevices: func() *string {
|
||||
result := fmt.Sprintf("%s,%s", thirdGPUID, anotherGPUID)
|
||||
return &result
|
||||
}(),
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
|
@ -115,28 +115,35 @@ func (i CUDA) HasDisableRequire() bool {
|
||||
|
||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||
// Grab a reference to devices from the first envvar
|
||||
// in the list that actually exists in the environment.
|
||||
var devices *string
|
||||
// We concantenate all the devices from the specified envvars.
|
||||
var isSet bool
|
||||
var devices []string
|
||||
requested := make(map[string]bool)
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := i[envVar]; ok {
|
||||
devices = &devs
|
||||
break
|
||||
isSet = true
|
||||
for _, d := range strings.Split(devs, ",") {
|
||||
trimmed := strings.TrimSpace(d)
|
||||
if len(trimmed) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, trimmed)
|
||||
requested[trimmed] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if devices == nil && i.IsLegacy() {
|
||||
if !isSet && len(devices) == 0 && i.IsLegacy() {
|
||||
return newVisibleDevices("all")
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
||||
if len(devices) == 0 || requested["void"] {
|
||||
return newVisibleDevices("void")
|
||||
}
|
||||
|
||||
// Environment variable set to "none": reset to "".
|
||||
return newVisibleDevices(*devices)
|
||||
return newVisibleDevices(devices...)
|
||||
}
|
||||
|
||||
// GetDriverCapabilities returns the requested driver capabilities.
|
||||
|
@ -33,18 +33,20 @@ var _ VisibleDevices = (*void)(nil)
|
||||
var _ VisibleDevices = (*devices)(nil)
|
||||
|
||||
// newVisibleDevices creates a VisibleDevices based on the value of the specified envvar.
|
||||
func newVisibleDevices(envvar string) VisibleDevices {
|
||||
if envvar == "all" {
|
||||
return all{}
|
||||
}
|
||||
if envvar == "none" {
|
||||
return none{}
|
||||
}
|
||||
if envvar == "" || envvar == "void" {
|
||||
return void{}
|
||||
func newVisibleDevices(envvars ...string) VisibleDevices {
|
||||
for _, envvar := range envvars {
|
||||
if envvar == "all" {
|
||||
return all{}
|
||||
}
|
||||
if envvar == "none" {
|
||||
return none{}
|
||||
}
|
||||
if envvar == "" || envvar == "void" {
|
||||
return void{}
|
||||
}
|
||||
}
|
||||
|
||||
return newDevices(envvar)
|
||||
return newDevices(envvars...)
|
||||
}
|
||||
|
||||
type all struct{}
|
||||
|
Loading…
Reference in New Issue
Block a user