mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Merge branch 'multiple-docker-swarm' into 'main'
Consider all Swarm resource envvars. See merge request nvidia/container-toolkit/container-toolkit!222.
This commit is contained in:
commit
205ba098e9
@ -164,10 +164,23 @@ func isPrivileged(s *Spec) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
||||||
// Build a list of envvars to consider. Note that the Swarm Resource envvars have a higher precedence.
|
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
||||||
envVars := append(swarmResourceEnvvars, envNVVisibleDevices)
|
// if specified.
|
||||||
|
var hasSwarmEnvvar bool
|
||||||
|
for _, envvar := range swarmResourceEnvvars {
|
||||||
|
if _, exists := image[envvar]; exists {
|
||||||
|
hasSwarmEnvvar = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var devices []string
|
||||||
|
if hasSwarmEnvvar {
|
||||||
|
devices = image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
||||||
|
} else {
|
||||||
|
devices = image.DevicesFromEnvvars(envNVVisibleDevices).List()
|
||||||
|
}
|
||||||
|
|
||||||
devices := image.DevicesFromEnvvars(envVars...).List()
|
|
||||||
if len(devices) == 0 {
|
if len(devices) == 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
@ -906,17 +907,20 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
expectedDevices: &anotherGPUID,
|
expectedDevices: &anotherGPUID,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "First available swarm resource envvar is selected and overrides NVIDIA_VISIBLE_DEVICES if present",
|
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
||||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
envNVVisibleDevices: gpuID,
|
||||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
},
|
},
|
||||||
expectedDevices: &thirdGPUID,
|
expectedDevices: func() *string {
|
||||||
|
result := fmt.Sprintf("%s,%s", thirdGPUID, anotherGPUID)
|
||||||
|
return &result
|
||||||
|
}(),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
||||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
envNVVisibleDevices: gpuID,
|
||||||
|
@ -115,28 +115,35 @@ func (i CUDA) HasDisableRequire() bool {
|
|||||||
|
|
||||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||||
// Grab a reference to devices from the first envvar
|
// We concatenate all the devices from the specified envvars.
|
||||||
// in the list that actually exists in the environment.
|
var isSet bool
|
||||||
var devices *string
|
var devices []string
|
||||||
|
requested := make(map[string]bool)
|
||||||
for _, envVar := range envVars {
|
for _, envVar := range envVars {
|
||||||
if devs, ok := i[envVar]; ok {
|
if devs, ok := i[envVar]; ok {
|
||||||
devices = &devs
|
isSet = true
|
||||||
break
|
for _, d := range strings.Split(devs, ",") {
|
||||||
|
trimmed := strings.TrimSpace(d)
|
||||||
|
if len(trimmed) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
devices = append(devices, trimmed)
|
||||||
|
requested[trimmed] = true
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Environment variable unset with legacy image: default to "all".
|
// Environment variable unset with legacy image: default to "all".
|
||||||
if devices == nil && i.IsLegacy() {
|
if !isSet && len(devices) == 0 && i.IsLegacy() {
|
||||||
return newVisibleDevices("all")
|
return newVisibleDevices("all")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Environment variable unset or empty or "void": return nil
|
// Environment variable unset or empty or "void": return nil
|
||||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
if len(devices) == 0 || requested["void"] {
|
||||||
return newVisibleDevices("void")
|
return newVisibleDevices("void")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Environment variable set to "none": reset to "".
|
return newVisibleDevices(devices...)
|
||||||
return newVisibleDevices(*devices)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetDriverCapabilities returns the requested driver capabilities.
|
// GetDriverCapabilities returns the requested driver capabilities.
|
||||||
|
@ -33,7 +33,8 @@ var _ VisibleDevices = (*void)(nil)
|
|||||||
var _ VisibleDevices = (*devices)(nil)
|
var _ VisibleDevices = (*devices)(nil)
|
||||||
|
|
||||||
// newVisibleDevices creates a VisibleDevices based on the value of the specified envvar.
|
// newVisibleDevices creates a VisibleDevices based on the value of the specified envvar.
|
||||||
func newVisibleDevices(envvar string) VisibleDevices {
|
func newVisibleDevices(envvars ...string) VisibleDevices {
|
||||||
|
for _, envvar := range envvars {
|
||||||
if envvar == "all" {
|
if envvar == "all" {
|
||||||
return all{}
|
return all{}
|
||||||
}
|
}
|
||||||
@ -43,8 +44,9 @@ func newVisibleDevices(envvar string) VisibleDevices {
|
|||||||
if envvar == "" || envvar == "void" {
|
if envvar == "" || envvar == "void" {
|
||||||
return void{}
|
return void{}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return newDevices(envvar)
|
return newDevices(envvars...)
|
||||||
}
|
}
|
||||||
|
|
||||||
type all struct{}
|
type all struct{}
|
||||||
|
Loading…
Reference in New Issue
Block a user