Add ability to merge envvars of the form NVIDIA_VISIBLE_DEVICES_*

This allows someone to (for example) pass the following environment
variables:

NVIDIA_VISIBLE_DEVICES_0="0,1"
NVIDIA_VISIBLE_DEVICES_1="2,3"
NVIDIA_VISIBLE_DEVICES_WHATEVER="4,5"

and have the nvidia-container-toolkit automatically merge these into:

NVIDIA_VISIBLE_DEVICES="0,1,2,3,4,5"

This is useful (for example) if the full list of devices comes
from multiple, disparate sources.

Note: If NVIDIA_VISIBLE_DEVICES already exists as an environment
variable, the merged value replaces it; the original value is *not*
included in the merge. We exclude the original value to ensure that we
have a way to override the default value of NVIDIA_VISIBLE_DEVICES set
to "all" inside a container image.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
This commit is contained in:
Kevin Klues 2020-01-22 04:42:00 -08:00
parent 4e4de762b7
commit 01b4381282
8 changed files with 171 additions and 22 deletions

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false
user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig.real"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

131
container_config_test.go Normal file
View File

@ -0,0 +1,131 @@
package main
import (
"github.com/stretchr/testify/require"
"sort"
"strings"
"testing"
)
// TestMergeVisibleDevicesEnvvars is a table-driven test for getEnvMap's
// handling of NVIDIA_VISIBLE_DEVICES_* variables.
//
// Each case feeds a list of "KEY=VALUE" strings to getEnvMap together with a
// CLIConfig whose AlphaMergeVisibleDevicesEnvvars flag is set per case, then
// checks the resulting value stored under envNVVisibleDevices:
//
//   - with merging enabled and at least one suffixed variable present, the
//     expected value is the union of the suffixed values, regardless of where
//     a plain NVIDIA_VISIBLE_DEVICES entry appears in the input;
//   - with merging enabled but no suffixed variables, or with merging
//     disabled, the expected value is whatever plain NVIDIA_VISIBLE_DEVICES
//     was set to (empty when it is absent).
func TestMergeVisibleDevicesEnvvars(t *testing.T) {
	type mergeCase struct {
		name        string
		input       []string
		expected    string
		enableMerge bool
	}

	// normalize sorts the comma-separated device list so the comparison does
	// not depend on the order in which the merged values were joined.
	normalize := func(raw string) string {
		parts := strings.Split(raw, ",")
		sort.Strings(parts)
		return strings.Join(parts, ",")
	}

	cases := []mergeCase{
		{
			name: "Simple Merge Enabled",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			expected:    "0,1,2,3,4,5",
			enableMerge: true,
		},
		{
			name: "Simple Merge Disabled",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			expected:    "",
			enableMerge: false,
		},
		{
			name: "Merge No Override (Enabled)",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES=all",
			},
			expected:    "all",
			enableMerge: true,
		},
		{
			name: "Merge No Override (Disabled)",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES=all",
			},
			expected:    "all",
			enableMerge: false,
		},
		{
			name: "Merge Override (Enabled, Before)",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES=all",
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			expected:    "0,1,2,3,4,5",
			enableMerge: true,
		},
		{
			name: "Merge Override (Enabled, After)",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
				"NVIDIA_VISIBLE_DEVICES=all",
			},
			expected:    "0,1,2,3,4,5",
			enableMerge: true,
		},
		{
			name: "Merge Override (Enabled, In Between)",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES=all",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			expected:    "0,1,2,3,4,5",
			enableMerge: true,
		},
		{
			name: "Merge Override (Disabled, Before)",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES=all",
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			expected:    "all",
			enableMerge: false,
		},
		{
			name: "Merge Override (Disabled, After)",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
				"NVIDIA_VISIBLE_DEVICES=all",
			},
			expected:    "all",
			enableMerge: false,
		},
		{
			name: "Merge Override (Disabled, In Between)",
			input: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES=all",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			expected:    "all",
			enableMerge: false,
		},
	}

	for i := range cases {
		c := cases[i]
		t.Run(c.name, func(t *testing.T) {
			cliConfig := CLIConfig{AlphaMergeVisibleDevicesEnvvars: c.enableMerge}
			env := getEnvMap(c.input, cliConfig)
			require.Equal(t, c.expected, normalize(env[envNVVisibleDevices]))
		})
	}
}

View File

@ -82,7 +82,7 @@ func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
return
}
func getEnvMap(e []string) (m map[string]string) {
func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
m = make(map[string]string)
for _, s := range e {
p := strings.SplitN(s, "=", 2)
@ -91,6 +91,17 @@ func getEnvMap(e []string) (m map[string]string) {
}
m[p[0]] = p[1]
}
if config.AlphaMergeVisibleDevicesEnvvars {
var mergable []string
for k, v := range m {
if strings.HasPrefix(k, envNVVisibleDevices+"_") {
mergable = append(mergable, v)
}
}
if len(mergable) > 0 {
m[envNVVisibleDevices] = strings.Join(mergable, ",")
}
}
return
}
@ -254,7 +265,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
s := loadSpec(path.Join(b, "config.json"))
env := getEnvMap(s.Process.Env)
env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI)
envSwarmGPU = hook.SwarmResource
return containerConfig{
Pid: h.Pid,

View File

@ -30,6 +30,7 @@ type CLIConfig struct {
NoCgroups bool `toml:"no-cgroups"`
User *string `toml:"user"`
Ldconfig *string `toml:"ldconfig"`
AlphaMergeVisibleDevicesEnvvars bool `toml:"alpha-merge-visible-devices-envvars"`
}
type HookConfig struct {
@ -54,6 +55,7 @@ func getDefaultHookConfig() (config HookConfig) {
NoCgroups: false,
User: nil,
Ldconfig: nil,
AlphaMergeVisibleDevicesEnvvars: false,
},
}
}