Add ability to merge envvars of the form NVIDIA_VISIBLE_DEVICES_*

This allows someone to (for example) pass the following environment
variables:

NVIDIA_VISIBLE_DEVICES_0="0,1"
NVIDIA_VISIBLE_DEVICES_1="2,3"
NVIDIA_VISIBLE_DEVICES_WHATEVER="4,5"

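and, when the alpha-merge-visible-devices-envvars config option is enabled,
have the nvidia-container-toolkit automatically merge these into (the order
of the merged entries is not guaranteed):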

NVIDIA_VISIBLE_DEVICES="0,1,2,3,4,5"

This is useful (for example) if the full list of devices comes
from multiple, disparate sources.

Note: If NVIDIA_VISIBLE_DEVICES already exists as an environment variable,
its original value is replaced by the merged list; the original value is
*excluded* from the merge. We exclude the original value to ensure that we
have a way to override a default value of NVIDIA_VISIBLE_DEVICES="all" set
inside a container image.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
This commit is contained in:
Kevin Klues 2020-01-22 04:42:00 -08:00
parent 4e4de762b7
commit 01b4381282
8 changed files with 171 additions and 22 deletions

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false #no-cgroups = false
#user = "root:video" #user = "root:video"
ldconfig = "@/sbin/ldconfig" ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime] [nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log" #debug = "/var/log/nvidia-container-runtime.log"

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false #no-cgroups = false
#user = "root:video" #user = "root:video"
ldconfig = "@/sbin/ldconfig" ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime] [nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log" #debug = "/var/log/nvidia-container-runtime.log"

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false #no-cgroups = false
#user = "root:video" #user = "root:video"
ldconfig = "@/sbin/ldconfig" ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime] [nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log" #debug = "/var/log/nvidia-container-runtime.log"

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false #no-cgroups = false
user = "root:video" user = "root:video"
ldconfig = "@/sbin/ldconfig" ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime] [nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log" #debug = "/var/log/nvidia-container-runtime.log"

View File

@ -11,6 +11,7 @@ load-kmods = true
#no-cgroups = false #no-cgroups = false
#user = "root:video" #user = "root:video"
ldconfig = "@/sbin/ldconfig.real" ldconfig = "@/sbin/ldconfig.real"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime] [nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log" #debug = "/var/log/nvidia-container-runtime.log"

131
container_config_test.go Normal file
View File

@ -0,0 +1,131 @@
package main
import (
"github.com/stretchr/testify/require"
"sort"
"strings"
"testing"
)
// TestMergeVisibleDevicesEnvvars checks how getEnvMap populates
// NVIDIA_VISIBLE_DEVICES from NVIDIA_VISIBLE_DEVICES_* entries, both with
// the AlphaMergeVisibleDevicesEnvvars option switched on and switched off.
//
// With the option on, the suffixed variables are expected to replace any
// plain NVIDIA_VISIBLE_DEVICES entry, wherever it appears in the input. With
// the option off, the plain entry (or the empty string when it is absent) is
// expected to be left as is.
func TestMergeVisibleDevicesEnvvars(t *testing.T) {
	cases := []struct {
		desc         string
		env          []string
		want         string
		mergeEnabled bool
	}{
		{
			desc: "Simple Merge Enabled",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			want:         "0,1,2,3,4,5",
			mergeEnabled: true,
		},
		{
			desc: "Simple Merge Disabled",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			want:         "",
			mergeEnabled: false,
		},
		{
			desc: "Merge No Override (Enabled)",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES=all",
			},
			want:         "all",
			mergeEnabled: true,
		},
		{
			desc: "Merge No Override (Disabled)",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES=all",
			},
			want:         "all",
			mergeEnabled: false,
		},
		{
			desc: "Merge Override (Enabled, Before)",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES=all",
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			want:         "0,1,2,3,4,5",
			mergeEnabled: true,
		},
		{
			desc: "Merge Override (Enabled, After)",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
				"NVIDIA_VISIBLE_DEVICES=all",
			},
			want:         "0,1,2,3,4,5",
			mergeEnabled: true,
		},
		{
			desc: "Merge Override (Enabled, In Between)",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES=all",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			want:         "0,1,2,3,4,5",
			mergeEnabled: true,
		},
		{
			desc: "Merge Override (Disabled, Before)",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES=all",
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			want:         "all",
			mergeEnabled: false,
		},
		{
			desc: "Merge Override (Disabled, After)",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
				"NVIDIA_VISIBLE_DEVICES=all",
			},
			want:         "all",
			mergeEnabled: false,
		},
		{
			desc: "Merge Override (Disabled, In Between)",
			env: []string{
				"NVIDIA_VISIBLE_DEVICES_0=0,1",
				"NVIDIA_VISIBLE_DEVICES_1=2,3",
				"NVIDIA_VISIBLE_DEVICES=all",
				"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
			},
			want:         "all",
			mergeEnabled: false,
		},
	}

	for _, tt := range cases {
		t.Run(tt.desc, func(t *testing.T) {
			cliConfig := CLIConfig{AlphaMergeVisibleDevicesEnvvars: tt.mergeEnabled}
			visible := getEnvMap(tt.env, cliConfig)[envNVVisibleDevices]

			// Sort the individual device entries before comparing so the
			// assertion does not depend on the order in which the merged
			// values were joined.
			got := strings.Split(visible, ",")
			sort.Strings(got)

			require.Equal(t, tt.want, strings.Join(got, ","))
		})
	}
}

View File

@ -82,7 +82,7 @@ func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
return return
} }
func getEnvMap(e []string) (m map[string]string) { func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
m = make(map[string]string) m = make(map[string]string)
for _, s := range e { for _, s := range e {
p := strings.SplitN(s, "=", 2) p := strings.SplitN(s, "=", 2)
@ -91,6 +91,17 @@ func getEnvMap(e []string) (m map[string]string) {
} }
m[p[0]] = p[1] m[p[0]] = p[1]
} }
if config.AlphaMergeVisibleDevicesEnvvars {
var mergable []string
for k, v := range m {
if strings.HasPrefix(k, envNVVisibleDevices+"_") {
mergable = append(mergable, v)
}
}
if len(mergable) > 0 {
m[envNVVisibleDevices] = strings.Join(mergable, ",")
}
}
return return
} }
@ -254,7 +265,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
s := loadSpec(path.Join(b, "config.json")) s := loadSpec(path.Join(b, "config.json"))
env := getEnvMap(s.Process.Env) env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI)
envSwarmGPU = hook.SwarmResource envSwarmGPU = hook.SwarmResource
return containerConfig{ return containerConfig{
Pid: h.Pid, Pid: h.Pid,

View File

@ -20,16 +20,17 @@ var defaultPaths = [...]string{
// CLIConfig: options for nvidia-container-cli. // CLIConfig: options for nvidia-container-cli.
type CLIConfig struct { type CLIConfig struct {
Root *string `toml:"root"` Root *string `toml:"root"`
Path *string `toml:"path"` Path *string `toml:"path"`
Environment []string `toml:"environment"` Environment []string `toml:"environment"`
Debug *string `toml:"debug"` Debug *string `toml:"debug"`
Ldcache *string `toml:"ldcache"` Ldcache *string `toml:"ldcache"`
LoadKmods bool `toml:"load-kmods"` LoadKmods bool `toml:"load-kmods"`
NoPivot bool `toml:"no-pivot"` NoPivot bool `toml:"no-pivot"`
NoCgroups bool `toml:"no-cgroups"` NoCgroups bool `toml:"no-cgroups"`
User *string `toml:"user"` User *string `toml:"user"`
Ldconfig *string `toml:"ldconfig"` Ldconfig *string `toml:"ldconfig"`
AlphaMergeVisibleDevicesEnvvars bool `toml:"alpha-merge-visible-devices-envvars"`
} }
type HookConfig struct { type HookConfig struct {
@ -44,16 +45,17 @@ func getDefaultHookConfig() (config HookConfig) {
DisableRequire: false, DisableRequire: false,
SwarmResource: nil, SwarmResource: nil,
NvidiaContainerCLI: CLIConfig{ NvidiaContainerCLI: CLIConfig{
Root: nil, Root: nil,
Path: nil, Path: nil,
Environment: []string{}, Environment: []string{},
Debug: nil, Debug: nil,
Ldcache: nil, Ldcache: nil,
LoadKmods: true, LoadKmods: true,
NoPivot: false, NoPivot: false,
NoCgroups: false, NoCgroups: false,
User: nil, User: nil,
Ldconfig: nil, Ldconfig: nil,
AlphaMergeVisibleDevicesEnvvars: false,
}, },
} }
} }