mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-03 12:20:46 +00:00
Merge branch 'upstream-add-alternate-device-list' into 'master'
Add the ability to pull the device list from mounted files instead of just Envvars. See merge request nvidia/container-toolkit/container-toolkit!15
This commit is contained in:
commit
4448319605
@ -1,5 +1,7 @@
|
|||||||
disable-require = false
|
disable-require = false
|
||||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||||
|
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||||
|
#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
#root = "/run/nvidia/driver"
|
#root = "/run/nvidia/driver"
|
||||||
@ -11,7 +13,6 @@ load-kmods = true
|
|||||||
#no-cgroups = false
|
#no-cgroups = false
|
||||||
#user = "root:video"
|
#user = "root:video"
|
||||||
ldconfig = "@/sbin/ldconfig"
|
ldconfig = "@/sbin/ldconfig"
|
||||||
#alpha-merge-visible-devices-envvars = false
|
|
||||||
|
|
||||||
[nvidia-container-runtime]
|
[nvidia-container-runtime]
|
||||||
#debug = "/var/log/nvidia-container-runtime.log"
|
#debug = "/var/log/nvidia-container-runtime.log"
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
disable-require = false
|
disable-require = false
|
||||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||||
|
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||||
|
#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
#root = "/run/nvidia/driver"
|
#root = "/run/nvidia/driver"
|
||||||
@ -11,7 +13,6 @@ load-kmods = true
|
|||||||
#no-cgroups = false
|
#no-cgroups = false
|
||||||
#user = "root:video"
|
#user = "root:video"
|
||||||
ldconfig = "@/sbin/ldconfig"
|
ldconfig = "@/sbin/ldconfig"
|
||||||
#alpha-merge-visible-devices-envvars = false
|
|
||||||
|
|
||||||
[nvidia-container-runtime]
|
[nvidia-container-runtime]
|
||||||
#debug = "/var/log/nvidia-container-runtime.log"
|
#debug = "/var/log/nvidia-container-runtime.log"
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
disable-require = false
|
disable-require = false
|
||||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||||
|
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||||
|
#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
#root = "/run/nvidia/driver"
|
#root = "/run/nvidia/driver"
|
||||||
@ -11,7 +13,6 @@ load-kmods = true
|
|||||||
#no-cgroups = false
|
#no-cgroups = false
|
||||||
#user = "root:video"
|
#user = "root:video"
|
||||||
ldconfig = "@/sbin/ldconfig"
|
ldconfig = "@/sbin/ldconfig"
|
||||||
#alpha-merge-visible-devices-envvars = false
|
|
||||||
|
|
||||||
[nvidia-container-runtime]
|
[nvidia-container-runtime]
|
||||||
#debug = "/var/log/nvidia-container-runtime.log"
|
#debug = "/var/log/nvidia-container-runtime.log"
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
disable-require = false
|
disable-require = false
|
||||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||||
|
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||||
|
#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
#root = "/run/nvidia/driver"
|
#root = "/run/nvidia/driver"
|
||||||
@ -11,7 +13,6 @@ load-kmods = true
|
|||||||
#no-cgroups = false
|
#no-cgroups = false
|
||||||
user = "root:video"
|
user = "root:video"
|
||||||
ldconfig = "@/sbin/ldconfig"
|
ldconfig = "@/sbin/ldconfig"
|
||||||
#alpha-merge-visible-devices-envvars = false
|
|
||||||
|
|
||||||
[nvidia-container-runtime]
|
[nvidia-container-runtime]
|
||||||
#debug = "/var/log/nvidia-container-runtime.log"
|
#debug = "/var/log/nvidia-container-runtime.log"
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
disable-require = false
|
disable-require = false
|
||||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||||
|
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||||
|
#look-for-nvidia-visible-devices-as-volume-mounts-under = "/var/run/nvidia-container-devices"
|
||||||
|
|
||||||
[nvidia-container-cli]
|
[nvidia-container-cli]
|
||||||
#root = "/run/nvidia/driver"
|
#root = "/run/nvidia/driver"
|
||||||
@ -11,7 +13,6 @@ load-kmods = true
|
|||||||
#no-cgroups = false
|
#no-cgroups = false
|
||||||
#user = "root:video"
|
#user = "root:video"
|
||||||
ldconfig = "@/sbin/ldconfig.real"
|
ldconfig = "@/sbin/ldconfig.real"
|
||||||
#alpha-merge-visible-devices-envvars = false
|
|
||||||
|
|
||||||
[nvidia-container-runtime]
|
[nvidia-container-runtime]
|
||||||
#debug = "/var/log/nvidia-container-runtime.log"
|
#debug = "/var/log/nvidia-container-runtime.log"
|
||||||
|
@ -1,131 +0,0 @@
|
|||||||
package main
|
|
||||||
|
|
||||||
import (
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
"sort"
|
|
||||||
"strings"
|
|
||||||
"testing"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestMergeVisibleDevicesEnvvars(t *testing.T) {
|
|
||||||
var tests = []struct {
|
|
||||||
name string
|
|
||||||
input []string
|
|
||||||
expected string
|
|
||||||
enableMerge bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
"Simple Merge Enabled",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
|
||||||
},
|
|
||||||
"0,1,2,3,4,5",
|
|
||||||
true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Simple Merge Disabled",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
|
||||||
},
|
|
||||||
"",
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Merge No Override (Enabled)",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
},
|
|
||||||
"all",
|
|
||||||
true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Merge No Override (Disabled)",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
},
|
|
||||||
"all",
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Merge Override (Enabled, Before)",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
|
||||||
},
|
|
||||||
"0,1,2,3,4,5",
|
|
||||||
true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Merge Override (Enabled, After)",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
},
|
|
||||||
"0,1,2,3,4,5",
|
|
||||||
true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Merge Override (Enabled, In Between)",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
|
||||||
},
|
|
||||||
"0,1,2,3,4,5",
|
|
||||||
true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Merge Override (Disabled, Before)",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
|
||||||
},
|
|
||||||
"all",
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Merge Override (Disabled, After)",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
},
|
|
||||||
"all",
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"Merge Override (Disabled, In Between)",
|
|
||||||
[]string{
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
|
||||||
},
|
|
||||||
"all",
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.name, func(t *testing.T) {
|
|
||||||
config := CLIConfig{
|
|
||||||
AlphaMergeVisibleDevicesEnvvars: tc.enableMerge,
|
|
||||||
}
|
|
||||||
envvars := getEnvMap(tc.input, config)
|
|
||||||
devices := strings.Split(envvars[envNVVisibleDevices], ",")
|
|
||||||
sort.Strings(devices)
|
|
||||||
require.Equal(t, tc.expected, strings.Join(devices, ","))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
12
pkg/Godeps/Godeps.json
generated
12
pkg/Godeps/Godeps.json
generated
@ -1,12 +0,0 @@
|
|||||||
{
|
|
||||||
"ImportPath": "github.com/nvidia/nvidia-container-runtime/toolkit/nvidia-container-toolkit",
|
|
||||||
"GoVersion": "go1.9",
|
|
||||||
"GodepVersion": "v80",
|
|
||||||
"Deps": [
|
|
||||||
{
|
|
||||||
"ImportPath": "github.com/BurntSushi/toml",
|
|
||||||
"Comment": "v0.3.0-7-ga368813",
|
|
||||||
"Rev": "a368813c5e648fee92e5f6c30e3944ff9d5e8895"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
Binary file not shown.
@ -6,6 +6,7 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
|
"path/filepath"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
@ -73,6 +74,15 @@ type LinuxCapabilities struct {
|
|||||||
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Mount is the subset of an OCI runtime spec mount entry that is decoded
// from the container's config.json. It mirrors the upstream definition at
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
type Mount struct {
	// Destination is decoded from the "destination" key.
	Destination string `json:"destination"`
	// Type is decoded from the optional "type" key.
	Type string `json:"type,omitempty" platform:"linux,solaris"`
	// Source is decoded from the optional "source" key.
	Source string `json:"source,omitempty"`
	// Options is decoded from the optional "options" key.
	Options []string `json:"options,omitempty"`
}
|
||||||
|
|
||||||
// Spec from OCI runtime spec
|
// Spec from OCI runtime spec
|
||||||
// We use pointers to structs, similarly to the latest version of runtime-spec:
|
// We use pointers to structs, similarly to the latest version of runtime-spec:
|
||||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
|
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
|
||||||
@ -80,6 +90,7 @@ type Spec struct {
|
|||||||
Version *string `json:"ociVersion"`
|
Version *string `json:"ociVersion"`
|
||||||
Process *Process `json:"process,omitempty"`
|
Process *Process `json:"process,omitempty"`
|
||||||
Root *Root `json:"root,omitempty"`
|
Root *Root `json:"root,omitempty"`
|
||||||
|
Mounts []Mount `json:"mounts,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// HookState holds state information about the hook
|
// HookState holds state information about the hook
|
||||||
@ -108,7 +119,7 @@ func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
|
func getEnvMap(e []string) (m map[string]string) {
|
||||||
m = make(map[string]string)
|
m = make(map[string]string)
|
||||||
for _, s := range e {
|
for _, s := range e {
|
||||||
p := strings.SplitN(s, "=", 2)
|
p := strings.SplitN(s, "=", 2)
|
||||||
@ -117,17 +128,6 @@ func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
|
|||||||
}
|
}
|
||||||
m[p[0]] = p[1]
|
m[p[0]] = p[1]
|
||||||
}
|
}
|
||||||
if config.AlphaMergeVisibleDevicesEnvvars {
|
|
||||||
var mergable []string
|
|
||||||
for k, v := range m {
|
|
||||||
if strings.HasPrefix(k, envNVVisibleDevices+"_") {
|
|
||||||
mergable = append(mergable, v)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if len(mergable) > 0 {
|
|
||||||
m[envNVVisibleDevices] = strings.Join(mergable, ",")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -198,7 +198,7 @@ func isLegacyCUDAImage(env map[string]string) bool {
|
|||||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevices(env map[string]string, legacyImage bool) *string {
|
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
||||||
// Build a list of envvars to consider.
|
// Build a list of envvars to consider.
|
||||||
envVars := []string{envNVVisibleDevices}
|
envVars := []string{envNVVisibleDevices}
|
||||||
if envSwarmGPU != nil {
|
if envSwarmGPU != nil {
|
||||||
@ -236,6 +236,65 @@ func getDevices(env map[string]string, legacyImage bool) *string {
|
|||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func getDevicesFromMounts(root string, mounts []Mount) *string {
|
||||||
|
var devices []string
|
||||||
|
for _, m := range mounts {
|
||||||
|
root := filepath.Clean(root)
|
||||||
|
source := filepath.Clean(m.Source)
|
||||||
|
destination := filepath.Clean(m.Destination)
|
||||||
|
|
||||||
|
// Only consider mounts who's host volume is /dev/null
|
||||||
|
if source != "/dev/null" {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Only consider container mount points that begin with 'root'
|
||||||
|
if len(destination) < len(root) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
if destination[:len(root)] != root {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
// Grab the full path beyond 'root' and add it to the list of devices
|
||||||
|
device := destination[len(root):]
|
||||||
|
if len(device) > 0 && device[0] == '/' {
|
||||||
|
device = device[1:]
|
||||||
|
}
|
||||||
|
if len(device) == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
devices = append(devices, device)
|
||||||
|
}
|
||||||
|
|
||||||
|
if devices == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ret := strings.Join(devices, ",")
|
||||||
|
return &ret
|
||||||
|
}
|
||||||
|
|
||||||
|
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
|
||||||
|
// Try and get the device list from mount volumes first
|
||||||
|
devices := getDevicesFromMounts(*hookConfig.DeviceListVolumeMount, mounts)
|
||||||
|
if devices != nil {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to reading from the environment variable if privileges are correct
|
||||||
|
devices = getDevicesFromEnvvar(env, legacyImage)
|
||||||
|
if devices == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if privileged || hookConfig.AcceptEnvvarUnprivileged {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
|
// Error out otherwise
|
||||||
|
log.Panicln("insufficient privileges to read device list from NVIDIA_VISIBLE_DEVICES envvar")
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func getMigConfigDevices(env map[string]string) *string {
|
func getMigConfigDevices(env map[string]string) *string {
|
||||||
if devices, ok := env[envNVMigConfigDevices]; ok {
|
if devices, ok := env[envNVMigConfigDevices]; ok {
|
||||||
return &devices
|
return &devices
|
||||||
@ -296,11 +355,11 @@ func getRequirements(env map[string]string, legacyImage bool) []string {
|
|||||||
return requirements
|
return requirements
|
||||||
}
|
}
|
||||||
|
|
||||||
func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig {
|
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||||
legacyImage := isLegacyCUDAImage(env)
|
legacyImage := isLegacyCUDAImage(env)
|
||||||
|
|
||||||
var devices string
|
var devices string
|
||||||
if d := getDevices(env, legacyImage); d != nil {
|
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
||||||
devices = *d
|
devices = *d
|
||||||
} else {
|
} else {
|
||||||
// 'nil' devices means this is not a GPU container.
|
// 'nil' devices means this is not a GPU container.
|
||||||
@ -357,13 +416,13 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
|||||||
|
|
||||||
s := loadSpec(path.Join(b, "config.json"))
|
s := loadSpec(path.Join(b, "config.json"))
|
||||||
|
|
||||||
env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI)
|
env := getEnvMap(s.Process.Env)
|
||||||
privileged := isPrivileged(s)
|
privileged := isPrivileged(s)
|
||||||
envSwarmGPU = hook.SwarmResource
|
envSwarmGPU = hook.SwarmResource
|
||||||
return containerConfig{
|
return containerConfig{
|
||||||
Pid: h.Pid,
|
Pid: h.Pid,
|
||||||
Rootfs: s.Root.Path,
|
Rootfs: s.Root.Path,
|
||||||
Env: env,
|
Env: env,
|
||||||
Nvidia: getNvidiaConfig(env, privileged),
|
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
"testing"
|
"testing"
|
||||||
)
|
)
|
||||||
@ -407,7 +408,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
// Wrap the call to getNvidiaConfig() in a closure.
|
// Wrap the call to getNvidiaConfig() in a closure.
|
||||||
var config *nvidiaConfig
|
var config *nvidiaConfig
|
||||||
getConfig := func() {
|
getConfig := func() {
|
||||||
config = getNvidiaConfig(tc.env, tc.privileged)
|
hookConfig := getDefaultHookConfig()
|
||||||
|
config = getNvidiaConfig(&hookConfig, tc.env, nil, tc.privileged)
|
||||||
}
|
}
|
||||||
|
|
||||||
// For any tests that are expected to panic, make sure they do.
|
// For any tests that are expected to panic, make sure they do.
|
||||||
@ -449,6 +451,173 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetDevicesFromMounts(t *testing.T) {
|
||||||
|
var tests = []struct {
|
||||||
|
description string
|
||||||
|
root string
|
||||||
|
mounts []Mount
|
||||||
|
expectedDevices *string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "No mounts",
|
||||||
|
root: defaultDeviceListVolumeMount,
|
||||||
|
mounts: nil,
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Host path is not /dev/null",
|
||||||
|
root: defaultDeviceListVolumeMount,
|
||||||
|
mounts: []Mount{
|
||||||
|
{
|
||||||
|
Source: "/not/dev/null",
|
||||||
|
Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU0"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Container path is not prefixed by 'root'",
|
||||||
|
root: defaultDeviceListVolumeMount,
|
||||||
|
mounts: []Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join("/other/prefix", "GPU0"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Container path is only 'root'",
|
||||||
|
root: defaultDeviceListVolumeMount,
|
||||||
|
mounts: []Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: defaultDeviceListVolumeMount,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Discover 2 devices",
|
||||||
|
root: defaultDeviceListVolumeMount,
|
||||||
|
mounts: []Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU0"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedDevices: &[]string{"GPU0,GPU1"}[0],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Discover 2 devices with slashes in the name",
|
||||||
|
root: defaultDeviceListVolumeMount,
|
||||||
|
mounts: []Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU0-MIG0/0/1"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU1-MIG0/0/1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedDevices: &[]string{"GPU0-MIG0/0/1,GPU1-MIG0/0/1"}[0],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
devices := getDevicesFromMounts(tc.root, tc.mounts)
|
||||||
|
if !reflect.DeepEqual(devices, tc.expectedDevices) {
|
||||||
|
t.Errorf("Unexpected devices (got: %v, wanted: %v)", *devices, *tc.expectedDevices)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeviceListSourcePriority(t *testing.T) {
|
||||||
|
var tests = []struct {
|
||||||
|
description string
|
||||||
|
mountDevices []Mount
|
||||||
|
envvarDevices string
|
||||||
|
privileged bool
|
||||||
|
acceptUnprivileged bool
|
||||||
|
expectedDevices *string
|
||||||
|
expectedPanic bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "Mount devices, unprivileged, no accept unprivileged",
|
||||||
|
mountDevices: []Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU0"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(defaultDeviceListVolumeMount, "GPU1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
envvarDevices: "GPU2,GPU3",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
expectedDevices: &[]string{"GPU0,GPU1"}[0],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, unprivileged, no accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
expectedPanic: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, privileged, no accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: true,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
expectedDevices: &[]string{"GPU0,GPU1"}[0],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, unprivileged, accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: true,
|
||||||
|
expectedDevices: &[]string{"GPU0,GPU1"}[0],
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
// Wrap the call to getDevices() in a closure.
|
||||||
|
var devices *string
|
||||||
|
getDevices := func() {
|
||||||
|
env := map[string]string{
|
||||||
|
envNVVisibleDevices: tc.envvarDevices,
|
||||||
|
}
|
||||||
|
hookConfig := getDefaultHookConfig()
|
||||||
|
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||||
|
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false)
|
||||||
|
}
|
||||||
|
|
||||||
|
// For any tests that are expected to panic, make sure they do.
|
||||||
|
if tc.expectedPanic {
|
||||||
|
mustPanic(t, getDevices)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// For all other tests, just grab the devices and check the results
|
||||||
|
getDevices()
|
||||||
|
if !reflect.DeepEqual(devices, tc.expectedDevices) {
|
||||||
|
t.Errorf("Unexpected devices (got: %v, wanted: %v)", *devices, *tc.expectedDevices)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func elementsMatch(slice0, slice1 []string) bool {
|
func elementsMatch(slice0, slice1 []string) bool {
|
||||||
map0 := make(map[string]int)
|
map0 := make(map[string]int)
|
||||||
map1 := make(map[string]int)
|
map1 := make(map[string]int)
|
||||||
|
@ -13,6 +13,10 @@ const (
|
|||||||
driverPath = "/run/nvidia/driver"
|
driverPath = "/run/nvidia/driver"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// defaultDeviceListVolumeMount is the path getDefaultHookConfig assigns to
// HookConfig.DeviceListVolumeMount.
const defaultDeviceListVolumeMount = "/var/run/nvidia-container-devices"
|
||||||
|
|
||||||
var defaultPaths = [...]string{
|
var defaultPaths = [...]string{
|
||||||
path.Join(driverPath, configPath),
|
path.Join(driverPath, configPath),
|
||||||
configPath,
|
configPath,
|
||||||
@ -30,13 +34,14 @@ type CLIConfig struct {
|
|||||||
NoCgroups bool `toml:"no-cgroups"`
|
NoCgroups bool `toml:"no-cgroups"`
|
||||||
User *string `toml:"user"`
|
User *string `toml:"user"`
|
||||||
Ldconfig *string `toml:"ldconfig"`
|
Ldconfig *string `toml:"ldconfig"`
|
||||||
AlphaMergeVisibleDevicesEnvvars bool `toml:"alpha-merge-visible-devices-envvars"`
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// HookConfig : options for the nvidia-container-toolkit.
|
// HookConfig : options for the nvidia-container-toolkit.
|
||||||
type HookConfig struct {
|
type HookConfig struct {
|
||||||
DisableRequire bool `toml:"disable-require"`
|
DisableRequire bool `toml:"disable-require"`
|
||||||
SwarmResource *string `toml:"swarm-resource"`
|
SwarmResource *string `toml:"swarm-resource"`
|
||||||
|
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
|
||||||
|
DeviceListVolumeMount *string `toml:"look-for-nvidia-visible-devices-as-volume-mounts-under"`
|
||||||
|
|
||||||
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
|
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
|
||||||
}
|
}
|
||||||
@ -45,6 +50,8 @@ func getDefaultHookConfig() (config HookConfig) {
|
|||||||
return HookConfig{
|
return HookConfig{
|
||||||
DisableRequire: false,
|
DisableRequire: false,
|
||||||
SwarmResource: nil,
|
SwarmResource: nil,
|
||||||
|
AcceptEnvvarUnprivileged: true,
|
||||||
|
DeviceListVolumeMount: &[]string{defaultDeviceListVolumeMount}[0],
|
||||||
NvidiaContainerCLI: CLIConfig{
|
NvidiaContainerCLI: CLIConfig{
|
||||||
Root: nil,
|
Root: nil,
|
||||||
Path: nil,
|
Path: nil,
|
||||||
@ -56,7 +63,6 @@ func getDefaultHookConfig() (config HookConfig) {
|
|||||||
NoCgroups: false,
|
NoCgroups: false,
|
||||||
User: nil,
|
User: nil,
|
||||||
Ldconfig: nil,
|
Ldconfig: nil,
|
||||||
AlphaMergeVisibleDevicesEnvvars: false,
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user