[no-relnote] Use image.CUDA to extract visible devices

Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
Evan Lezar
2024-10-14 15:06:06 +02:00
parent 1991b3ef2a
commit 92df542f2f
11 changed files with 313 additions and 337 deletions

View File

@@ -6,7 +6,6 @@ import (
"log"
"os"
"path"
"path/filepath"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
@@ -14,26 +13,10 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)
// Names of the environment variables that are looked up on the container
// image to build the NVIDIA configuration for the hook.
const (
// envCUDAVersion is the variable carrying the CUDA version of the image.
envCUDAVersion = "CUDA_VERSION"
// envNVRequirePrefix is the common prefix of all requirement variables
// (e.g. NVIDIA_REQUIRE_CUDA).
envNVRequirePrefix = "NVIDIA_REQUIRE_"
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
// envNVDisableRequire names the variable used to disable requirements.
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
// envNVVisibleDevices names the variable listing the requested devices.
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
// The MIG variables are read through getMigDevices.
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
// envNVImexChannels names the variable read by getImexChannels.
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
// envNVDriverCapabilities names the variable read by getDriverCapabilities.
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
const (
// capSysAdmin is the name of the CAP_SYS_ADMIN Linux capability.
// NOTE(review): its use is outside the visible hunks - presumably it is
// checked when deciding whether a container is privileged; confirm.
capSysAdmin = "CAP_SYS_ADMIN"
)
const (
// deviceListAsVolumeMountsRoot is the container path below which mount
// destinations are interpreted as device names by getDevicesFromMounts.
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
)
type nvidiaConfig struct {
Devices []string
MigConfigDevices string
@@ -76,23 +59,14 @@ type LinuxCapabilities struct {
Ambient []string `json:"ambient,omitempty" platform:"linux"`
}
// Mount from OCI runtime spec
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
//
// Only the fields needed by the hook are declared; the JSON tags mirror the
// names used in the runtime spec.
type Mount struct {
// Destination is the mount point inside the container.
Destination string `json:"destination"`
// Type is the mount type (serialized as "type"; omitted when empty).
Type string `json:"type,omitempty" platform:"linux,solaris"`
// Source is the host-side path that is mounted.
Source string `json:"source,omitempty"`
// Options holds the mount options (serialized as "options").
Options []string `json:"options,omitempty"`
}
// Spec from OCI runtime spec
// We use pointers to structs, similarly to the latest version of runtime-spec:
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
type Spec struct {
Version *string `json:"ociVersion"`
Process *Process `json:"process,omitempty"`
Root *Root `json:"root,omitempty"`
Mounts []Mount `json:"mounts,omitempty"`
Version *string `json:"ociVersion"`
Process *Process `json:"process,omitempty"`
Root *Root `json:"root,omitempty"`
Mounts []specs.Mount `json:"mounts,omitempty"`
}
// HookState holds state information about the hook
@@ -171,58 +145,22 @@ func isPrivileged(s *Spec) bool {
return image.IsPrivileged(&fullSpec)
}
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) []string {
func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
// We check if the image has at least one of the Swarm resource envvars defined and use this
// if specified.
for _, envvar := range swarmResourceEnvvars {
if image.HasEnvvar(envvar) {
return image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
if containerImage.HasEnvvar(envvar) {
return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
}
}
return image.DevicesFromEnvvars(envNVVisibleDevices).List()
return containerImage.VisibleDevicesFromEnvVar()
}
// getDevicesFromMounts returns the devices requested through volume mounts.
//
// A device is requested by mounting /dev/null from the host onto a path below
// deviceListAsVolumeMountsRoot in the container; the part of the destination
// below that root is the device name (it may itself contain slashes).
// Mounts with any other source, and mounts whose destination is the root
// itself or lies outside of it, are ignored.
//
// The result is nil when no device is requested.
func getDevicesFromMounts(mounts []Mount) []string {
	// The root does not depend on the mount, so normalise it only once.
	root := filepath.Clean(deviceListAsVolumeMountsRoot)

	var devices []string
	for _, m := range mounts {
		// Only consider mounts whose host volume is /dev/null.
		if filepath.Clean(m.Source) != "/dev/null" {
			continue
		}

		destination := filepath.Clean(m.Destination)
		// Only consider container mount points strictly below 'root'; the
		// destination must be at least "root/x" to name a device.
		if len(destination) <= len(root)+1 {
			continue
		}
		// Require a path separator right after 'root' so that a sibling path
		// that merely shares the prefix (e.g. root + "-other/GPU0") is not
		// mistaken for a device request.
		if destination[:len(root)] != root || destination[len(root)] != '/' {
			continue
		}

		// Everything beyond "root/" is the device name.
		devices = append(devices, destination[len(root)+1:])
	}
	return devices
}
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) []string {
func getDevices(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
// If enabled, try and get the device list from volume mounts first
if hookConfig.AcceptDeviceListAsVolumeMounts {
devices := getDevicesFromMounts(mounts)
devices := image.VisibleDevicesFromMounts()
if len(devices) > 0 {
return devices
}
@@ -243,12 +181,12 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
return nil
}
func getMigConfigDevices(image image.CUDA) *string {
return getMigDevices(image, envNVMigConfigDevices)
func getMigConfigDevices(i image.CUDA) *string {
return getMigDevices(i, image.EnvVarNvidiaMigConfigDevices)
}
func getMigMonitorDevices(image image.CUDA) *string {
return getMigDevices(image, envNVMigMonitorDevices)
func getMigMonitorDevices(i image.CUDA) *string {
return getMigDevices(i, image.EnvVarNvidiaMigMonitorDevices)
}
func getMigDevices(image image.CUDA, envvar string) *string {
@@ -259,11 +197,11 @@ func getMigDevices(image image.CUDA, envvar string) *string {
return &devices
}
func getImexChannels(image image.CUDA) *string {
if !image.HasEnvvar(envNVImexChannels) {
func getImexChannels(i image.CUDA) *string {
if !i.HasEnvvar(image.EnvVarNvidiaImexChannels) {
return nil
}
chans := image.Getenv(envNVImexChannels)
chans := i.Getenv(image.EnvVarNvidiaImexChannels)
return &chans
}
@@ -274,8 +212,8 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
capsEnv := cudaImage.Getenv(image.EnvVarNvidiaDriverCapabilities)
if !capsEnvSpecified && legacyImage {
// Environment variable unset with legacy image: set all capabilities.
@@ -294,10 +232,10 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
return capabilities
}
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool) *nvidiaConfig {
legacyImage := image.IsLegacy()
devices := getDevices(hookConfig, image, mounts, privileged)
devices := getDevices(hookConfig, image, privileged)
if len(devices) == 0 {
// empty devices means this is not a GPU container.
return nil
@@ -357,6 +295,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
image, err := image.New(
image.WithEnv(s.Process.Env),
image.WithMounts(s.Mounts),
image.WithDisableRequire(hook.DisableRequire),
)
if err != nil {
@@ -368,6 +307,6 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
Pid: h.Pid,
Rootfs: s.Root.Path,
Image: image,
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
Nvidia: getNvidiaConfig(&hook, image, privileged),
}
}

View File

@@ -4,6 +4,7 @@ import (
"path/filepath"
"testing"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
@@ -33,7 +34,7 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, no devices, no capabilities, no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
image.EnvVarCudaVersion: "9.0",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -45,8 +46,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices 'all', no capabilities, no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "all",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "all",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -58,8 +59,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices 'empty', no capabilities, no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "",
},
privileged: false,
expectedConfig: nil,
@@ -67,8 +68,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices 'void', no capabilities, no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "void",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "void",
},
privileged: false,
expectedConfig: nil,
@@ -76,8 +77,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices 'none', no capabilities, no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "none",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "none",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -89,8 +90,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices set, no capabilities, no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -102,9 +103,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices set, capabilities 'empty', no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -116,9 +117,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices set, capabilities 'all', no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "all",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "all",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -130,9 +131,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices set, capabilities set, no requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "video,display",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "video,display",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -144,11 +145,11 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices set, capabilities set, requirements set",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "video,display",
envNVRequirePrefix + "REQ0": "req0=true",
envNVRequirePrefix + "REQ1": "req1=false",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "video,display",
image.NvidiaRequirePrefix + "REQ0": "req0=true",
image.NvidiaRequirePrefix + "REQ1": "req1=false",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -160,12 +161,12 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Legacy image, devices set, capabilities set, requirements set, disable requirements",
env: map[string]string{
envCUDAVersion: "9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "video,display",
envNVRequirePrefix + "REQ0": "req0=true",
envNVRequirePrefix + "REQ1": "req1=false",
envNVDisableRequire: "true",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "video,display",
image.NvidiaRequirePrefix + "REQ0": "req0=true",
image.NvidiaRequirePrefix + "REQ1": "req1=false",
image.EnvVarNvidiaDisableRequire: "true",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -175,18 +176,18 @@ func TestGetNvidiaConfig(t *testing.T) {
},
},
{
description: "Modern image, no devices, no capabilities, no requirements, no envCUDAVersion",
description: "Modern image, no devices, no capabilities, no requirements, no image.EnvVarCudaVersion",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
},
privileged: false,
expectedConfig: nil,
},
{
description: "Modern image, no devices, no capabilities, no requirement, envCUDAVersion set",
description: "Modern image, no devices, no capabilities, no requirement, image.EnvVarCudaVersion set",
env: map[string]string{
envCUDAVersion: "9.0",
envNVRequireCUDA: "cuda>=9.0",
image.EnvVarCudaVersion: "9.0",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
},
privileged: false,
expectedConfig: nil,
@@ -194,8 +195,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices 'all', no capabilities, no requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "all",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "all",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -207,8 +208,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices 'empty', no capabilities, no requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "",
},
privileged: false,
expectedConfig: nil,
@@ -216,8 +217,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices 'void', no capabilities, no requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "void",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "void",
},
privileged: false,
expectedConfig: nil,
@@ -225,8 +226,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices 'none', no capabilities, no requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "none",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "none",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -238,8 +239,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices set, no capabilities, no requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -251,9 +252,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices set, capabilities 'empty', no requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -265,9 +266,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices set, capabilities 'all', no requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "all",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "all",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -279,9 +280,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices set, capabilities set, no requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "video,display",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "video,display",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -293,11 +294,11 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices set, capabilities set, requirements set",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "video,display",
envNVRequirePrefix + "REQ0": "req0=true",
envNVRequirePrefix + "REQ1": "req1=false",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "video,display",
image.NvidiaRequirePrefix + "REQ0": "req0=true",
image.NvidiaRequirePrefix + "REQ1": "req1=false",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -309,12 +310,12 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices set, capabilities set, requirements set, disable requirements",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "gpu0,gpu1",
envNVDriverCapabilities: "video,display",
envNVRequirePrefix + "REQ0": "req0=true",
envNVRequirePrefix + "REQ1": "req1=false",
envNVDisableRequire: "true",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
image.EnvVarNvidiaDriverCapabilities: "video,display",
image.NvidiaRequirePrefix + "REQ0": "req0=true",
image.NvidiaRequirePrefix + "REQ1": "req1=false",
image.EnvVarNvidiaDisableRequire: "true",
},
privileged: false,
expectedConfig: &nvidiaConfig{
@@ -326,7 +327,7 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "No cuda envs, devices 'all'",
env: map[string]string{
envNVVisibleDevices: "all",
image.EnvVarNvidiaVisibleDevices: "all",
},
privileged: false,
@@ -339,9 +340,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices 'all', migConfig set, privileged",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "all",
envNVMigConfigDevices: "mig0,mig1",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "all",
image.EnvVarNvidiaMigConfigDevices: "mig0,mig1",
},
privileged: true,
expectedConfig: &nvidiaConfig{
@@ -354,9 +355,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices 'all', migConfig set, unprivileged",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "all",
envNVMigConfigDevices: "mig0,mig1",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "all",
image.EnvVarNvidiaMigConfigDevices: "mig0,mig1",
},
privileged: false,
expectedPanic: true,
@@ -364,9 +365,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices 'all', migMonitor set, privileged",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "all",
envNVMigMonitorDevices: "mig0,mig1",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "all",
image.EnvVarNvidiaMigMonitorDevices: "mig0,mig1",
},
privileged: true,
expectedConfig: &nvidiaConfig{
@@ -379,9 +380,9 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Modern image, devices 'all', migMonitor set, unprivileged",
env: map[string]string{
envNVRequireCUDA: "cuda>=9.0",
envNVVisibleDevices: "all",
envNVMigMonitorDevices: "mig0,mig1",
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
image.EnvVarNvidiaVisibleDevices: "all",
image.EnvVarNvidiaMigMonitorDevices: "mig0,mig1",
},
privileged: false,
expectedPanic: true,
@@ -389,8 +390,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Hook config set as driver-capabilities-all",
env: map[string]string{
envNVVisibleDevices: "all",
envNVDriverCapabilities: "all",
image.EnvVarNvidiaVisibleDevices: "all",
image.EnvVarNvidiaDriverCapabilities: "all",
},
privileged: true,
hookConfig: &HookConfig{
@@ -404,8 +405,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Hook config set, envvar sets driver-capabilities",
env: map[string]string{
envNVVisibleDevices: "all",
envNVDriverCapabilities: "video,display",
image.EnvVarNvidiaVisibleDevices: "all",
image.EnvVarNvidiaDriverCapabilities: "video,display",
},
privileged: true,
hookConfig: &HookConfig{
@@ -419,7 +420,7 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Hook config set, envvar unset sets default driver-capabilities",
env: map[string]string{
envNVVisibleDevices: "all",
image.EnvVarNvidiaVisibleDevices: "all",
},
privileged: true,
hookConfig: &HookConfig{
@@ -433,8 +434,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Hook config set, swarmResource overrides device selection",
env: map[string]string{
envNVVisibleDevices: "all",
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
image.EnvVarNvidiaVisibleDevices: "all",
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
},
privileged: true,
hookConfig: &HookConfig{
@@ -449,8 +450,8 @@ func TestGetNvidiaConfig(t *testing.T) {
{
description: "Hook config set, comma separated swarmResource is split and overrides device selection",
env: map[string]string{
envNVVisibleDevices: "all",
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
image.EnvVarNvidiaVisibleDevices: "all",
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
},
privileged: true,
hookConfig: &HookConfig{
@@ -476,7 +477,7 @@ func TestGetNvidiaConfig(t *testing.T) {
defaultConfig, _ := getDefaultHookConfig()
hookConfig = &defaultConfig
}
config = getNvidiaConfig(hookConfig, image, nil, tc.privileged)
config = getNvidiaConfig(hookConfig, image, tc.privileged)
}
// For any tests that are expected to panic, make sure they do.
@@ -506,88 +507,10 @@ func TestGetNvidiaConfig(t *testing.T) {
}
}
func TestGetDevicesFromMounts(t *testing.T) {
var tests = []struct {
description string
mounts []Mount
expectedDevices []string
}{
{
description: "No mounts",
mounts: nil,
expectedDevices: nil,
},
{
description: "Host path is not /dev/null",
mounts: []Mount{
{
Source: "/not/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
},
},
expectedDevices: nil,
},
{
description: "Container path is not prefixed by 'root'",
mounts: []Mount{
{
Source: "/dev/null",
Destination: filepath.Join("/other/prefix", "GPU0"),
},
},
expectedDevices: nil,
},
{
description: "Container path is only 'root'",
mounts: []Mount{
{
Source: "/dev/null",
Destination: deviceListAsVolumeMountsRoot,
},
},
expectedDevices: nil,
},
{
description: "Discover 2 devices",
mounts: []Mount{
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
},
},
expectedDevices: []string{"GPU0", "GPU1"},
},
{
description: "Discover 2 devices with slashes in the name",
mounts: []Mount{
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0-MIG0/0/1"),
},
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1-MIG0/0/1"),
},
},
expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
devices := getDevicesFromMounts(tc.mounts)
require.Equal(t, tc.expectedDevices, devices)
})
}
}
func TestDeviceListSourcePriority(t *testing.T) {
var tests = []struct {
description string
mountDevices []Mount
mountDevices []specs.Mount
envvarDevices string
privileged bool
acceptUnprivileged bool
@@ -596,14 +519,14 @@ func TestDeviceListSourcePriority(t *testing.T) {
}{
{
description: "Mount devices, unprivileged, no accept unprivileged",
mountDevices: []Mount{
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
@@ -641,14 +564,14 @@ func TestDeviceListSourcePriority(t *testing.T) {
},
{
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
mountDevices: []Mount{
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
@@ -659,14 +582,14 @@ func TestDeviceListSourcePriority(t *testing.T) {
},
{
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
mountDevices: []Mount{
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
@@ -684,14 +607,15 @@ func TestDeviceListSourcePriority(t *testing.T) {
image, _ := image.New(
image.WithEnvMap(
map[string]string{
envNVVisibleDevices: tc.envvarDevices,
image.EnvVarNvidiaVisibleDevices: tc.envvarDevices,
},
),
image.WithMounts(tc.mountDevices),
)
hookConfig, _ := getDefaultHookConfig()
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged)
devices = getDevices(&hookConfig, image, tc.privileged)
}
// For all other tests, just grab the devices and check the results
@@ -720,41 +644,41 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
{
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
envNVVisibleDevices: "",
image.EnvVarNvidiaVisibleDevices: "",
},
},
{
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
envNVVisibleDevices: "void",
image.EnvVarNvidiaVisibleDevices: "void",
},
},
{
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
env: map[string]string{
envNVVisibleDevices: "none",
image.EnvVarNvidiaVisibleDevices: "none",
},
expectedDevices: []string{""},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
env: map[string]string{
envNVVisibleDevices: gpuID,
image.EnvVarNvidiaVisibleDevices: gpuID,
},
expectedDevices: []string{gpuID},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
env: map[string]string{
envNVVisibleDevices: gpuID,
envCUDAVersion: "legacy",
image.EnvVarNvidiaVisibleDevices: gpuID,
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
{
description: "empty env returns all for legacy image",
env: map[string]string{
envCUDAVersion: "legacy",
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{"all"},
},
@@ -769,47 +693,47 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
{
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
envNVVisibleDevices: "",
envDockerResourceGPUs: anotherGPUID,
image.EnvVarNvidiaVisibleDevices: "",
envDockerResourceGPUs: anotherGPUID,
},
},
{
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
envNVVisibleDevices: "void",
envDockerResourceGPUs: anotherGPUID,
image.EnvVarNvidiaVisibleDevices: "void",
envDockerResourceGPUs: anotherGPUID,
},
},
{
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
env: map[string]string{
envNVVisibleDevices: "none",
envDockerResourceGPUs: anotherGPUID,
image.EnvVarNvidiaVisibleDevices: "none",
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{""},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
env: map[string]string{
envNVVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
image.EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{gpuID},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
env: map[string]string{
envNVVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
envCUDAVersion: "legacy",
image.EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
{
description: "empty env returns all for legacy image",
env: map[string]string{
envDockerResourceGPUs: anotherGPUID,
envCUDAVersion: "legacy",
envDockerResourceGPUs: anotherGPUID,
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{"all"},
},
@@ -853,8 +777,8 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: gpuID,
envCUDAVersion: "legacy",
envDockerResourceGPUs: gpuID,
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
@@ -870,8 +794,8 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
envNVVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
image.EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
@@ -879,7 +803,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
envNVVisibleDevices: gpuID,
image.EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
@@ -888,7 +812,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
envNVVisibleDevices: gpuID,
image.EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS": thirdGPUID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
@@ -898,7 +822,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
envNVVisibleDevices: gpuID,
image.EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
@@ -931,7 +855,7 @@ func TestGetDriverCapabilities(t *testing.T) {
{
description: "Env is set for legacy image",
env: map[string]string{
envNVDriverCapabilities: "display,video",
image.EnvVarNvidiaDriverCapabilities: "display,video",
},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
@@ -940,7 +864,7 @@ func TestGetDriverCapabilities(t *testing.T) {
{
description: "Env is all for legacy image",
env: map[string]string{
envNVDriverCapabilities: "all",
image.EnvVarNvidiaDriverCapabilities: "all",
},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
@@ -949,7 +873,7 @@ func TestGetDriverCapabilities(t *testing.T) {
{
description: "Env is empty for legacy image",
env: map[string]string{
envNVDriverCapabilities: "",
image.EnvVarNvidiaDriverCapabilities: "",
},
legacyImage: true,
supportedCapabilities: supportedCapabilities,
@@ -965,7 +889,7 @@ func TestGetDriverCapabilities(t *testing.T) {
{
description: "Env is set for modern image",
env: map[string]string{
envNVDriverCapabilities: "display,video",
image.EnvVarNvidiaDriverCapabilities: "display,video",
},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
@@ -981,7 +905,7 @@ func TestGetDriverCapabilities(t *testing.T) {
{
description: "Env is all for modern image",
env: map[string]string{
envNVDriverCapabilities: "all",
image.EnvVarNvidiaDriverCapabilities: "all",
},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
@@ -990,7 +914,7 @@ func TestGetDriverCapabilities(t *testing.T) {
{
description: "Env is empty for modern image",
env: map[string]string{
envNVDriverCapabilities: "",
image.EnvVarNvidiaDriverCapabilities: "",
},
legacyImage: false,
supportedCapabilities: supportedCapabilities,
@@ -999,7 +923,7 @@ func TestGetDriverCapabilities(t *testing.T) {
{
description: "Invalid capabilities panic",
env: map[string]string{
envNVDriverCapabilities: "compute,utility",
image.EnvVarNvidiaDriverCapabilities: "compute,utility",
},
supportedCapabilities: "not-compute,not-utility",
expectedPanic: true,