mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-22 07:05:06 +00:00
[no-relnote] Use image.CUDA to extract visible devices
Signed-off-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in:
parent
1991b3ef2a
commit
92df542f2f
@ -6,7 +6,6 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
"path"
|
"path"
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"golang.org/x/mod/semver"
|
"golang.org/x/mod/semver"
|
||||||
@ -14,26 +13,10 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
envCUDAVersion = "CUDA_VERSION"
|
|
||||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
|
||||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
|
||||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
|
||||||
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
|
|
||||||
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
|
|
||||||
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
|
|
||||||
envNVImexChannels = "NVIDIA_IMEX_CHANNELS"
|
|
||||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
const (
|
||||||
capSysAdmin = "CAP_SYS_ADMIN"
|
capSysAdmin = "CAP_SYS_ADMIN"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
|
||||||
)
|
|
||||||
|
|
||||||
type nvidiaConfig struct {
|
type nvidiaConfig struct {
|
||||||
Devices []string
|
Devices []string
|
||||||
MigConfigDevices string
|
MigConfigDevices string
|
||||||
@ -76,15 +59,6 @@ type LinuxCapabilities struct {
|
|||||||
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mount from OCI runtime spec
|
|
||||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
|
|
||||||
type Mount struct {
|
|
||||||
Destination string `json:"destination"`
|
|
||||||
Type string `json:"type,omitempty" platform:"linux,solaris"`
|
|
||||||
Source string `json:"source,omitempty"`
|
|
||||||
Options []string `json:"options,omitempty"`
|
|
||||||
}
|
|
||||||
|
|
||||||
// Spec from OCI runtime spec
|
// Spec from OCI runtime spec
|
||||||
// We use pointers to structs, similarly to the latest version of runtime-spec:
|
// We use pointers to structs, similarly to the latest version of runtime-spec:
|
||||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
|
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
|
||||||
@ -92,7 +66,7 @@ type Spec struct {
|
|||||||
Version *string `json:"ociVersion"`
|
Version *string `json:"ociVersion"`
|
||||||
Process *Process `json:"process,omitempty"`
|
Process *Process `json:"process,omitempty"`
|
||||||
Root *Root `json:"root,omitempty"`
|
Root *Root `json:"root,omitempty"`
|
||||||
Mounts []Mount `json:"mounts,omitempty"`
|
Mounts []specs.Mount `json:"mounts,omitempty"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// HookState holds state information about the hook
|
// HookState holds state information about the hook
|
||||||
@ -171,58 +145,22 @@ func isPrivileged(s *Spec) bool {
|
|||||||
return image.IsPrivileged(&fullSpec)
|
return image.IsPrivileged(&fullSpec)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) []string {
|
func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
|
||||||
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
||||||
// if specified.
|
// if specified.
|
||||||
for _, envvar := range swarmResourceEnvvars {
|
for _, envvar := range swarmResourceEnvvars {
|
||||||
if image.HasEnvvar(envvar) {
|
if containerImage.HasEnvvar(envvar) {
|
||||||
return image.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return image.DevicesFromEnvvars(envNVVisibleDevices).List()
|
return containerImage.VisibleDevicesFromEnvVar()
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevicesFromMounts(mounts []Mount) []string {
|
func getDevices(hookConfig *HookConfig, image image.CUDA, privileged bool) []string {
|
||||||
var devices []string
|
|
||||||
for _, m := range mounts {
|
|
||||||
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
|
||||||
source := filepath.Clean(m.Source)
|
|
||||||
destination := filepath.Clean(m.Destination)
|
|
||||||
|
|
||||||
// Only consider mounts whose host volume is /dev/null
|
|
||||||
if source != "/dev/null" {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Only consider container mount points that begin with 'root'
|
|
||||||
if len(destination) < len(root) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
if destination[:len(root)] != root {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
// Grab the full path beyond 'root' and add it to the list of devices
|
|
||||||
device := destination[len(root):]
|
|
||||||
if len(device) > 0 && device[0] == '/' {
|
|
||||||
device = device[1:]
|
|
||||||
}
|
|
||||||
if len(device) == 0 {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
devices = append(devices, device)
|
|
||||||
}
|
|
||||||
|
|
||||||
if devices == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return devices
|
|
||||||
}
|
|
||||||
|
|
||||||
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) []string {
|
|
||||||
// If enabled, try and get the device list from volume mounts first
|
// If enabled, try and get the device list from volume mounts first
|
||||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||||
devices := getDevicesFromMounts(mounts)
|
devices := image.VisibleDevicesFromMounts()
|
||||||
if len(devices) > 0 {
|
if len(devices) > 0 {
|
||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
@ -243,12 +181,12 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMigConfigDevices(image image.CUDA) *string {
|
func getMigConfigDevices(i image.CUDA) *string {
|
||||||
return getMigDevices(image, envNVMigConfigDevices)
|
return getMigDevices(i, image.EnvVarNvidiaMigConfigDevices)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMigMonitorDevices(image image.CUDA) *string {
|
func getMigMonitorDevices(i image.CUDA) *string {
|
||||||
return getMigDevices(image, envNVMigMonitorDevices)
|
return getMigDevices(i, image.EnvVarNvidiaMigMonitorDevices)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMigDevices(image image.CUDA, envvar string) *string {
|
func getMigDevices(image image.CUDA, envvar string) *string {
|
||||||
@ -259,11 +197,11 @@ func getMigDevices(image image.CUDA, envvar string) *string {
|
|||||||
return &devices
|
return &devices
|
||||||
}
|
}
|
||||||
|
|
||||||
func getImexChannels(image image.CUDA) *string {
|
func getImexChannels(i image.CUDA) *string {
|
||||||
if !image.HasEnvvar(envNVImexChannels) {
|
if !i.HasEnvvar(image.EnvVarNvidiaImexChannels) {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
chans := image.Getenv(envNVImexChannels)
|
chans := i.Getenv(image.EnvVarNvidiaImexChannels)
|
||||||
return &chans
|
return &chans
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -274,8 +212,8 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
|
|||||||
|
|
||||||
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
||||||
|
|
||||||
capsEnvSpecified := cudaImage.HasEnvvar(envNVDriverCapabilities)
|
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
|
||||||
capsEnv := cudaImage.Getenv(envNVDriverCapabilities)
|
capsEnv := cudaImage.Getenv(image.EnvVarNvidiaDriverCapabilities)
|
||||||
|
|
||||||
if !capsEnvSpecified && legacyImage {
|
if !capsEnvSpecified && legacyImage {
|
||||||
// Environment variable unset with legacy image: set all capabilities.
|
// Environment variable unset with legacy image: set all capabilities.
|
||||||
@ -294,10 +232,10 @@ func (c *HookConfig) getDriverCapabilities(cudaImage image.CUDA, legacyImage boo
|
|||||||
return capabilities
|
return capabilities
|
||||||
}
|
}
|
||||||
|
|
||||||
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
|
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, privileged bool) *nvidiaConfig {
|
||||||
legacyImage := image.IsLegacy()
|
legacyImage := image.IsLegacy()
|
||||||
|
|
||||||
devices := getDevices(hookConfig, image, mounts, privileged)
|
devices := getDevices(hookConfig, image, privileged)
|
||||||
if len(devices) == 0 {
|
if len(devices) == 0 {
|
||||||
// empty devices means this is not a GPU container.
|
// empty devices means this is not a GPU container.
|
||||||
return nil
|
return nil
|
||||||
@ -357,6 +295,7 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
|||||||
|
|
||||||
image, err := image.New(
|
image, err := image.New(
|
||||||
image.WithEnv(s.Process.Env),
|
image.WithEnv(s.Process.Env),
|
||||||
|
image.WithMounts(s.Mounts),
|
||||||
image.WithDisableRequire(hook.DisableRequire),
|
image.WithDisableRequire(hook.DisableRequire),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -368,6 +307,6 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
|||||||
Pid: h.Pid,
|
Pid: h.Pid,
|
||||||
Rootfs: s.Root.Path,
|
Rootfs: s.Root.Path,
|
||||||
Image: image,
|
Image: image,
|
||||||
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
|
Nvidia: getNvidiaConfig(&hook, image, privileged),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ import (
|
|||||||
"path/filepath"
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
@ -33,7 +34,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, no devices, no capabilities, no requirements",
|
description: "Legacy image, no devices, no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -45,8 +46,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices 'all', no capabilities, no requirements",
|
description: "Legacy image, devices 'all', no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -58,8 +59,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices 'empty', no capabilities, no requirements",
|
description: "Legacy image, devices 'empty', no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "",
|
image.EnvVarNvidiaVisibleDevices: "",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: nil,
|
expectedConfig: nil,
|
||||||
@ -67,8 +68,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices 'void', no capabilities, no requirements",
|
description: "Legacy image, devices 'void', no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "void",
|
image.EnvVarNvidiaVisibleDevices: "void",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: nil,
|
expectedConfig: nil,
|
||||||
@ -76,8 +77,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices 'none', no capabilities, no requirements",
|
description: "Legacy image, devices 'none', no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "none",
|
image.EnvVarNvidiaVisibleDevices: "none",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -89,8 +90,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices set, no capabilities, no requirements",
|
description: "Legacy image, devices set, no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -102,9 +103,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices set, capabilities 'empty', no requirements",
|
description: "Legacy image, devices set, capabilities 'empty', no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "",
|
image.EnvVarNvidiaDriverCapabilities: "",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -116,9 +117,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices set, capabilities 'all', no requirements",
|
description: "Legacy image, devices set, capabilities 'all', no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "all",
|
image.EnvVarNvidiaDriverCapabilities: "all",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -130,9 +131,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices set, capabilities set, no requirements",
|
description: "Legacy image, devices set, capabilities set, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "video,display",
|
image.EnvVarNvidiaDriverCapabilities: "video,display",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -144,11 +145,11 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices set, capabilities set, requirements set",
|
description: "Legacy image, devices set, capabilities set, requirements set",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "video,display",
|
image.EnvVarNvidiaDriverCapabilities: "video,display",
|
||||||
envNVRequirePrefix + "REQ0": "req0=true",
|
image.NvidiaRequirePrefix + "REQ0": "req0=true",
|
||||||
envNVRequirePrefix + "REQ1": "req1=false",
|
image.NvidiaRequirePrefix + "REQ1": "req1=false",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -160,12 +161,12 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Legacy image, devices set, capabilities set, requirements set, disable requirements",
|
description: "Legacy image, devices set, capabilities set, requirements set, disable requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "video,display",
|
image.EnvVarNvidiaDriverCapabilities: "video,display",
|
||||||
envNVRequirePrefix + "REQ0": "req0=true",
|
image.NvidiaRequirePrefix + "REQ0": "req0=true",
|
||||||
envNVRequirePrefix + "REQ1": "req1=false",
|
image.NvidiaRequirePrefix + "REQ1": "req1=false",
|
||||||
envNVDisableRequire: "true",
|
image.EnvVarNvidiaDisableRequire: "true",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -175,18 +176,18 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "Modern image, no devices, no capabilities, no requirements, no envCUDAVersion",
|
description: "Modern image, no devices, no capabilities, no requirements, no image.EnvVarCudaVersion",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: nil,
|
expectedConfig: nil,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "Modern image, no devices, no capabilities, no requirement, envCUDAVersion set",
|
description: "Modern image, no devices, no capabilities, no requirement, image.EnvVarCudaVersion set",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "9.0",
|
image.EnvVarCudaVersion: "9.0",
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: nil,
|
expectedConfig: nil,
|
||||||
@ -194,8 +195,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices 'all', no capabilities, no requirements",
|
description: "Modern image, devices 'all', no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -207,8 +208,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices 'empty', no capabilities, no requirements",
|
description: "Modern image, devices 'empty', no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "",
|
image.EnvVarNvidiaVisibleDevices: "",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: nil,
|
expectedConfig: nil,
|
||||||
@ -216,8 +217,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices 'void', no capabilities, no requirements",
|
description: "Modern image, devices 'void', no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "void",
|
image.EnvVarNvidiaVisibleDevices: "void",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: nil,
|
expectedConfig: nil,
|
||||||
@ -225,8 +226,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices 'none', no capabilities, no requirements",
|
description: "Modern image, devices 'none', no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "none",
|
image.EnvVarNvidiaVisibleDevices: "none",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -238,8 +239,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices set, no capabilities, no requirements",
|
description: "Modern image, devices set, no capabilities, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -251,9 +252,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices set, capabilities 'empty', no requirements",
|
description: "Modern image, devices set, capabilities 'empty', no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "",
|
image.EnvVarNvidiaDriverCapabilities: "",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -265,9 +266,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices set, capabilities 'all', no requirements",
|
description: "Modern image, devices set, capabilities 'all', no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "all",
|
image.EnvVarNvidiaDriverCapabilities: "all",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -279,9 +280,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices set, capabilities set, no requirements",
|
description: "Modern image, devices set, capabilities set, no requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "video,display",
|
image.EnvVarNvidiaDriverCapabilities: "video,display",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -293,11 +294,11 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices set, capabilities set, requirements set",
|
description: "Modern image, devices set, capabilities set, requirements set",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "video,display",
|
image.EnvVarNvidiaDriverCapabilities: "video,display",
|
||||||
envNVRequirePrefix + "REQ0": "req0=true",
|
image.NvidiaRequirePrefix + "REQ0": "req0=true",
|
||||||
envNVRequirePrefix + "REQ1": "req1=false",
|
image.NvidiaRequirePrefix + "REQ1": "req1=false",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -309,12 +310,12 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices set, capabilities set, requirements set, disable requirements",
|
description: "Modern image, devices set, capabilities set, requirements set, disable requirements",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "gpu0,gpu1",
|
image.EnvVarNvidiaVisibleDevices: "gpu0,gpu1",
|
||||||
envNVDriverCapabilities: "video,display",
|
image.EnvVarNvidiaDriverCapabilities: "video,display",
|
||||||
envNVRequirePrefix + "REQ0": "req0=true",
|
image.NvidiaRequirePrefix + "REQ0": "req0=true",
|
||||||
envNVRequirePrefix + "REQ1": "req1=false",
|
image.NvidiaRequirePrefix + "REQ1": "req1=false",
|
||||||
envNVDisableRequire: "true",
|
image.EnvVarNvidiaDisableRequire: "true",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -326,7 +327,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "No cuda envs, devices 'all'",
|
description: "No cuda envs, devices 'all'",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
|
|
||||||
@ -339,9 +340,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices 'all', migConfig set, privileged",
|
description: "Modern image, devices 'all', migConfig set, privileged",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
envNVMigConfigDevices: "mig0,mig1",
|
image.EnvVarNvidiaMigConfigDevices: "mig0,mig1",
|
||||||
},
|
},
|
||||||
privileged: true,
|
privileged: true,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -354,9 +355,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices 'all', migConfig set, unprivileged",
|
description: "Modern image, devices 'all', migConfig set, unprivileged",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
envNVMigConfigDevices: "mig0,mig1",
|
image.EnvVarNvidiaMigConfigDevices: "mig0,mig1",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedPanic: true,
|
expectedPanic: true,
|
||||||
@ -364,9 +365,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices 'all', migMonitor set, privileged",
|
description: "Modern image, devices 'all', migMonitor set, privileged",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
envNVMigMonitorDevices: "mig0,mig1",
|
image.EnvVarNvidiaMigMonitorDevices: "mig0,mig1",
|
||||||
},
|
},
|
||||||
privileged: true,
|
privileged: true,
|
||||||
expectedConfig: &nvidiaConfig{
|
expectedConfig: &nvidiaConfig{
|
||||||
@ -379,9 +380,9 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Modern image, devices 'all', migMonitor set, unprivileged",
|
description: "Modern image, devices 'all', migMonitor set, unprivileged",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVRequireCUDA: "cuda>=9.0",
|
image.EnvVarNvidiaRequireCuda: "cuda>=9.0",
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
envNVMigMonitorDevices: "mig0,mig1",
|
image.EnvVarNvidiaMigMonitorDevices: "mig0,mig1",
|
||||||
},
|
},
|
||||||
privileged: false,
|
privileged: false,
|
||||||
expectedPanic: true,
|
expectedPanic: true,
|
||||||
@ -389,8 +390,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Hook config set as driver-capabilities-all",
|
description: "Hook config set as driver-capabilities-all",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
envNVDriverCapabilities: "all",
|
image.EnvVarNvidiaDriverCapabilities: "all",
|
||||||
},
|
},
|
||||||
privileged: true,
|
privileged: true,
|
||||||
hookConfig: &HookConfig{
|
hookConfig: &HookConfig{
|
||||||
@ -404,8 +405,8 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Hook config set, envvar sets driver-capabilities",
|
description: "Hook config set, envvar sets driver-capabilities",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
envNVDriverCapabilities: "video,display",
|
image.EnvVarNvidiaDriverCapabilities: "video,display",
|
||||||
},
|
},
|
||||||
privileged: true,
|
privileged: true,
|
||||||
hookConfig: &HookConfig{
|
hookConfig: &HookConfig{
|
||||||
@ -419,7 +420,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Hook config set, envvar unset sets default driver-capabilities",
|
description: "Hook config set, envvar unset sets default driver-capabilities",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
},
|
},
|
||||||
privileged: true,
|
privileged: true,
|
||||||
hookConfig: &HookConfig{
|
hookConfig: &HookConfig{
|
||||||
@ -433,7 +434,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Hook config set, swarmResource overrides device selection",
|
description: "Hook config set, swarmResource overrides device selection",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||||
},
|
},
|
||||||
privileged: true,
|
privileged: true,
|
||||||
@ -449,7 +450,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Hook config set, comma separated swarmResource is split and overrides device selection",
|
description: "Hook config set, comma separated swarmResource is split and overrides device selection",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "all",
|
image.EnvVarNvidiaVisibleDevices: "all",
|
||||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||||
},
|
},
|
||||||
privileged: true,
|
privileged: true,
|
||||||
@ -476,7 +477,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
defaultConfig, _ := getDefaultHookConfig()
|
defaultConfig, _ := getDefaultHookConfig()
|
||||||
hookConfig = &defaultConfig
|
hookConfig = &defaultConfig
|
||||||
}
|
}
|
||||||
config = getNvidiaConfig(hookConfig, image, nil, tc.privileged)
|
config = getNvidiaConfig(hookConfig, image, tc.privileged)
|
||||||
}
|
}
|
||||||
|
|
||||||
// For any tests that are expected to panic, make sure they do.
|
// For any tests that are expected to panic, make sure they do.
|
||||||
@ -506,88 +507,10 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetDevicesFromMounts(t *testing.T) {
|
|
||||||
var tests = []struct {
|
|
||||||
description string
|
|
||||||
mounts []Mount
|
|
||||||
expectedDevices []string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
description: "No mounts",
|
|
||||||
mounts: nil,
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Host path is not /dev/null",
|
|
||||||
mounts: []Mount{
|
|
||||||
{
|
|
||||||
Source: "/not/dev/null",
|
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Container path is not prefixed by 'root'",
|
|
||||||
mounts: []Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join("/other/prefix", "GPU0"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Container path is only 'root'",
|
|
||||||
mounts: []Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: deviceListAsVolumeMountsRoot,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Discover 2 devices",
|
|
||||||
mounts: []Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"GPU0", "GPU1"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Discover 2 devices with slashes in the name",
|
|
||||||
mounts: []Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0-MIG0/0/1"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1-MIG0/0/1"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
|
||||||
devices := getDevicesFromMounts(tc.mounts)
|
|
||||||
require.Equal(t, tc.expectedDevices, devices)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestDeviceListSourcePriority(t *testing.T) {
|
func TestDeviceListSourcePriority(t *testing.T) {
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
description string
|
description string
|
||||||
mountDevices []Mount
|
mountDevices []specs.Mount
|
||||||
envvarDevices string
|
envvarDevices string
|
||||||
privileged bool
|
privileged bool
|
||||||
acceptUnprivileged bool
|
acceptUnprivileged bool
|
||||||
@ -596,14 +519,14 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
|||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
description: "Mount devices, unprivileged, no accept unprivileged",
|
description: "Mount devices, unprivileged, no accept unprivileged",
|
||||||
mountDevices: []Mount{
|
mountDevices: []specs.Mount{
|
||||||
{
|
{
|
||||||
Source: "/dev/null",
|
Source: "/dev/null",
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Source: "/dev/null",
|
Source: "/dev/null",
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
envvarDevices: "GPU2,GPU3",
|
envvarDevices: "GPU2,GPU3",
|
||||||
@ -641,14 +564,14 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
|
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
|
||||||
mountDevices: []Mount{
|
mountDevices: []specs.Mount{
|
||||||
{
|
{
|
||||||
Source: "/dev/null",
|
Source: "/dev/null",
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Source: "/dev/null",
|
Source: "/dev/null",
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
envvarDevices: "GPU2,GPU3",
|
envvarDevices: "GPU2,GPU3",
|
||||||
@ -659,14 +582,14 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
|
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
|
||||||
mountDevices: []Mount{
|
mountDevices: []specs.Mount{
|
||||||
{
|
{
|
||||||
Source: "/dev/null",
|
Source: "/dev/null",
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Source: "/dev/null",
|
Source: "/dev/null",
|
||||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
envvarDevices: "GPU2,GPU3",
|
envvarDevices: "GPU2,GPU3",
|
||||||
@ -684,14 +607,15 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
|||||||
image, _ := image.New(
|
image, _ := image.New(
|
||||||
image.WithEnvMap(
|
image.WithEnvMap(
|
||||||
map[string]string{
|
map[string]string{
|
||||||
envNVVisibleDevices: tc.envvarDevices,
|
image.EnvVarNvidiaVisibleDevices: tc.envvarDevices,
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
image.WithMounts(tc.mountDevices),
|
||||||
)
|
)
|
||||||
hookConfig, _ := getDefaultHookConfig()
|
hookConfig, _ := getDefaultHookConfig()
|
||||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||||
devices = getDevices(&hookConfig, image, tc.mountDevices, tc.privileged)
|
devices = getDevices(&hookConfig, image, tc.privileged)
|
||||||
}
|
}
|
||||||
|
|
||||||
// For all other tests, just grab the devices and check the results
|
// For all other tests, just grab the devices and check the results
|
||||||
@ -720,41 +644,41 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "",
|
image.EnvVarNvidiaVisibleDevices: "",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "void",
|
image.EnvVarNvidiaVisibleDevices: "void",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "none",
|
image.EnvVarNvidiaVisibleDevices: "none",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{""},
|
expectedDevices: []string{""},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
},
|
},
|
||||||
expectedDevices: []string{gpuID},
|
expectedDevices: []string{gpuID},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
envCUDAVersion: "legacy",
|
image.EnvVarCudaVersion: "legacy",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{gpuID},
|
expectedDevices: []string{gpuID},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "empty env returns all for legacy image",
|
description: "empty env returns all for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envCUDAVersion: "legacy",
|
image.EnvVarCudaVersion: "legacy",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"all"},
|
expectedDevices: []string{"all"},
|
||||||
},
|
},
|
||||||
@ -769,21 +693,21 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "",
|
image.EnvVarNvidiaVisibleDevices: "",
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "void",
|
image.EnvVarNvidiaVisibleDevices: "void",
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: "none",
|
image.EnvVarNvidiaVisibleDevices: "none",
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
},
|
},
|
||||||
expectedDevices: []string{""},
|
expectedDevices: []string{""},
|
||||||
@ -791,7 +715,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
},
|
},
|
||||||
expectedDevices: []string{gpuID},
|
expectedDevices: []string{gpuID},
|
||||||
@ -799,9 +723,9 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
envCUDAVersion: "legacy",
|
image.EnvVarCudaVersion: "legacy",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{gpuID},
|
expectedDevices: []string{gpuID},
|
||||||
},
|
},
|
||||||
@ -809,7 +733,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
description: "empty env returns all for legacy image",
|
description: "empty env returns all for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
envCUDAVersion: "legacy",
|
image.EnvVarCudaVersion: "legacy",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"all"},
|
expectedDevices: []string{"all"},
|
||||||
},
|
},
|
||||||
@ -854,7 +778,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envDockerResourceGPUs: gpuID,
|
envDockerResourceGPUs: gpuID,
|
||||||
envCUDAVersion: "legacy",
|
image.EnvVarCudaVersion: "legacy",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{gpuID},
|
expectedDevices: []string{gpuID},
|
||||||
},
|
},
|
||||||
@ -870,7 +794,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
},
|
},
|
||||||
expectedDevices: []string{anotherGPUID},
|
expectedDevices: []string{anotherGPUID},
|
||||||
@ -879,7 +803,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
},
|
},
|
||||||
expectedDevices: []string{anotherGPUID},
|
expectedDevices: []string{anotherGPUID},
|
||||||
@ -888,7 +812,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
||||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
},
|
},
|
||||||
@ -898,7 +822,7 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
|||||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
||||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVVisibleDevices: gpuID,
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
},
|
},
|
||||||
expectedDevices: []string{anotherGPUID},
|
expectedDevices: []string{anotherGPUID},
|
||||||
@ -931,7 +855,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Env is set for legacy image",
|
description: "Env is set for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVDriverCapabilities: "display,video",
|
image.EnvVarNvidiaDriverCapabilities: "display,video",
|
||||||
},
|
},
|
||||||
legacyImage: true,
|
legacyImage: true,
|
||||||
supportedCapabilities: supportedCapabilities,
|
supportedCapabilities: supportedCapabilities,
|
||||||
@ -940,7 +864,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Env is all for legacy image",
|
description: "Env is all for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVDriverCapabilities: "all",
|
image.EnvVarNvidiaDriverCapabilities: "all",
|
||||||
},
|
},
|
||||||
legacyImage: true,
|
legacyImage: true,
|
||||||
supportedCapabilities: supportedCapabilities,
|
supportedCapabilities: supportedCapabilities,
|
||||||
@ -949,7 +873,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Env is empty for legacy image",
|
description: "Env is empty for legacy image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVDriverCapabilities: "",
|
image.EnvVarNvidiaDriverCapabilities: "",
|
||||||
},
|
},
|
||||||
legacyImage: true,
|
legacyImage: true,
|
||||||
supportedCapabilities: supportedCapabilities,
|
supportedCapabilities: supportedCapabilities,
|
||||||
@ -965,7 +889,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Env is set for modern image",
|
description: "Env is set for modern image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVDriverCapabilities: "display,video",
|
image.EnvVarNvidiaDriverCapabilities: "display,video",
|
||||||
},
|
},
|
||||||
legacyImage: false,
|
legacyImage: false,
|
||||||
supportedCapabilities: supportedCapabilities,
|
supportedCapabilities: supportedCapabilities,
|
||||||
@ -981,7 +905,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Env is all for modern image",
|
description: "Env is all for modern image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVDriverCapabilities: "all",
|
image.EnvVarNvidiaDriverCapabilities: "all",
|
||||||
},
|
},
|
||||||
legacyImage: false,
|
legacyImage: false,
|
||||||
supportedCapabilities: supportedCapabilities,
|
supportedCapabilities: supportedCapabilities,
|
||||||
@ -990,7 +914,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Env is empty for modern image",
|
description: "Env is empty for modern image",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVDriverCapabilities: "",
|
image.EnvVarNvidiaDriverCapabilities: "",
|
||||||
},
|
},
|
||||||
legacyImage: false,
|
legacyImage: false,
|
||||||
supportedCapabilities: supportedCapabilities,
|
supportedCapabilities: supportedCapabilities,
|
||||||
@ -999,7 +923,7 @@ func TestGetDriverCapabilities(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "Invalid capabilities panic",
|
description: "Invalid capabilities panic",
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
envNVDriverCapabilities: "compute,utility",
|
image.EnvVarNvidiaDriverCapabilities: "compute,utility",
|
||||||
},
|
},
|
||||||
supportedCapabilities: "not-compute,not-utility",
|
supportedCapabilities: "not-compute,not-utility",
|
||||||
expectedPanic: true,
|
expectedPanic: true,
|
||||||
|
@ -47,7 +47,7 @@ func New(opt ...Option) (CUDA, error) {
|
|||||||
// build creates a CUDA image from the builder.
|
// build creates a CUDA image from the builder.
|
||||||
func (b builder) build() (CUDA, error) {
|
func (b builder) build() (CUDA, error) {
|
||||||
if b.disableRequire {
|
if b.disableRequire {
|
||||||
b.env[envNVDisableRequire] = "true"
|
b.env[EnvVarNvidiaDisableRequire] = "true"
|
||||||
}
|
}
|
||||||
|
|
||||||
c := CUDA{
|
c := CUDA{
|
||||||
|
@ -28,12 +28,9 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
envCUDAVersion = "CUDA_VERSION"
|
DeviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
|
||||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
volumeMountDevicePrefixCDI = "cdi/"
|
||||||
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
|
|
||||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
|
||||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||||
@ -80,8 +77,8 @@ func (i CUDA) HasEnvvar(key string) bool {
|
|||||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||||
func (i CUDA) IsLegacy() bool {
|
func (i CUDA) IsLegacy() bool {
|
||||||
legacyCudaVersion := i.env[envCUDAVersion]
|
legacyCudaVersion := i.env[EnvVarCudaVersion]
|
||||||
cudaRequire := i.env[envNVRequireCUDA]
|
cudaRequire := i.env[EnvVarNvidiaRequireCuda]
|
||||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,7 +92,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
|||||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||||
var requirements []string
|
var requirements []string
|
||||||
for name, value := range i.env {
|
for name, value := range i.env {
|
||||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
if strings.HasPrefix(name, NvidiaRequirePrefix) && !strings.HasPrefix(name, EnvVarNvidiaRequireJetpack) {
|
||||||
requirements = append(requirements, value)
|
requirements = append(requirements, value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -113,7 +110,7 @@ func (i CUDA) GetRequirements() ([]string, error) {
|
|||||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||||
func (i CUDA) HasDisableRequire() bool {
|
func (i CUDA) HasDisableRequire() bool {
|
||||||
if disable, exists := i.env[envNVDisableRequire]; exists {
|
if disable, exists := i.env[EnvVarNvidiaDisableRequire]; exists {
|
||||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||||
d, _ := strconv.ParseBool(disable)
|
d, _ := strconv.ParseBool(disable)
|
||||||
return d
|
return d
|
||||||
@ -157,7 +154,7 @@ func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
|||||||
|
|
||||||
// GetDriverCapabilities returns the requested driver capabilities.
|
// GetDriverCapabilities returns the requested driver capabilities.
|
||||||
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
||||||
env := i.env[envNVDriverCapabilities]
|
env := i.env[EnvVarNvidiaDriverCapabilities]
|
||||||
|
|
||||||
capabilities := make(DriverCapabilities)
|
capabilities := make(DriverCapabilities)
|
||||||
for _, c := range strings.Split(env, ",") {
|
for _, c := range strings.Split(env, ",") {
|
||||||
@ -168,7 +165,7 @@ func (i CUDA) GetDriverCapabilities() DriverCapabilities {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (i CUDA) legacyVersion() (string, error) {
|
func (i CUDA) legacyVersion() (string, error) {
|
||||||
cudaVersion := i.env[envCUDAVersion]
|
cudaVersion := i.env[EnvVarCudaVersion]
|
||||||
majorMinor, err := parseMajorMinorVersion(cudaVersion)
|
majorMinor, err := parseMajorMinorVersion(cudaVersion)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
|
return "", fmt.Errorf("invalid CUDA version %v: %v", cudaVersion, err)
|
||||||
@ -202,7 +199,7 @@ func parseMajorMinorVersion(version string) (string, error) {
|
|||||||
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
||||||
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
||||||
var hasCDIdevice bool
|
var hasCDIdevice bool
|
||||||
for _, device := range i.DevicesFromEnvvars("NVIDIA_VISIBLE_DEVICES").List() {
|
for _, device := range i.VisibleDevicesFromEnvVar() {
|
||||||
if !parser.IsQualifiedName(device) {
|
if !parser.IsQualifiedName(device) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
@ -218,14 +215,28 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
|||||||
return hasCDIdevice
|
return hasCDIdevice
|
||||||
}
|
}
|
||||||
|
|
||||||
const (
|
// VisibleDevicesFromEnvVar returns the set of visible devices requested through
|
||||||
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
// the NVIDIA_VISIBLE_DEVICES environment variable.
|
||||||
)
|
func (i CUDA) VisibleDevicesFromEnvVar() []string {
|
||||||
|
return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
|
||||||
|
}
|
||||||
|
|
||||||
|
// VisibleDevicesFromMounts returns the set of visible devices requested as mounts.
|
||||||
|
func (i CUDA) VisibleDevicesFromMounts() []string {
|
||||||
|
var devices []string
|
||||||
|
for _, device := range i.DevicesFromMounts() {
|
||||||
|
if strings.HasPrefix(device, volumeMountDevicePrefixCDI) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
devices = append(devices, device)
|
||||||
|
}
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
// DevicesFromMounts returns a list of device specified as mounts.
|
// DevicesFromMounts returns a list of device specified as mounts.
|
||||||
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
||||||
func (i CUDA) DevicesFromMounts() []string {
|
func (i CUDA) DevicesFromMounts() []string {
|
||||||
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
root := filepath.Clean(DeviceListAsVolumeMountsRoot)
|
||||||
seen := make(map[string]bool)
|
seen := make(map[string]bool)
|
||||||
var devices []string
|
var devices []string
|
||||||
for _, m := range i.mounts {
|
for _, m := range i.mounts {
|
||||||
@ -260,10 +271,10 @@ func (i CUDA) DevicesFromMounts() []string {
|
|||||||
func (i CUDA) CDIDevicesFromMounts() []string {
|
func (i CUDA) CDIDevicesFromMounts() []string {
|
||||||
var devices []string
|
var devices []string
|
||||||
for _, mountDevice := range i.DevicesFromMounts() {
|
for _, mountDevice := range i.DevicesFromMounts() {
|
||||||
if !strings.HasPrefix(mountDevice, "cdi/") {
|
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixCDI) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
parts := strings.SplitN(strings.TrimPrefix(mountDevice, "cdi/"), "/", 3)
|
parts := strings.SplitN(strings.TrimPrefix(mountDevice, volumeMountDevicePrefixCDI), "/", 3)
|
||||||
if len(parts) != 3 {
|
if len(parts) != 3 {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
@ -17,8 +17,10 @@
|
|||||||
package image
|
package image
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -130,3 +132,80 @@ func TestGetRequirements(t *testing.T) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
||||||
|
var tests = []struct {
|
||||||
|
description string
|
||||||
|
mounts []specs.Mount
|
||||||
|
expectedDevices []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "No mounts",
|
||||||
|
mounts: nil,
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Host path is not /dev/null",
|
||||||
|
mounts: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/not/dev/null",
|
||||||
|
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Container path is not prefixed by 'root'",
|
||||||
|
mounts: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join("/other/prefix", "GPU0"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Container path is only 'root'",
|
||||||
|
mounts: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: DeviceListAsVolumeMountsRoot,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Discover 2 devices",
|
||||||
|
mounts: makeTestMounts("GPU0", "GPU1"),
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Discover 2 devices with slashes in the name",
|
||||||
|
mounts: makeTestMounts("GPU0-MIG0/0/1", "GPU1-MIG0/0/1"),
|
||||||
|
expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "cdi devices are ignored",
|
||||||
|
mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"),
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
image, _ := New(WithMounts(tc.mounts))
|
||||||
|
require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func makeTestMounts(paths ...string) []specs.Mount {
|
||||||
|
var mounts []specs.Mount
|
||||||
|
for _, path := range paths {
|
||||||
|
mount := specs.Mount{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, path),
|
||||||
|
}
|
||||||
|
mounts = append(mounts, mount)
|
||||||
|
}
|
||||||
|
return mounts
|
||||||
|
}
|
||||||
|
31
internal/config/image/envvars.go
Normal file
31
internal/config/image/envvars.go
Normal file
@ -0,0 +1,31 @@
|
|||||||
|
/**
|
||||||
|
# Copyright 2024 NVIDIA CORPORATION
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
**/
|
||||||
|
|
||||||
|
package image
|
||||||
|
|
||||||
|
// Names of the environment variables referenced through this package.
const (
	// NvidiaRequirePrefix is the prefix shared by the NVIDIA_REQUIRE_*
	// environment variable names declared below.
	NvidiaRequirePrefix = "NVIDIA_REQUIRE_"

	EnvVarCudaVersion              = "CUDA_VERSION"
	EnvVarNvidiaDisableRequire     = "NVIDIA_DISABLE_REQUIRE"
	EnvVarNvidiaDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
	EnvVarNvidiaImexChannels       = "NVIDIA_IMEX_CHANNELS"
	EnvVarNvidiaMigConfigDevices   = "NVIDIA_MIG_CONFIG_DEVICES"
	EnvVarNvidiaMigMonitorDevices  = "NVIDIA_MIG_MONITOR_DEVICES"
	EnvVarNvidiaRequireCuda        = NvidiaRequirePrefix + "CUDA"
	EnvVarNvidiaRequireJetpack     = NvidiaRequirePrefix + "JETPACK"
	EnvVarNvidiaVisibleDevices     = "NVIDIA_VISIBLE_DEVICES"
)
|
@ -90,11 +90,9 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar)
|
|
||||||
|
|
||||||
var devices []string
|
var devices []string
|
||||||
seen := make(map[string]bool)
|
seen := make(map[string]bool)
|
||||||
for _, name := range envDevices.List() {
|
for _, name := range container.VisibleDevicesFromEnvVar() {
|
||||||
if !parser.IsQualifiedName(name) {
|
if !parser.IsQualifiedName(name) {
|
||||||
name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name)
|
name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name)
|
||||||
}
|
}
|
||||||
|
@ -30,23 +30,16 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
|
|
||||||
visibleDevicesVoid = "void"
|
|
||||||
|
|
||||||
nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK"
|
|
||||||
)
|
|
||||||
|
|
||||||
// NewCSVModifier creates a modifier that applies modifications to an OCI spec if required by the runtime wrapper.
|
// NewCSVModifier creates a modifier that applies modifications to an OCI spec if required by the runtime wrapper.
|
||||||
// The modifications are defined by CSV MountSpecs.
|
// The modifications are defined by CSV MountSpecs.
|
||||||
func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image.CUDA) (oci.SpecModifier, error) {
|
||||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
if devices := container.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
||||||
logger.Infof("No modification required; no devices requested")
|
logger.Infof("No modification required; no devices requested")
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
logger.Infof("Constructing modifier from config: %+v", *cfg)
|
logger.Infof("Constructing modifier from config: %+v", *cfg)
|
||||||
|
|
||||||
if err := checkRequirements(logger, image); err != nil {
|
if err := checkRequirements(logger, container); err != nil {
|
||||||
return nil, fmt.Errorf("requirements not met: %v", err)
|
return nil, fmt.Errorf("requirements not met: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -55,7 +48,7 @@ func NewCSVModifier(logger logger.Interface, cfg *config.Config, image image.CUD
|
|||||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if image.Getenv(nvidiaRequireJetpackEnvvar) != "csv-mounts=all" {
|
if container.Getenv(image.EnvVarNvidiaRequireJetpack) != "csv-mounts=all" {
|
||||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -36,7 +36,7 @@ import (
|
|||||||
//
|
//
|
||||||
// If no devices are selected, no changes are made.
|
// If no devices are selected, no changes are made.
|
||||||
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
||||||
if devices := image.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
||||||
logger.Infof("No modification required; no devices requested")
|
logger.Infof("No modification required; no devices requested")
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
@ -29,8 +29,8 @@ import (
|
|||||||
|
|
||||||
// NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification.
|
// NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification.
|
||||||
// The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
|
// The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
|
||||||
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver) (oci.SpecModifier, error) {
|
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerImage image.CUDA, driver *root.Driver) (oci.SpecModifier, error) {
|
||||||
if required, reason := requiresGraphicsModifier(image); !required {
|
if required, reason := requiresGraphicsModifier(containerImage); !required {
|
||||||
logger.Infof("No graphics modifier required: %v", reason)
|
logger.Infof("No graphics modifier required: %v", reason)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
@ -50,7 +50,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag
|
|||||||
devRoot := driver.Root
|
devRoot := driver.Root
|
||||||
drmNodes, err := discover.NewDRMNodesDiscoverer(
|
drmNodes, err := discover.NewDRMNodesDiscoverer(
|
||||||
logger,
|
logger,
|
||||||
image.DevicesFromEnvvars(visibleDevicesEnvvar),
|
containerImage.DevicesFromEnvvars(image.EnvVarNvidiaVisibleDevices),
|
||||||
devRoot,
|
devRoot,
|
||||||
nvidiaCDIHookPath,
|
nvidiaCDIHookPath,
|
||||||
)
|
)
|
||||||
@ -67,7 +67,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, image imag
|
|||||||
|
|
||||||
// requiresGraphicsModifier determines whether a graphics modifier is required.
|
// requiresGraphicsModifier determines whether a graphics modifier is required.
|
||||||
func requiresGraphicsModifier(cudaImage image.CUDA) (bool, string) {
|
func requiresGraphicsModifier(cudaImage image.CUDA) (bool, string) {
|
||||||
if devices := cudaImage.DevicesFromEnvvars(visibleDevicesEnvvar); len(devices.List()) == 0 {
|
if devices := cudaImage.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
||||||
return false, "no devices requested"
|
return false, "no devices requested"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -24,6 +24,7 @@ import (
|
|||||||
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
"github.com/NVIDIA/go-nvml/pkg/nvml"
|
||||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||||
|
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup/root"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvsandboxutils"
|
||||||
@ -200,7 +201,7 @@ func (m *wrapper) GetCommonEdits() (*cdi.ContainerEdits, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
edits.Env = append(edits.Env, "NVIDIA_VISIBLE_DEVICES=void")
|
edits.Env = append(edits.Env, image.EnvVarNvidiaVisibleDevices+"=void")
|
||||||
|
|
||||||
return edits, nil
|
return edits, nil
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user