mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Merge pull request #1110 from ArangoGutierrez/i/1049
Refactor extracting requested devices from the container image
This commit is contained in:
commit
b55255e31f
@ -13,10 +13,6 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
|
||||||
capSysAdmin = "CAP_SYS_ADMIN"
|
|
||||||
)
|
|
||||||
|
|
||||||
type nvidiaConfig struct {
|
type nvidiaConfig struct {
|
||||||
Devices []string
|
Devices []string
|
||||||
MigConfigDevices string
|
MigConfigDevices string
|
||||||
@ -103,9 +99,9 @@ func loadSpec(path string) (spec *Spec) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func isPrivileged(s *Spec) bool {
|
func (s *Spec) GetCapabilities() []string {
|
||||||
if s.Process.Capabilities == nil {
|
if s == nil || s.Process == nil || s.Process.Capabilities == nil {
|
||||||
return false
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var caps []string
|
var caps []string
|
||||||
@ -118,67 +114,22 @@ func isPrivileged(s *Spec) bool {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
||||||
}
|
}
|
||||||
for _, c := range caps {
|
return caps
|
||||||
if c == capSysAdmin {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise, parse s.Process.Capabilities as:
|
// Otherwise, parse s.Process.Capabilities as:
|
||||||
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
|
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
|
||||||
process := specs.Process{
|
capabilities := specs.LinuxCapabilities{}
|
||||||
Env: s.Process.Env,
|
err := json.Unmarshal(*s.Process.Capabilities, &capabilities)
|
||||||
}
|
|
||||||
|
|
||||||
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fullSpec := specs.Spec{
|
return image.OCISpecCapabilities(capabilities).GetCapabilities()
|
||||||
Version: *s.Version,
|
|
||||||
Process: &process,
|
|
||||||
}
|
|
||||||
|
|
||||||
return image.IsPrivileged(&fullSpec)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
|
func isPrivileged(s *Spec) bool {
|
||||||
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
return image.IsPrivileged(s)
|
||||||
// if specified.
|
|
||||||
for _, envvar := range swarmResourceEnvvars {
|
|
||||||
if containerImage.HasEnvvar(envvar) {
|
|
||||||
return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return containerImage.VisibleDevicesFromEnvVar()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (hookConfig *hookConfig) getDevices(image image.CUDA, privileged bool) []string {
|
|
||||||
// If enabled, try and get the device list from volume mounts first
|
|
||||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
|
||||||
devices := image.VisibleDevicesFromMounts()
|
|
||||||
if len(devices) > 0 {
|
|
||||||
return devices
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback to reading from the environment variable if privileges are correct
|
|
||||||
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
|
|
||||||
if len(devices) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
if privileged || hookConfig.AcceptEnvvarUnprivileged {
|
|
||||||
return devices
|
|
||||||
}
|
|
||||||
|
|
||||||
configName := hookConfig.getConfigOption("AcceptEnvvarUnprivileged")
|
|
||||||
log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, %v=%v) ", privileged, configName, hookConfig.AcceptEnvvarUnprivileged)
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMigConfigDevices(i image.CUDA) *string {
|
func getMigConfigDevices(i image.CUDA) *string {
|
||||||
@ -225,7 +176,6 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
|
|||||||
// We use the default driver capabilities by default. This is filtered to only include the
|
// We use the default driver capabilities by default. This is filtered to only include the
|
||||||
// supported capabilities
|
// supported capabilities
|
||||||
supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)
|
supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)
|
||||||
|
|
||||||
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
||||||
|
|
||||||
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
|
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
|
||||||
@ -251,7 +201,7 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
|
|||||||
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
|
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
|
||||||
legacyImage := image.IsLegacy()
|
legacyImage := image.IsLegacy()
|
||||||
|
|
||||||
devices := hookConfig.getDevices(image, privileged)
|
devices := image.VisibleDevices()
|
||||||
if len(devices) == 0 {
|
if len(devices) == 0 {
|
||||||
// empty devices means this is not a GPU container.
|
// empty devices means this is not a GPU container.
|
||||||
return nil
|
return nil
|
||||||
@ -306,20 +256,25 @@ func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {
|
|||||||
|
|
||||||
s := loadSpec(path.Join(b, "config.json"))
|
s := loadSpec(path.Join(b, "config.json"))
|
||||||
|
|
||||||
image, err := image.New(
|
privileged := isPrivileged(s)
|
||||||
|
|
||||||
|
i, err := image.New(
|
||||||
image.WithEnv(s.Process.Env),
|
image.WithEnv(s.Process.Env),
|
||||||
image.WithMounts(s.Mounts),
|
image.WithMounts(s.Mounts),
|
||||||
|
image.WithPrivileged(privileged),
|
||||||
image.WithDisableRequire(hookConfig.DisableRequire),
|
image.WithDisableRequire(hookConfig.DisableRequire),
|
||||||
|
image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts),
|
||||||
|
image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged),
|
||||||
|
image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...),
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln(err)
|
log.Panicln(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
privileged := isPrivileged(s)
|
|
||||||
return containerConfig{
|
return containerConfig{
|
||||||
Pid: h.Pid,
|
Pid: h.Pid,
|
||||||
Rootfs: s.Root.Path,
|
Rootfs: s.Root.Path,
|
||||||
Image: image,
|
Image: i,
|
||||||
Nvidia: hookConfig.getNvidiaConfig(image, privileged),
|
Nvidia: hookConfig.getNvidiaConfig(i, privileged),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,10 +1,8 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"path/filepath"
|
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||||
@ -479,7 +477,10 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
image, _ := image.New(
|
image, _ := image.New(
|
||||||
image.WithEnvMap(tc.env),
|
image.WithEnvMap(tc.env),
|
||||||
|
image.WithPrivileged(tc.privileged),
|
||||||
|
image.WithPreferredVisibleDevicesEnvVars(tc.hookConfig.getSwarmResourceEnvvars()...),
|
||||||
)
|
)
|
||||||
|
|
||||||
// Wrap the call to getNvidiaConfig() in a closure.
|
// Wrap the call to getNvidiaConfig() in a closure.
|
||||||
var cfg *nvidiaConfig
|
var cfg *nvidiaConfig
|
||||||
getConfig := func() {
|
getConfig := func() {
|
||||||
@ -518,340 +519,6 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestDeviceListSourcePriority(t *testing.T) {
|
|
||||||
var tests = []struct {
|
|
||||||
description string
|
|
||||||
mountDevices []specs.Mount
|
|
||||||
envvarDevices string
|
|
||||||
privileged bool
|
|
||||||
acceptUnprivileged bool
|
|
||||||
acceptMounts bool
|
|
||||||
expectedDevices []string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
description: "Mount devices, unprivileged, no accept unprivileged",
|
|
||||||
mountDevices: []specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
envvarDevices: "GPU2,GPU3",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
expectedDevices: []string{"GPU0", "GPU1"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "No mount devices, unprivileged, no accept unprivileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "GPU0,GPU1",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "No mount devices, privileged, no accept unprivileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "GPU0,GPU1",
|
|
||||||
privileged: true,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
expectedDevices: []string{"GPU0", "GPU1"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "No mount devices, unprivileged, accept unprivileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "GPU0,GPU1",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: true,
|
|
||||||
acceptMounts: true,
|
|
||||||
expectedDevices: []string{"GPU0", "GPU1"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
|
|
||||||
mountDevices: []specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
envvarDevices: "GPU2,GPU3",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: true,
|
|
||||||
acceptMounts: false,
|
|
||||||
expectedDevices: []string{"GPU2", "GPU3"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
|
|
||||||
mountDevices: []specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
envvarDevices: "GPU2,GPU3",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: false,
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
|
||||||
// Wrap the call to getDevices() in a closure.
|
|
||||||
var devices []string
|
|
||||||
getDevices := func() {
|
|
||||||
image, _ := image.New(
|
|
||||||
image.WithEnvMap(
|
|
||||||
map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: tc.envvarDevices,
|
|
||||||
},
|
|
||||||
),
|
|
||||||
image.WithMounts(tc.mountDevices),
|
|
||||||
)
|
|
||||||
defaultConfig, _ := config.GetDefault()
|
|
||||||
cfg := &hookConfig{defaultConfig}
|
|
||||||
cfg.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
|
||||||
cfg.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
|
||||||
devices = cfg.getDevices(image, tc.privileged)
|
|
||||||
}
|
|
||||||
|
|
||||||
// For all other tests, just grab the devices and check the results
|
|
||||||
getDevices()
|
|
||||||
|
|
||||||
require.Equal(t, tc.expectedDevices, devices)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetDevicesFromEnvvar(t *testing.T) {
|
|
||||||
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
|
|
||||||
gpuID := "GPU-12345"
|
|
||||||
anotherGPUID := "GPU-67890"
|
|
||||||
thirdGPUID := "MIG-12345"
|
|
||||||
|
|
||||||
var tests = []struct {
|
|
||||||
description string
|
|
||||||
swarmResourceEnvvars []string
|
|
||||||
env map[string]string
|
|
||||||
expectedDevices []string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
description: "empty env returns nil for non-legacy image",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: "void",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: "none",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{""},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
image.EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "empty env returns all for legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"all"},
|
|
||||||
},
|
|
||||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
|
||||||
// not enabled
|
|
||||||
{
|
|
||||||
description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: "",
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: "void",
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: "none",
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{""},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
image.EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "empty env returns all for legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
image.EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"all"},
|
|
||||||
},
|
|
||||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
|
||||||
// enabled
|
|
||||||
{
|
|
||||||
description: "empty env returns nil for non-legacy image",
|
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: "void",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: "none",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{""},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: gpuID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: gpuID,
|
|
||||||
image.EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{anotherGPUID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
|
||||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{anotherGPUID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
|
||||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{anotherGPUID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
|
||||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{thirdGPUID, anotherGPUID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
|
||||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
|
||||||
env: map[string]string{
|
|
||||||
image.EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{anotherGPUID},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
|
||||||
image, _ := image.New(
|
|
||||||
image.WithEnvMap(tc.env),
|
|
||||||
)
|
|
||||||
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
|
|
||||||
require.EqualValues(t, tc.expectedDevices, devices)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetDriverCapabilities(t *testing.T) {
|
func TestGetDriverCapabilities(t *testing.T) {
|
||||||
|
|
||||||
supportedCapabilities := "compute,display,utility,video"
|
supportedCapabilities := "compute,display,utility,video"
|
||||||
|
@ -88,7 +88,7 @@ func (c hookConfig) getConfigOption(fieldName string) string {
|
|||||||
|
|
||||||
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
||||||
func (c *hookConfig) getSwarmResourceEnvvars() []string {
|
func (c *hookConfig) getSwarmResourceEnvvars() []string {
|
||||||
if c.SwarmResource == "" {
|
if c == nil || c.SwarmResource == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -21,22 +21,36 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
type builder struct {
|
type builder struct {
|
||||||
env map[string]string
|
CUDA
|
||||||
mounts []specs.Mount
|
|
||||||
disableRequire bool
|
disableRequire bool
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Option is a functional option for creating a CUDA image.
|
||||||
|
type Option func(*builder) error
|
||||||
|
|
||||||
// New creates a new CUDA image from the input options.
|
// New creates a new CUDA image from the input options.
|
||||||
func New(opt ...Option) (CUDA, error) {
|
func New(opt ...Option) (CUDA, error) {
|
||||||
b := &builder{}
|
b := &builder{
|
||||||
|
CUDA: CUDA{
|
||||||
|
acceptEnvvarUnprivileged: true,
|
||||||
|
},
|
||||||
|
}
|
||||||
for _, o := range opt {
|
for _, o := range opt {
|
||||||
if err := o(b); err != nil {
|
if err := o(b); err != nil {
|
||||||
return CUDA{}, err
|
return CUDA{}, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if b.logger == nil {
|
||||||
|
b.logger = logger.New()
|
||||||
|
}
|
||||||
|
|
||||||
if b.env == nil {
|
if b.env == nil {
|
||||||
b.env = make(map[string]string)
|
b.env = make(map[string]string)
|
||||||
}
|
}
|
||||||
@ -50,15 +64,22 @@ func (b builder) build() (CUDA, error) {
|
|||||||
b.env[EnvVarNvidiaDisableRequire] = "true"
|
b.env[EnvVarNvidiaDisableRequire] = "true"
|
||||||
}
|
}
|
||||||
|
|
||||||
c := CUDA{
|
return b.CUDA, nil
|
||||||
env: b.env,
|
|
||||||
mounts: b.mounts,
|
|
||||||
}
|
|
||||||
return c, nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Option is a functional option for creating a CUDA image.
|
func WithAcceptDeviceListAsVolumeMounts(acceptDeviceListAsVolumeMounts bool) Option {
|
||||||
type Option func(*builder) error
|
return func(b *builder) error {
|
||||||
|
b.acceptDeviceListAsVolumeMounts = acceptDeviceListAsVolumeMounts
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func WithAcceptEnvvarUnprivileged(acceptEnvvarUnprivileged bool) Option {
|
||||||
|
return func(b *builder) error {
|
||||||
|
b.acceptEnvvarUnprivileged = acceptEnvvarUnprivileged
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// WithDisableRequire sets the disable require option.
|
// WithDisableRequire sets the disable require option.
|
||||||
func WithDisableRequire(disableRequire bool) Option {
|
func WithDisableRequire(disableRequire bool) Option {
|
||||||
@ -93,6 +114,14 @@ func WithEnvMap(env map[string]string) Option {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithLogger sets the logger to use when creating the CUDA image.
|
||||||
|
func WithLogger(logger logger.Interface) Option {
|
||||||
|
return func(b *builder) error {
|
||||||
|
b.logger = logger
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// WithMounts sets the mounts associated with the CUDA image.
|
// WithMounts sets the mounts associated with the CUDA image.
|
||||||
func WithMounts(mounts []specs.Mount) Option {
|
func WithMounts(mounts []specs.Mount) Option {
|
||||||
return func(b *builder) error {
|
return func(b *builder) error {
|
||||||
@ -100,3 +129,20 @@ func WithMounts(mounts []specs.Mount) Option {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// WithPreferredVisibleDevicesEnvVars sets the environment variables that
|
||||||
|
// should take precedence over the default NVIDIA_VISIBLE_DEVICES.
|
||||||
|
func WithPreferredVisibleDevicesEnvVars(preferredVisibleDeviceEnvVars ...string) Option {
|
||||||
|
return func(b *builder) error {
|
||||||
|
b.preferredVisibleDeviceEnvVars = preferredVisibleDeviceEnvVars
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// WithPrivileged sets whether an image is privileged or not.
|
||||||
|
func WithPrivileged(isPrivileged bool) Option {
|
||||||
|
return func(b *builder) error {
|
||||||
|
b.isPrivileged = isPrivileged
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -25,6 +25,8 @@ import (
|
|||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"golang.org/x/mod/semver"
|
"golang.org/x/mod/semver"
|
||||||
"tags.cncf.io/container-device-interface/pkg/parser"
|
"tags.cncf.io/container-device-interface/pkg/parser"
|
||||||
|
|
||||||
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -38,27 +40,37 @@ const (
|
|||||||
// a map of environment variable to values that can be used to perform lookups
|
// a map of environment variable to values that can be used to perform lookups
|
||||||
// such as requirements.
|
// such as requirements.
|
||||||
type CUDA struct {
|
type CUDA struct {
|
||||||
env map[string]string
|
logger logger.Interface
|
||||||
mounts []specs.Mount
|
|
||||||
|
env map[string]string
|
||||||
|
isPrivileged bool
|
||||||
|
mounts []specs.Mount
|
||||||
|
|
||||||
|
acceptDeviceListAsVolumeMounts bool
|
||||||
|
acceptEnvvarUnprivileged bool
|
||||||
|
preferredVisibleDeviceEnvVars []string
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||||
// The process environment is read (if present) to construc the CUDA Image.
|
// The process environment is read (if present) to construc the CUDA Image.
|
||||||
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
func NewCUDAImageFromSpec(spec *specs.Spec, opts ...Option) (CUDA, error) {
|
||||||
var env []string
|
var env []string
|
||||||
if spec != nil && spec.Process != nil {
|
if spec != nil && spec.Process != nil {
|
||||||
env = spec.Process.Env
|
env = spec.Process.Env
|
||||||
}
|
}
|
||||||
|
|
||||||
return New(
|
specOpts := []Option{
|
||||||
WithEnv(env),
|
WithEnv(env),
|
||||||
WithMounts(spec.Mounts),
|
WithMounts(spec.Mounts),
|
||||||
)
|
WithPrivileged(IsPrivileged((*OCISpec)(spec))),
|
||||||
|
}
|
||||||
|
|
||||||
|
return New(append(opts, specOpts...)...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
// newCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||||
// is a list of strings of the form ENVAR=VALUE.
|
// is a list of strings of the form ENVAR=VALUE.
|
||||||
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
func newCUDAImageFromEnv(env []string) (CUDA, error) {
|
||||||
return New(WithEnv(env))
|
return New(WithEnv(env))
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -216,14 +228,53 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
|||||||
return hasCDIdevice
|
return hasCDIdevice
|
||||||
}
|
}
|
||||||
|
|
||||||
// VisibleDevicesFromEnvVar returns the set of visible devices requested through
|
// VisibleDevices returns a list of devices requested in the container image.
|
||||||
// the NVIDIA_VISIBLE_DEVICES environment variable.
|
// If volume mount requests are enabled these are returned if requested,
|
||||||
|
// otherwise device requests through environment variables are considered.
|
||||||
|
// In cases where environment variable requests required privileged containers,
|
||||||
|
// such devices requests are ignored.
|
||||||
|
func (i CUDA) VisibleDevices() []string {
|
||||||
|
// If enabled, try and get the device list from volume mounts first
|
||||||
|
if i.acceptDeviceListAsVolumeMounts {
|
||||||
|
volumeMountDeviceRequests := i.visibleDevicesFromMounts()
|
||||||
|
if len(volumeMountDeviceRequests) > 0 {
|
||||||
|
return volumeMountDeviceRequests
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get the Fallback to reading from the environment variable if privileges are correct
|
||||||
|
envVarDeviceRequests := i.VisibleDevicesFromEnvVar()
|
||||||
|
if len(envVarDeviceRequests) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the container is privileged, or environment variable requests are
|
||||||
|
// allowed for unprivileged containers, these devices are returned.
|
||||||
|
if i.isPrivileged || i.acceptEnvvarUnprivileged {
|
||||||
|
return envVarDeviceRequests
|
||||||
|
}
|
||||||
|
|
||||||
|
// We log a warning if we are ignoring the environment variable requests.
|
||||||
|
i.logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES in unprivileged container")
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// VisibleDevicesFromEnvVar returns the set of visible devices requested through environment variables.
|
||||||
|
// If any of the preferredVisibleDeviceEnvVars are present in the image, they
|
||||||
|
// are used to determine the visible devices. If this is not the case, the
|
||||||
|
// NVIDIA_VISIBLE_DEVICES environment variable is used.
|
||||||
func (i CUDA) VisibleDevicesFromEnvVar() []string {
|
func (i CUDA) VisibleDevicesFromEnvVar() []string {
|
||||||
|
for _, envVar := range i.preferredVisibleDeviceEnvVars {
|
||||||
|
if i.HasEnvvar(envVar) {
|
||||||
|
return i.DevicesFromEnvvars(i.preferredVisibleDeviceEnvVars...).List()
|
||||||
|
}
|
||||||
|
}
|
||||||
return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
|
return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
|
||||||
}
|
}
|
||||||
|
|
||||||
// VisibleDevicesFromMounts returns the set of visible devices requested as mounts.
|
// visibleDevicesFromMounts returns the set of visible devices requested as mounts.
|
||||||
func (i CUDA) VisibleDevicesFromMounts() []string {
|
func (i CUDA) visibleDevicesFromMounts() []string {
|
||||||
var devices []string
|
var devices []string
|
||||||
for _, device := range i.DevicesFromMounts() {
|
for _, device := range i.DevicesFromMounts() {
|
||||||
switch {
|
switch {
|
||||||
@ -238,7 +289,6 @@ func (i CUDA) VisibleDevicesFromMounts() []string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// DevicesFromMounts returns a list of device specified as mounts.
|
// DevicesFromMounts returns a list of device specified as mounts.
|
||||||
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
|
||||||
func (i CUDA) DevicesFromMounts() []string {
|
func (i CUDA) DevicesFromMounts() []string {
|
||||||
root := filepath.Clean(DeviceListAsVolumeMountsRoot)
|
root := filepath.Clean(DeviceListAsVolumeMountsRoot)
|
||||||
seen := make(map[string]bool)
|
seen := make(map[string]bool)
|
||||||
|
@ -21,9 +21,91 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestNewCUDAImageFromSpec(t *testing.T) {
|
||||||
|
logger, _ := testlog.NewNullLogger()
|
||||||
|
|
||||||
|
testCases := []struct {
|
||||||
|
description string
|
||||||
|
spec *specs.Spec
|
||||||
|
options []Option
|
||||||
|
expected CUDA
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "no env vars",
|
||||||
|
spec: &specs.Spec{
|
||||||
|
Process: &specs.Process{
|
||||||
|
Env: []string{},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: CUDA{
|
||||||
|
logger: logger,
|
||||||
|
env: map[string]string{},
|
||||||
|
acceptEnvvarUnprivileged: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES=all",
|
||||||
|
spec: &specs.Spec{
|
||||||
|
Process: &specs.Process{
|
||||||
|
Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
expected: CUDA{
|
||||||
|
logger: logger,
|
||||||
|
env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
|
||||||
|
acceptEnvvarUnprivileged: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Spec overrides options",
|
||||||
|
spec: &specs.Spec{
|
||||||
|
Process: &specs.Process{
|
||||||
|
Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
|
||||||
|
},
|
||||||
|
Mounts: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/spec-source",
|
||||||
|
Destination: "/spec-destination",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
options: []Option{
|
||||||
|
WithEnvMap(map[string]string{"OTHER": "value"}),
|
||||||
|
WithMounts([]specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/option-source",
|
||||||
|
Destination: "/option-destination",
|
||||||
|
},
|
||||||
|
}),
|
||||||
|
},
|
||||||
|
expected: CUDA{
|
||||||
|
logger: logger,
|
||||||
|
env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
|
||||||
|
mounts: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/spec-source",
|
||||||
|
Destination: "/spec-destination",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
acceptEnvvarUnprivileged: true,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range testCases {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
options := append([]Option{WithLogger(logger)}, tc.options...)
|
||||||
|
image, err := NewCUDAImageFromSpec(tc.spec, options...)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.EqualValues(t, tc.expected, image)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseMajorMinorVersionValid(t *testing.T) {
|
func TestParseMajorMinorVersionValid(t *testing.T) {
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
version string
|
version string
|
||||||
@ -122,7 +204,7 @@ func TestGetRequirements(t *testing.T) {
|
|||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
image, err := NewCUDAImageFromEnv(tc.env)
|
image, err := newCUDAImageFromEnv(tc.env)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
requirements, err := image.GetRequirements()
|
requirements, err := image.GetRequirements()
|
||||||
@ -133,6 +215,226 @@ func TestGetRequirements(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||||
|
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
|
||||||
|
gpuID := "GPU-12345"
|
||||||
|
anotherGPUID := "GPU-67890"
|
||||||
|
thirdGPUID := "MIG-12345"
|
||||||
|
|
||||||
|
var tests = []struct {
|
||||||
|
description string
|
||||||
|
preferredVisibleDeviceEnvVars []string
|
||||||
|
env map[string]string
|
||||||
|
expectedDevices []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "empty env returns nil for non-legacy image",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: "void",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: "none",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{""},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "empty env returns all for legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{"all"},
|
||||||
|
},
|
||||||
|
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
||||||
|
// not enabled
|
||||||
|
{
|
||||||
|
description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: "",
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: "void",
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: "none",
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{""},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "empty env returns all for legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{"all"},
|
||||||
|
},
|
||||||
|
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
||||||
|
// enabled
|
||||||
|
{
|
||||||
|
description: "empty env returns nil for non-legacy image",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: "void",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: "none",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{""},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: gpuID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: gpuID,
|
||||||
|
EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{anotherGPUID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{anotherGPUID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{anotherGPUID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||||
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{thirdGPUID, anotherGPUID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
||||||
|
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
|
env: map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{anotherGPUID},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
image, err := New(
|
||||||
|
WithEnvMap(tc.env),
|
||||||
|
WithPrivileged(true),
|
||||||
|
WithAcceptDeviceListAsVolumeMounts(false),
|
||||||
|
WithAcceptEnvvarUnprivileged(false),
|
||||||
|
WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...),
|
||||||
|
)
|
||||||
|
|
||||||
|
require.NoError(t, err)
|
||||||
|
devices := image.VisibleDevicesFromEnvVar()
|
||||||
|
require.EqualValues(t, tc.expectedDevices, devices)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
description string
|
description string
|
||||||
@ -197,8 +499,121 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
|||||||
}
|
}
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
image, _ := New(WithMounts(tc.mounts))
|
image, err := New(WithMounts(tc.mounts))
|
||||||
require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts())
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, tc.expectedDevices, image.visibleDevicesFromMounts())
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestVisibleDevices(t *testing.T) {
|
||||||
|
var tests = []struct {
|
||||||
|
description string
|
||||||
|
mountDevices []specs.Mount
|
||||||
|
envvarDevices string
|
||||||
|
privileged bool
|
||||||
|
acceptUnprivileged bool
|
||||||
|
acceptMounts bool
|
||||||
|
expectedDevices []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "Mount devices, unprivileged, no accept unprivileged",
|
||||||
|
mountDevices: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
envvarDevices: "GPU2,GPU3",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
acceptMounts: true,
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, unprivileged, no accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
acceptMounts: true,
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, privileged, no accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: true,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
acceptMounts: true,
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, unprivileged, accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: true,
|
||||||
|
acceptMounts: true,
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
|
||||||
|
mountDevices: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
envvarDevices: "GPU2,GPU3",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: true,
|
||||||
|
acceptMounts: false,
|
||||||
|
expectedDevices: []string{"GPU2", "GPU3"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
|
||||||
|
mountDevices: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
envvarDevices: "GPU2,GPU3",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
acceptMounts: false,
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
// Wrap the call to getDevices() in a closure.
|
||||||
|
image, err := New(
|
||||||
|
WithEnvMap(
|
||||||
|
map[string]string{
|
||||||
|
EnvVarNvidiaVisibleDevices: tc.envvarDevices,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
WithMounts(tc.mountDevices),
|
||||||
|
WithPrivileged(tc.privileged),
|
||||||
|
WithAcceptDeviceListAsVolumeMounts(tc.acceptMounts),
|
||||||
|
WithAcceptEnvvarUnprivileged(tc.acceptUnprivileged),
|
||||||
|
)
|
||||||
|
require.NoError(t, err)
|
||||||
|
require.Equal(t, tc.expectedDevices, image.VisibleDevices())
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -224,7 +639,7 @@ func TestImexChannelsFromEnvVar(t *testing.T) {
|
|||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
|
for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
|
||||||
t.Run(tc.description+id, func(t *testing.T) {
|
t.Run(tc.description+id, func(t *testing.T) {
|
||||||
i, err := NewCUDAImageFromEnv(append(baseEnvvars, tc.env...))
|
i, err := newCUDAImageFromEnv(append(baseEnvvars, tc.env...))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
channels := i.ImexChannelsFromEnvVar()
|
channels := i.ImexChannelsFromEnvVar()
|
||||||
|
@ -24,20 +24,39 @@ const (
|
|||||||
capSysAdmin = "CAP_SYS_ADMIN"
|
capSysAdmin = "CAP_SYS_ADMIN"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
type CapabilitiesGetter interface {
|
||||||
|
GetCapabilities() []string
|
||||||
|
}
|
||||||
|
|
||||||
|
type OCISpec specs.Spec
|
||||||
|
|
||||||
|
type OCISpecCapabilities specs.LinuxCapabilities
|
||||||
|
|
||||||
// IsPrivileged returns true if the container is a privileged container.
|
// IsPrivileged returns true if the container is a privileged container.
|
||||||
func IsPrivileged(s *specs.Spec) bool {
|
func IsPrivileged(s CapabilitiesGetter) bool {
|
||||||
if s.Process.Capabilities == nil {
|
if s == nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
for _, c := range s.GetCapabilities() {
|
||||||
// We only make sure that the bounding capabibility set has
|
|
||||||
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
|
||||||
// actually started as '--privileged', but also allow non-root users to
|
|
||||||
// access the privileged NVIDIA capabilities.
|
|
||||||
for _, c := range s.Process.Capabilities.Bounding {
|
|
||||||
if c == capSysAdmin {
|
if c == capSysAdmin {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (s OCISpec) GetCapabilities() []string {
|
||||||
|
if s.Process == nil || s.Process.Capabilities == nil {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return (*OCISpecCapabilities)(s.Process.Capabilities).GetCapabilities()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c OCISpecCapabilities) GetCapabilities() []string {
|
||||||
|
// We only make sure that the bounding capability set has
|
||||||
|
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
||||||
|
// actually started as '--privileged', but also allow non-root users to
|
||||||
|
// access the privileged NVIDIA capabilities.
|
||||||
|
return c.Bounding
|
||||||
|
}
|
||||||
|
57
internal/config/image/privileged_test.go
Normal file
57
internal/config/image/privileged_test.go
Normal file
@ -0,0 +1,57 @@
|
|||||||
|
/**
|
||||||
|
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
**/
|
||||||
|
|
||||||
|
package image
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
"github.com/stretchr/testify/require"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestIsPrivileged(t *testing.T) {
|
||||||
|
var tests = []struct {
|
||||||
|
spec specs.Spec
|
||||||
|
expected bool
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
specs.Spec{
|
||||||
|
Process: &specs.Process{
|
||||||
|
Capabilities: &specs.LinuxCapabilities{
|
||||||
|
Bounding: []string{"CAP_SYS_ADMIN"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
specs.Spec{
|
||||||
|
Process: &specs.Process{
|
||||||
|
Capabilities: &specs.LinuxCapabilities{
|
||||||
|
Bounding: []string{"CAP_SYS_FOO"},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
},
|
||||||
|
false,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for i, tc := range tests {
|
||||||
|
privileged := IsPrivileged((*OCISpec)(&tc.spec))
|
||||||
|
|
||||||
|
require.Equal(t, tc.expected, privileged, "%d: %v", i, tc)
|
||||||
|
}
|
||||||
|
}
|
@ -79,7 +79,10 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
|
|||||||
return annotationDevices, nil
|
return annotationDevices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
container, err := image.NewCUDAImageFromSpec(rawSpec)
|
container, err := image.NewCUDAImageFromSpec(
|
||||||
|
rawSpec,
|
||||||
|
image.WithLogger(logger),
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@ -107,7 +110,7 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
|
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged((*image.OCISpec)(rawSpec)) {
|
||||||
return devices, nil
|
return devices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,7 +70,10 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
|
|||||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
image, err := image.NewCUDAImageFromSpec(
|
||||||
|
rawSpec,
|
||||||
|
image.WithLogger(logger),
|
||||||
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user