Mirror of https://github.com/NVIDIA/nvidia-container-toolkit (synced 2025-06-26 18:18:24 +00:00)
Merge pull request #1110 from ArangoGutierrez/i/1049
Refactor extracting requested devices from the container image
Commit b55255e31f
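At a high level, this change moves device-request extraction out of the hook's helper functions and into the image.CUDA type itself. A minimal sketch of the new call pattern, assembled from the hunks that follow (surrounding hook code and error handling are elided; this is not a verbatim excerpt of the diff):

// Sketch only: build the CUDA image with the new functional options and ask it
// directly for the requested devices.
i, err := image.New(
	image.WithEnv(s.Process.Env),
	image.WithMounts(s.Mounts),
	image.WithPrivileged(isPrivileged(s)),
	image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts),
	image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged),
	image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...),
)
if err != nil {
	log.Panicln(err)
}
// Device selection (mount requests vs. env vars, privilege gating) now lives behind one call:
devices := i.VisibleDevices()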
@@ -13,10 +13,6 @@ import (

	"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)

const (
	capSysAdmin = "CAP_SYS_ADMIN"
)

type nvidiaConfig struct {
	Devices          []string
	MigConfigDevices string

@@ -103,9 +99,9 @@ func loadSpec(path string) (spec *Spec) {
	return
}

func isPrivileged(s *Spec) bool {
	if s.Process.Capabilities == nil {
		return false
func (s *Spec) GetCapabilities() []string {
	if s == nil || s.Process == nil || s.Process.Capabilities == nil {
		return nil
	}

	var caps []string

@@ -118,67 +114,22 @@ func isPrivileged(s *Spec) bool {
	if err != nil {
		log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
	}
	for _, c := range caps {
		if c == capSysAdmin {
			return true
		}
	}
	return false
	return caps
}

	// Otherwise, parse s.Process.Capabilities as:
	// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
	process := specs.Process{
		Env: s.Process.Env,
	}

	err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
	capabilities := specs.LinuxCapabilities{}
	err := json.Unmarshal(*s.Process.Capabilities, &capabilities)
	if err != nil {
		log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
	}

	fullSpec := specs.Spec{
		Version: *s.Version,
		Process: &process,
	return image.OCISpecCapabilities(capabilities).GetCapabilities()
}

	return image.IsPrivileged(&fullSpec)
}

func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
	// We check if the image has at least one of the Swarm resource envvars defined and use this
	// if specified.
	for _, envvar := range swarmResourceEnvvars {
		if containerImage.HasEnvvar(envvar) {
			return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
		}
	}

	return containerImage.VisibleDevicesFromEnvVar()
}

func (hookConfig *hookConfig) getDevices(image image.CUDA, privileged bool) []string {
	// If enabled, try and get the device list from volume mounts first
	if hookConfig.AcceptDeviceListAsVolumeMounts {
		devices := image.VisibleDevicesFromMounts()
		if len(devices) > 0 {
			return devices
		}
	}

	// Fallback to reading from the environment variable if privileges are correct
	devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
	if len(devices) == 0 {
		return nil
	}
	if privileged || hookConfig.AcceptEnvvarUnprivileged {
		return devices
	}

	configName := hookConfig.getConfigOption("AcceptEnvvarUnprivileged")
	log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, %v=%v) ", privileged, configName, hookConfig.AcceptEnvvarUnprivileged)

	return nil
func isPrivileged(s *Spec) bool {
	return image.IsPrivileged(s)
}

func getMigConfigDevices(i image.CUDA) *string {

@@ -225,7 +176,6 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
	// We use the default driver capabilities by default. This is filtered to only include the
	// supported capabilities
	supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)

	capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)

	capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)

@@ -251,7 +201,7 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
	legacyImage := image.IsLegacy()

	devices := hookConfig.getDevices(image, privileged)
	devices := image.VisibleDevices()
	if len(devices) == 0 {
		// empty devices means this is not a GPU container.
		return nil

@@ -306,20 +256,25 @@ func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {

	s := loadSpec(path.Join(b, "config.json"))

	image, err := image.New(
	privileged := isPrivileged(s)

	i, err := image.New(
		image.WithEnv(s.Process.Env),
		image.WithMounts(s.Mounts),
		image.WithPrivileged(privileged),
		image.WithDisableRequire(hookConfig.DisableRequire),
		image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts),
		image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged),
		image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...),
	)
	if err != nil {
		log.Panicln(err)
	}

	privileged := isPrivileged(s)
	return containerConfig{
		Pid:    h.Pid,
		Rootfs: s.Root.Path,
		Image:  image,
		Nvidia: hookConfig.getNvidiaConfig(image, privileged),
		Image:  i,
		Nvidia: hookConfig.getNvidiaConfig(i, privileged),
	}
}
@@ -1,10 +1,8 @@
package main

import (
	"path/filepath"
	"testing"

	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/stretchr/testify/require"

	"github.com/NVIDIA/nvidia-container-toolkit/internal/config"

@@ -479,7 +477,10 @@ func TestGetNvidiaConfig(t *testing.T) {
		t.Run(tc.description, func(t *testing.T) {
			image, _ := image.New(
				image.WithEnvMap(tc.env),
				image.WithPrivileged(tc.privileged),
				image.WithPreferredVisibleDevicesEnvVars(tc.hookConfig.getSwarmResourceEnvvars()...),
			)

			// Wrap the call to getNvidiaConfig() in a closure.
			var cfg *nvidiaConfig
			getConfig := func() {

@@ -518,340 +519,6 @@ func TestGetNvidiaConfig(t *testing.T) {
	}
}

func TestDeviceListSourcePriority(t *testing.T) {
	var tests = []struct {
		description        string
		mountDevices       []specs.Mount
		envvarDevices      string
		privileged         bool
		acceptUnprivileged bool
		acceptMounts       bool
		expectedDevices    []string
	}{
		{
			description: "Mount devices, unprivileged, no accept unprivileged",
			mountDevices: []specs.Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedDevices:    []string{"GPU0", "GPU1"},
		},
		{
			description:        "No mount devices, unprivileged, no accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedDevices:    nil,
		},
		{
			description:        "No mount devices, privileged, no accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         true,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedDevices:    []string{"GPU0", "GPU1"},
		},
		{
			description:        "No mount devices, unprivileged, accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         false,
			acceptUnprivileged: true,
			acceptMounts:       true,
			expectedDevices:    []string{"GPU0", "GPU1"},
		},
		{
			description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
			mountDevices: []specs.Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: true,
			acceptMounts:       false,
			expectedDevices:    []string{"GPU2", "GPU3"},
		},
		{
			description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
			mountDevices: []specs.Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       false,
			expectedDevices:    nil,
		},
	}
	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			// Wrap the call to getDevices() in a closure.
			var devices []string
			getDevices := func() {
				image, _ := image.New(
					image.WithEnvMap(
						map[string]string{
							image.EnvVarNvidiaVisibleDevices: tc.envvarDevices,
						},
					),
					image.WithMounts(tc.mountDevices),
				)
				defaultConfig, _ := config.GetDefault()
				cfg := &hookConfig{defaultConfig}
				cfg.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
				cfg.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
				devices = cfg.getDevices(image, tc.privileged)
			}

			// For all other tests, just grab the devices and check the results
			getDevices()

			require.Equal(t, tc.expectedDevices, devices)
		})
	}
}

func TestGetDevicesFromEnvvar(t *testing.T) {
	envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
	gpuID := "GPU-12345"
	anotherGPUID := "GPU-67890"
	thirdGPUID := "MIG-12345"

	var tests = []struct {
		description          string
		swarmResourceEnvvars []string
		env                  map[string]string
		expectedDevices      []string
	}{
		{
			description: "empty env returns nil for non-legacy image",
		},
		{
			description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "",
			},
		},
		{
			description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "void",
			},
		},
		{
			description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "none",
			},
			expectedDevices: []string{""},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				image.EnvVarCudaVersion:          "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "empty env returns all for legacy image",
			env: map[string]string{
				image.EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{"all"},
		},
		// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
		// not enabled
		{
			description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
			},
		},
		{
			description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "",
				envDockerResourceGPUs:            anotherGPUID,
			},
		},
		{
			description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "void",
				envDockerResourceGPUs:            anotherGPUID,
			},
		},
		{
			description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "none",
				envDockerResourceGPUs:            anotherGPUID,
			},
			expectedDevices: []string{""},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs:            anotherGPUID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs:            anotherGPUID,
				image.EnvVarCudaVersion:          "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "empty env returns all for legacy image",
			env: map[string]string{
				envDockerResourceGPUs:   anotherGPUID,
				image.EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{"all"},
		},
		// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
		// enabled
		{
			description:          "empty env returns nil for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
		},
		{
			description:          "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "",
			},
		},
		{
			description:          "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "void",
			},
		},
		{
			description:          "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "none",
			},
			expectedDevices: []string{""},
		},
		{
			description:          "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: gpuID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description:          "DOCKER_RESOURCE_GPUS set returns value for legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs:   gpuID,
				image.EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description:          "DOCKER_RESOURCE_GPUS is selected if present",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description:          "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs:            anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description:          "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
			swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices:  gpuID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description:          "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
			swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices:  gpuID,
				"DOCKER_RESOURCE_GPUS":            thirdGPUID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{thirdGPUID, anotherGPUID},
		},
		{
			description:          "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
			swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices:  gpuID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
	}

	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			image, _ := image.New(
				image.WithEnvMap(tc.env),
			)
			devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
			require.EqualValues(t, tc.expectedDevices, devices)
		})
	}
}

func TestGetDriverCapabilities(t *testing.T) {

	supportedCapabilities := "compute,display,utility,video"
@@ -88,7 +88,7 @@ func (c hookConfig) getConfigOption(fieldName string) string {

// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
func (c *hookConfig) getSwarmResourceEnvvars() []string {
	if c.SwarmResource == "" {
	if c == nil || c.SwarmResource == "" {
		return nil
	}
@@ -21,22 +21,36 @@ import (
	"strings"

	"github.com/opencontainers/runtime-spec/specs-go"

	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)

type builder struct {
	env    map[string]string
	mounts []specs.Mount
	CUDA

	disableRequire bool
}

// Option is a functional option for creating a CUDA image.
type Option func(*builder) error

// New creates a new CUDA image from the input options.
func New(opt ...Option) (CUDA, error) {
	b := &builder{}
	b := &builder{
		CUDA: CUDA{
			acceptEnvvarUnprivileged: true,
		},
	}
	for _, o := range opt {
		if err := o(b); err != nil {
			return CUDA{}, err
		}
	}

	if b.logger == nil {
		b.logger = logger.New()
	}

	if b.env == nil {
		b.env = make(map[string]string)
	}

@@ -50,15 +64,22 @@ func (b builder) build() (CUDA, error) {
		b.env[EnvVarNvidiaDisableRequire] = "true"
	}

	c := CUDA{
		env:    b.env,
		mounts: b.mounts,
	}
	return c, nil
	return b.CUDA, nil
}

// Option is a functional option for creating a CUDA image.
type Option func(*builder) error
func WithAcceptDeviceListAsVolumeMounts(acceptDeviceListAsVolumeMounts bool) Option {
	return func(b *builder) error {
		b.acceptDeviceListAsVolumeMounts = acceptDeviceListAsVolumeMounts
		return nil
	}
}

func WithAcceptEnvvarUnprivileged(acceptEnvvarUnprivileged bool) Option {
	return func(b *builder) error {
		b.acceptEnvvarUnprivileged = acceptEnvvarUnprivileged
		return nil
	}
}

// WithDisableRequire sets the disable require option.
func WithDisableRequire(disableRequire bool) Option {

@@ -93,6 +114,14 @@ func WithEnvMap(env map[string]string) Option {
	}
}

// WithLogger sets the logger to use when creating the CUDA image.
func WithLogger(logger logger.Interface) Option {
	return func(b *builder) error {
		b.logger = logger
		return nil
	}
}

// WithMounts sets the mounts associated with the CUDA image.
func WithMounts(mounts []specs.Mount) Option {
	return func(b *builder) error {

@@ -100,3 +129,20 @@ func WithMounts(mounts []specs.Mount) Option {
		return nil
	}
}

// WithPreferredVisibleDevicesEnvVars sets the environment variables that
// should take precedence over the default NVIDIA_VISIBLE_DEVICES.
func WithPreferredVisibleDevicesEnvVars(preferredVisibleDeviceEnvVars ...string) Option {
	return func(b *builder) error {
		b.preferredVisibleDeviceEnvVars = preferredVisibleDeviceEnvVars
		return nil
	}
}

// WithPrivileged sets whether an image is privileged or not.
func WithPrivileged(isPrivileged bool) Option {
	return func(b *builder) error {
		b.isPrivileged = isPrivileged
		return nil
	}
}
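The builder now embeds the CUDA struct directly, every With* option is a func(*builder) error, and New simply applies the options and returns b.CUDA. A hedged usage sketch from a caller's point of view, using only the options shown in this hunk (values are illustrative, error handling is minimal):

// Sketch only: options are applied in order; New stops at the first option error.
img, err := image.New(
	image.WithEnvMap(map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"}),
	image.WithPrivileged(true),
	image.WithAcceptEnvvarUnprivileged(false),
)
if err != nil {
	log.Fatal(err)
}
_ = img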
@@ -25,6 +25,8 @@ import (
	"github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/mod/semver"
	"tags.cncf.io/container-device-interface/pkg/parser"

	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)

const (

@@ -38,27 +40,37 @@ const (
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type CUDA struct {
	logger logger.Interface

	env          map[string]string
	isPrivileged bool
	mounts       []specs.Mount

	acceptDeviceListAsVolumeMounts bool
	acceptEnvvarUnprivileged       bool
	preferredVisibleDeviceEnvVars  []string
}

// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construc the CUDA Image.
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
func NewCUDAImageFromSpec(spec *specs.Spec, opts ...Option) (CUDA, error) {
	var env []string
	if spec != nil && spec.Process != nil {
		env = spec.Process.Env
	}

	return New(
	specOpts := []Option{
		WithEnv(env),
		WithMounts(spec.Mounts),
	)
		WithPrivileged(IsPrivileged((*OCISpec)(spec))),
	}

// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
	return New(append(opts, specOpts...)...)
}

// newCUDAImageFromEnv creates a CUDA image from the input environment. The environment
// is a list of strings of the form ENVAR=VALUE.
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
func newCUDAImageFromEnv(env []string) (CUDA, error) {
	return New(WithEnv(env))
}

@@ -216,14 +228,53 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
	return hasCDIdevice
}

// VisibleDevicesFromEnvVar returns the set of visible devices requested through
// the NVIDIA_VISIBLE_DEVICES environment variable.
// VisibleDevices returns a list of devices requested in the container image.
// If volume mount requests are enabled these are returned if requested,
// otherwise device requests through environment variables are considered.
// In cases where environment variable requests required privileged containers,
// such devices requests are ignored.
func (i CUDA) VisibleDevices() []string {
	// If enabled, try and get the device list from volume mounts first
	if i.acceptDeviceListAsVolumeMounts {
		volumeMountDeviceRequests := i.visibleDevicesFromMounts()
		if len(volumeMountDeviceRequests) > 0 {
			return volumeMountDeviceRequests
		}
	}

	// Get the Fallback to reading from the environment variable if privileges are correct
	envVarDeviceRequests := i.VisibleDevicesFromEnvVar()
	if len(envVarDeviceRequests) == 0 {
		return nil
	}

	// If the container is privileged, or environment variable requests are
	// allowed for unprivileged containers, these devices are returned.
	if i.isPrivileged || i.acceptEnvvarUnprivileged {
		return envVarDeviceRequests
	}

	// We log a warning if we are ignoring the environment variable requests.
	i.logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES in unprivileged container")

	return nil
}

// VisibleDevicesFromEnvVar returns the set of visible devices requested through environment variables.
// If any of the preferredVisibleDeviceEnvVars are present in the image, they
// are used to determine the visible devices. If this is not the case, the
// NVIDIA_VISIBLE_DEVICES environment variable is used.
func (i CUDA) VisibleDevicesFromEnvVar() []string {
	for _, envVar := range i.preferredVisibleDeviceEnvVars {
		if i.HasEnvvar(envVar) {
			return i.DevicesFromEnvvars(i.preferredVisibleDeviceEnvVars...).List()
		}
	}
	return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
}

// VisibleDevicesFromMounts returns the set of visible devices requested as mounts.
func (i CUDA) VisibleDevicesFromMounts() []string {
// visibleDevicesFromMounts returns the set of visible devices requested as mounts.
func (i CUDA) visibleDevicesFromMounts() []string {
	var devices []string
	for _, device := range i.DevicesFromMounts() {
		switch {

@@ -238,7 +289,6 @@ func (i CUDA) VisibleDevicesFromMounts() []string {
}

// DevicesFromMounts returns a list of device specified as mounts.
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
func (i CUDA) DevicesFromMounts() []string {
	root := filepath.Clean(DeviceListAsVolumeMountsRoot)
	seen := make(map[string]bool)
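The doc comments above spell out the selection order that VisibleDevices now implements: volume-mount requests win when acceptDeviceListAsVolumeMounts is set, then environment-variable requests, which are honoured only for privileged containers or when acceptEnvvarUnprivileged is true. A hedged sketch of that decision from a caller's point of view (the mounts value is illustrative; it would contain entries whose destinations sit under DeviceListAsVolumeMountsRoot):

// Sketch only: mount requests take priority when enabled; env-var requests are gated on privilege.
i, _ := image.New(
	image.WithEnvMap(map[string]string{"NVIDIA_VISIBLE_DEVICES": "GPU2,GPU3"}),
	image.WithMounts(mounts), // hypothetical mounts under DeviceListAsVolumeMountsRoot, e.g. .../GPU0
	image.WithPrivileged(false),
	image.WithAcceptDeviceListAsVolumeMounts(true),
	image.WithAcceptEnvvarUnprivileged(false),
)
// With such mounts this returns the mounted device names (e.g. ["GPU0"]); without them it
// returns nil, because the unprivileged env-var request is ignored and a warning is logged.
devices := i.VisibleDevices()
_ = devices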
@@ -21,9 +21,91 @@ import (
	"testing"

	"github.com/opencontainers/runtime-spec/specs-go"
	testlog "github.com/sirupsen/logrus/hooks/test"
	"github.com/stretchr/testify/require"
)

func TestNewCUDAImageFromSpec(t *testing.T) {
	logger, _ := testlog.NewNullLogger()

	testCases := []struct {
		description string
		spec        *specs.Spec
		options     []Option
		expected    CUDA
	}{
		{
			description: "no env vars",
			spec: &specs.Spec{
				Process: &specs.Process{
					Env: []string{},
				},
			},
			expected: CUDA{
				logger:                   logger,
				env:                      map[string]string{},
				acceptEnvvarUnprivileged: true,
			},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES=all",
			spec: &specs.Spec{
				Process: &specs.Process{
					Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
				},
			},
			expected: CUDA{
				logger:                   logger,
				env:                      map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
				acceptEnvvarUnprivileged: true,
			},
		},
		{
			description: "Spec overrides options",
			spec: &specs.Spec{
				Process: &specs.Process{
					Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
				},
				Mounts: []specs.Mount{
					{
						Source:      "/spec-source",
						Destination: "/spec-destination",
					},
				},
			},
			options: []Option{
				WithEnvMap(map[string]string{"OTHER": "value"}),
				WithMounts([]specs.Mount{
					{
						Source:      "/option-source",
						Destination: "/option-destination",
					},
				}),
			},
			expected: CUDA{
				logger: logger,
				env:    map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
				mounts: []specs.Mount{
					{
						Source:      "/spec-source",
						Destination: "/spec-destination",
					},
				},
				acceptEnvvarUnprivileged: true,
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			options := append([]Option{WithLogger(logger)}, tc.options...)
			image, err := NewCUDAImageFromSpec(tc.spec, options...)
			require.NoError(t, err)
			require.EqualValues(t, tc.expected, image)
		})
	}
}

func TestParseMajorMinorVersionValid(t *testing.T) {
	var tests = []struct {
		version string

@@ -122,7 +204,7 @@ func TestGetRequirements(t *testing.T) {

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			image, err := NewCUDAImageFromEnv(tc.env)
			image, err := newCUDAImageFromEnv(tc.env)
			require.NoError(t, err)

			requirements, err := image.GetRequirements()

@@ -133,6 +215,226 @@ func TestGetRequirements(t *testing.T) {
	}
}

func TestGetDevicesFromEnvvar(t *testing.T) {
	envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
	gpuID := "GPU-12345"
	anotherGPUID := "GPU-67890"
	thirdGPUID := "MIG-12345"

	var tests = []struct {
		description                   string
		preferredVisibleDeviceEnvVars []string
		env                           map[string]string
		expectedDevices               []string
	}{
		{
			description: "empty env returns nil for non-legacy image",
		},
		{
			description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "",
			},
		},
		{
			description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "void",
			},
		},
		{
			description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "none",
			},
			expectedDevices: []string{""},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				EnvVarCudaVersion:          "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "empty env returns all for legacy image",
			env: map[string]string{
				EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{"all"},
		},
		// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
		// not enabled
		{
			description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
			},
		},
		{
			description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "",
				envDockerResourceGPUs:      anotherGPUID,
			},
		},
		{
			description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "void",
				envDockerResourceGPUs:      anotherGPUID,
			},
		},
		{
			description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "none",
				envDockerResourceGPUs:      anotherGPUID,
			},
			expectedDevices: []string{""},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs:      anotherGPUID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs:      anotherGPUID,
				EnvVarCudaVersion:          "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "empty env returns all for legacy image",
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
				EnvVarCudaVersion:     "legacy",
			},
			expectedDevices: []string{"all"},
		},
		// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
		// enabled
		{
			description:                   "empty env returns nil for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
		},
		{
			description:                   "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "",
			},
		},
		{
			description:                   "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "void",
			},
		},
		{
			description:                   "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "none",
			},
			expectedDevices: []string{""},
		},
		{
			description:                   "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: gpuID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description:                   "DOCKER_RESOURCE_GPUS set returns value for legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: gpuID,
				EnvVarCudaVersion:     "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description:                   "DOCKER_RESOURCE_GPUS is selected if present",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description:                   "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs:      anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description:                   "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
			preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				EnvVarNvidiaVisibleDevices:        gpuID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description:                   "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
			preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				EnvVarNvidiaVisibleDevices:        gpuID,
				"DOCKER_RESOURCE_GPUS":            thirdGPUID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{thirdGPUID, anotherGPUID},
		},
		{
			description:                   "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
			preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				EnvVarNvidiaVisibleDevices:        gpuID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
	}

	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			image, err := New(
				WithEnvMap(tc.env),
				WithPrivileged(true),
				WithAcceptDeviceListAsVolumeMounts(false),
				WithAcceptEnvvarUnprivileged(false),
				WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...),
			)

			require.NoError(t, err)
			devices := image.VisibleDevicesFromEnvVar()
			require.EqualValues(t, tc.expectedDevices, devices)
		})
	}
}

func TestGetVisibleDevicesFromMounts(t *testing.T) {
	var tests = []struct {
		description string

@@ -197,8 +499,121 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
	}
	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			image, _ := New(WithMounts(tc.mounts))
			require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts())
			image, err := New(WithMounts(tc.mounts))
			require.NoError(t, err)
			require.Equal(t, tc.expectedDevices, image.visibleDevicesFromMounts())
		})
	}
}

func TestVisibleDevices(t *testing.T) {
	var tests = []struct {
		description        string
		mountDevices       []specs.Mount
		envvarDevices      string
		privileged         bool
		acceptUnprivileged bool
		acceptMounts       bool
		expectedDevices    []string
	}{
		{
			description: "Mount devices, unprivileged, no accept unprivileged",
			mountDevices: []specs.Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedDevices:    []string{"GPU0", "GPU1"},
		},
		{
			description:        "No mount devices, unprivileged, no accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedDevices:    nil,
		},
		{
			description:        "No mount devices, privileged, no accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         true,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedDevices:    []string{"GPU0", "GPU1"},
		},
		{
			description:        "No mount devices, unprivileged, accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         false,
			acceptUnprivileged: true,
			acceptMounts:       true,
			expectedDevices:    []string{"GPU0", "GPU1"},
		},
		{
			description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
			mountDevices: []specs.Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: true,
			acceptMounts:       false,
			expectedDevices:    []string{"GPU2", "GPU3"},
		},
		{
			description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
			mountDevices: []specs.Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       false,
			expectedDevices:    nil,
		},
	}
	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			// Wrap the call to getDevices() in a closure.
			image, err := New(
				WithEnvMap(
					map[string]string{
						EnvVarNvidiaVisibleDevices: tc.envvarDevices,
					},
				),
				WithMounts(tc.mountDevices),
				WithPrivileged(tc.privileged),
				WithAcceptDeviceListAsVolumeMounts(tc.acceptMounts),
				WithAcceptEnvvarUnprivileged(tc.acceptUnprivileged),
			)
			require.NoError(t, err)
			require.Equal(t, tc.expectedDevices, image.VisibleDevices())
		})
	}
}

@@ -224,7 +639,7 @@ func TestImexChannelsFromEnvVar(t *testing.T) {
	for _, tc := range testCases {
		for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
			t.Run(tc.description+id, func(t *testing.T) {
				i, err := NewCUDAImageFromEnv(append(baseEnvvars, tc.env...))
				i, err := newCUDAImageFromEnv(append(baseEnvvars, tc.env...))
				require.NoError(t, err)

				channels := i.ImexChannelsFromEnvVar()
@@ -24,20 +24,39 @@ const (
	capSysAdmin = "CAP_SYS_ADMIN"
)

// IsPrivileged returns true if the container is a privileged container.
func IsPrivileged(s *specs.Spec) bool {
	if s.Process.Capabilities == nil {
		return false
type CapabilitiesGetter interface {
	GetCapabilities() []string
}

	// We only make sure that the bounding capabibility set has
	// CAP_SYS_ADMIN. This allows us to make sure that the container was
	// actually started as '--privileged', but also allow non-root users to
	// access the privileged NVIDIA capabilities.
	for _, c := range s.Process.Capabilities.Bounding {
type OCISpec specs.Spec

type OCISpecCapabilities specs.LinuxCapabilities

// IsPrivileged returns true if the container is a privileged container.
func IsPrivileged(s CapabilitiesGetter) bool {
	if s == nil {
		return false
	}
	for _, c := range s.GetCapabilities() {
		if c == capSysAdmin {
			return true
		}
	}

	return false
}

func (s OCISpec) GetCapabilities() []string {
	if s.Process == nil || s.Process.Capabilities == nil {
		return nil
	}
	return (*OCISpecCapabilities)(s.Process.Capabilities).GetCapabilities()
}

func (c OCISpecCapabilities) GetCapabilities() []string {
	// We only make sure that the bounding capability set has
	// CAP_SYS_ADMIN. This allows us to make sure that the container was
	// actually started as '--privileged', but also allow non-root users to
	// access the privileged NVIDIA capabilities.
	return c.Bounding
}
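IsPrivileged now accepts anything that can report a capability list, and the OCISpec / OCISpecCapabilities named types adapt the runtime-spec structs to that interface through plain type conversions. A short sketch of how a caller converts a *specs.Spec, mirroring the pattern used by the new privileged_test.go and the modifier hunks below (values are illustrative):

// Sketch only: convert the OCI runtime spec to the package's named type so it
// satisfies CapabilitiesGetter, then check the bounding set for CAP_SYS_ADMIN.
spec := &specs.Spec{
	Process: &specs.Process{
		Capabilities: &specs.LinuxCapabilities{Bounding: []string{"CAP_SYS_ADMIN"}},
	},
}
privileged := image.IsPrivileged((*image.OCISpec)(spec)) // true for this spec
_ = privileged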
internal/config/image/privileged_test.go (new file, 57 lines)
@@ -0,0 +1,57 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package image

import (
	"testing"

	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/stretchr/testify/require"
)

func TestIsPrivileged(t *testing.T) {
	var tests = []struct {
		spec     specs.Spec
		expected bool
	}{
		{
			specs.Spec{
				Process: &specs.Process{
					Capabilities: &specs.LinuxCapabilities{
						Bounding: []string{"CAP_SYS_ADMIN"},
					},
				},
			},
			true,
		},
		{
			specs.Spec{
				Process: &specs.Process{
					Capabilities: &specs.LinuxCapabilities{
						Bounding: []string{"CAP_SYS_FOO"},
					},
				},
			},
			false,
		},
	}
	for i, tc := range tests {
		privileged := IsPrivileged((*OCISpec)(&tc.spec))

		require.Equal(t, tc.expected, privileged, "%d: %v", i, tc)
	}
}
@@ -79,7 +79,10 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
		return annotationDevices, nil
	}

	container, err := image.NewCUDAImageFromSpec(rawSpec)
	container, err := image.NewCUDAImageFromSpec(
		rawSpec,
		image.WithLogger(logger),
	)
	if err != nil {
		return nil, err
	}

@@ -107,7 +110,7 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
		return nil, nil
	}

	if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
	if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged((*image.OCISpec)(rawSpec)) {
		return devices, nil
	}
@@ -70,7 +70,10 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
		return nil, fmt.Errorf("failed to load OCI spec: %v", err)
	}

	image, err := image.NewCUDAImageFromSpec(rawSpec)
	image, err := image.NewCUDAImageFromSpec(
		rawSpec,
		image.WithLogger(logger),
	)
	if err != nil {
		return nil, err
	}
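Both call sites above now pass image.WithLogger(logger) when constructing the image from the spec. This matters because the refactored VisibleDevices logic reports its "ignoring NVIDIA_VISIBLE_DEVICES" warning through the image's own logger rather than through the caller's log statements. A minimal sketch of the updated construction, assuming rawSpec and logger come from the surrounding function as in the hunks above:

// Sketch only: wiring the caller's logger into the CUDA image so device-selection
// warnings surface through it.
img, err := image.NewCUDAImageFromSpec(
	rawSpec,
	image.WithLogger(logger),
)
if err != nil {
	return nil, err
}
_ = img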