Mirror of https://github.com/NVIDIA/nvidia-container-toolkit (synced 2025-06-16 11:30:20 +00:00)
Refactor extracting requested devices from the container image
This change consolidates the logic for determining the devices requested by a container image. That logic is now integrated into the image.CUDA type so that multiple implementations are not required.
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
Co-authored-by: Evan Lezar <elezar@nvidia.com>
This commit is contained in: parent fdcd250362 · commit dede03f322
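For orientation, here is a minimal sketch of how a caller uses the consolidated API after this refactor, based on the hook changes in the diff below. The option values shown are illustrative only; they are not taken from a real configuration.

	// Sketch: device-request extraction after the refactor (values are illustrative).
	cudaImage, err := image.New(
		image.WithEnv(spec.Process.Env),
		image.WithMounts(spec.Mounts),
		image.WithPrivileged(privileged),
		image.WithAcceptDeviceListAsVolumeMounts(cfg.AcceptDeviceListAsVolumeMounts),
		image.WithAcceptEnvvarUnprivileged(cfg.AcceptEnvvarUnprivileged),
		image.WithPreferredVisibleDevicesEnvVars("DOCKER_RESOURCE_GPUS"),
	)
	if err != nil {
		log.Panicln(err)
	}
	// VisibleDevices encapsulates the mount / envvar / privilege logic that
	// previously lived in the hook's getDevices and getDevicesFromEnvvar helpers.
	devices := cudaImage.VisibleDevices()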
@@ -13,10 +13,6 @@ import (
	"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
)

const (
	capSysAdmin = "CAP_SYS_ADMIN"
)

type nvidiaConfig struct {
	Devices          []string
	MigConfigDevices string
@@ -103,9 +99,9 @@ func loadSpec(path string) (spec *Spec) {
	return
}

func isPrivileged(s *Spec) bool {
	if s.Process.Capabilities == nil {
		return false
func (s *Spec) GetCapabilities() []string {
	if s == nil || s.Process == nil || s.Process.Capabilities == nil {
		return nil
	}

	var caps []string
@@ -118,67 +114,22 @@ func isPrivileged(s *Spec) bool {
	if err != nil {
		log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
	}
	for _, c := range caps {
		if c == capSysAdmin {
			return true
		}
	}
	return false
	return caps
}

	// Otherwise, parse s.Process.Capabilities as:
	// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
	process := specs.Process{
		Env: s.Process.Env,
	}

	err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
	capabilities := specs.LinuxCapabilities{}
	err := json.Unmarshal(*s.Process.Capabilities, &capabilities)
	if err != nil {
		log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
	}

	fullSpec := specs.Spec{
		Version: *s.Version,
		Process: &process,
	return image.OCISpecCapabilities(capabilities).GetCapabilities()
	}

	return image.IsPrivileged(&fullSpec)
}

func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
	// We check if the image has at least one of the Swarm resource envvars defined and use this
	// if specified.
	for _, envvar := range swarmResourceEnvvars {
		if containerImage.HasEnvvar(envvar) {
			return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
		}
	}

	return containerImage.VisibleDevicesFromEnvVar()
}

func (hookConfig *hookConfig) getDevices(image image.CUDA, privileged bool) []string {
	// If enabled, try and get the device list from volume mounts first
	if hookConfig.AcceptDeviceListAsVolumeMounts {
		devices := image.VisibleDevicesFromMounts()
		if len(devices) > 0 {
			return devices
		}
	}

	// Fallback to reading from the environment variable if privileges are correct
	devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
	if len(devices) == 0 {
		return nil
	}
	if privileged || hookConfig.AcceptEnvvarUnprivileged {
		return devices
	}

	configName := hookConfig.getConfigOption("AcceptEnvvarUnprivileged")
	log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, %v=%v) ", privileged, configName, hookConfig.AcceptEnvvarUnprivileged)

	return nil
func isPrivileged(s *Spec) bool {
	return image.IsPrivileged(s)
}

func getMigConfigDevices(i image.CUDA) *string {
@@ -225,7 +176,6 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
	// We use the default driver capabilities by default. This is filtered to only include the
	// supported capabilities
	supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)

	capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)

	capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
@@ -251,7 +201,7 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
	legacyImage := image.IsLegacy()

	devices := hookConfig.getDevices(image, privileged)
	devices := image.VisibleDevices()
	if len(devices) == 0 {
		// empty devices means this is not a GPU container.
		return nil
@@ -306,20 +256,25 @@ func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {

	s := loadSpec(path.Join(b, "config.json"))

	image, err := image.New(
	privileged := isPrivileged(s)

	i, err := image.New(
		image.WithEnv(s.Process.Env),
		image.WithMounts(s.Mounts),
		image.WithPrivileged(privileged),
		image.WithDisableRequire(hookConfig.DisableRequire),
		image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts),
		image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged),
		image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...),
	)
	if err != nil {
		log.Panicln(err)
	}

	privileged := isPrivileged(s)
	return containerConfig{
		Pid:    h.Pid,
		Rootfs: s.Root.Path,
		Image:  image,
		Nvidia: hookConfig.getNvidiaConfig(image, privileged),
		Image:  i,
		Nvidia: hookConfig.getNvidiaConfig(i, privileged),
	}
}
@@ -1,10 +1,8 @@
package main

import (
	"path/filepath"
	"testing"

	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/stretchr/testify/require"

	"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
@@ -479,7 +477,10 @@ func TestGetNvidiaConfig(t *testing.T) {
		t.Run(tc.description, func(t *testing.T) {
			image, _ := image.New(
				image.WithEnvMap(tc.env),
				image.WithPrivileged(tc.privileged),
				image.WithPreferredVisibleDevicesEnvVars(tc.hookConfig.getSwarmResourceEnvvars()...),
			)

			// Wrap the call to getNvidiaConfig() in a closure.
			var cfg *nvidiaConfig
			getConfig := func() {
@@ -518,340 +519,6 @@ func TestGetNvidiaConfig(t *testing.T) {
	}
}

func TestDeviceListSourcePriority(t *testing.T) {
	var tests = []struct {
		description string
		mountDevices []specs.Mount
		envvarDevices string
		privileged bool
		acceptUnprivileged bool
		acceptMounts bool
		expectedDevices []string
	}{
		{
			description: "Mount devices, unprivileged, no accept unprivileged",
			mountDevices: []specs.Mount{
				{
					Source: "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source: "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices: "GPU2,GPU3",
			privileged: false,
			acceptUnprivileged: false,
			acceptMounts: true,
			expectedDevices: []string{"GPU0", "GPU1"},
		},
		{
			description: "No mount devices, unprivileged, no accept unprivileged",
			mountDevices: nil,
			envvarDevices: "GPU0,GPU1",
			privileged: false,
			acceptUnprivileged: false,
			acceptMounts: true,
			expectedDevices: nil,
		},
		{
			description: "No mount devices, privileged, no accept unprivileged",
			mountDevices: nil,
			envvarDevices: "GPU0,GPU1",
			privileged: true,
			acceptUnprivileged: false,
			acceptMounts: true,
			expectedDevices: []string{"GPU0", "GPU1"},
		},
		{
			description: "No mount devices, unprivileged, accept unprivileged",
			mountDevices: nil,
			envvarDevices: "GPU0,GPU1",
			privileged: false,
			acceptUnprivileged: true,
			acceptMounts: true,
			expectedDevices: []string{"GPU0", "GPU1"},
		},
		{
			description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
			mountDevices: []specs.Mount{
				{
					Source: "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source: "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices: "GPU2,GPU3",
			privileged: false,
			acceptUnprivileged: true,
			acceptMounts: false,
			expectedDevices: []string{"GPU2", "GPU3"},
		},
		{
			description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
			mountDevices: []specs.Mount{
				{
					Source: "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source: "/dev/null",
					Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices: "GPU2,GPU3",
			privileged: false,
			acceptUnprivileged: false,
			acceptMounts: false,
			expectedDevices: nil,
		},
	}
	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			// Wrap the call to getDevices() in a closure.
			var devices []string
			getDevices := func() {
				image, _ := image.New(
					image.WithEnvMap(
						map[string]string{
							image.EnvVarNvidiaVisibleDevices: tc.envvarDevices,
						},
					),
					image.WithMounts(tc.mountDevices),
				)
				defaultConfig, _ := config.GetDefault()
				cfg := &hookConfig{defaultConfig}
				cfg.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
				cfg.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
				devices = cfg.getDevices(image, tc.privileged)
			}

			// For all other tests, just grab the devices and check the results
			getDevices()

			require.Equal(t, tc.expectedDevices, devices)
		})
	}
}

func TestGetDevicesFromEnvvar(t *testing.T) {
	envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
	gpuID := "GPU-12345"
	anotherGPUID := "GPU-67890"
	thirdGPUID := "MIG-12345"

	var tests = []struct {
		description string
		swarmResourceEnvvars []string
		env map[string]string
		expectedDevices []string
	}{
		{
			description: "empty env returns nil for non-legacy image",
		},
		{
			description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "",
			},
		},
		{
			description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "void",
			},
		},
		{
			description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "none",
			},
			expectedDevices: []string{""},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				image.EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "empty env returns all for legacy image",
			env: map[string]string{
				image.EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{"all"},
		},
		// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
		// not enabled
		{
			description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
			},
		},
		{
			description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "",
				envDockerResourceGPUs: anotherGPUID,
			},
		},
		{
			description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "void",
				envDockerResourceGPUs: anotherGPUID,
			},
		},
		{
			description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: "none",
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{""},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs: anotherGPUID,
				image.EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "empty env returns all for legacy image",
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
				image.EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{"all"},
		},
		// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
		// enabled
		{
			description: "empty env returns nil for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
		},
		{
			description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "",
			},
		},
		{
			description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "void",
			},
		},
		{
			description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "none",
			},
			expectedDevices: []string{""},
		},
		{
			description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: gpuID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: gpuID,
				image.EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS is selected if present",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
			swarmResourceEnvvars: []string{envDockerResourceGPUs},
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
			swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
			swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				"DOCKER_RESOURCE_GPUS": thirdGPUID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{thirdGPUID, anotherGPUID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
			swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				image.EnvVarNvidiaVisibleDevices: gpuID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
	}

	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			image, _ := image.New(
				image.WithEnvMap(tc.env),
			)
			devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
			require.EqualValues(t, tc.expectedDevices, devices)
		})
	}
}

func TestGetDriverCapabilities(t *testing.T) {

	supportedCapabilities := "compute,display,utility,video"
@@ -88,7 +88,7 @@ func (c hookConfig) getConfigOption(fieldName string) string {

// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
func (c *hookConfig) getSwarmResourceEnvvars() []string {
	if c.SwarmResource == "" {
	if c == nil || c.SwarmResource == "" {
		return nil
	}

@@ -21,22 +21,36 @@ import (
	"strings"

	"github.com/opencontainers/runtime-spec/specs-go"

	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)

type builder struct {
	env map[string]string
	mounts []specs.Mount
	CUDA

	disableRequire bool
}

// Option is a functional option for creating a CUDA image.
type Option func(*builder) error

// New creates a new CUDA image from the input options.
func New(opt ...Option) (CUDA, error) {
	b := &builder{}
	b := &builder{
		CUDA: CUDA{
			acceptEnvvarUnprivileged: true,
		},
	}
	for _, o := range opt {
		if err := o(b); err != nil {
			return CUDA{}, err
		}
	}

	if b.logger == nil {
		b.logger = logger.New()
	}

	if b.env == nil {
		b.env = make(map[string]string)
	}
@@ -50,15 +64,22 @@ func (b builder) build() (CUDA, error) {
		b.env[EnvVarNvidiaDisableRequire] = "true"
	}

	c := CUDA{
		env:    b.env,
		mounts: b.mounts,
	}
	return c, nil
	return b.CUDA, nil
}

// Option is a functional option for creating a CUDA image.
type Option func(*builder) error
func WithAcceptDeviceListAsVolumeMounts(acceptDeviceListAsVolumeMounts bool) Option {
	return func(b *builder) error {
		b.acceptDeviceListAsVolumeMounts = acceptDeviceListAsVolumeMounts
		return nil
	}
}

func WithAcceptEnvvarUnprivileged(acceptEnvvarUnprivileged bool) Option {
	return func(b *builder) error {
		b.acceptEnvvarUnprivileged = acceptEnvvarUnprivileged
		return nil
	}
}

// WithDisableRequire sets the disable require option.
func WithDisableRequire(disableRequire bool) Option {
@@ -93,6 +114,14 @@ func WithEnvMap(env map[string]string) Option {
	}
}

// WithLogger sets the logger to use when creating the CUDA image.
func WithLogger(logger logger.Interface) Option {
	return func(b *builder) error {
		b.logger = logger
		return nil
	}
}

// WithMounts sets the mounts associated with the CUDA image.
func WithMounts(mounts []specs.Mount) Option {
	return func(b *builder) error {
@@ -100,3 +129,20 @@ func WithMounts(mounts []specs.Mount) Option {
		return nil
	}
}

// WithPreferredVisibleDevicesEnvVars sets the environment variables that
// should take precedence over the default NVIDIA_VISIBLE_DEVICES.
func WithPreferredVisibleDevicesEnvVars(preferredVisibleDeviceEnvVars ...string) Option {
	return func(b *builder) error {
		b.preferredVisibleDeviceEnvVars = preferredVisibleDeviceEnvVars
		return nil
	}
}

// WithPrivileged sets whether an image is privileged or not.
func WithPrivileged(isPrivileged bool) Option {
	return func(b *builder) error {
		b.isPrivileged = isPrivileged
		return nil
	}
}
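A usage note on the new builder defaults (a sketch, not part of the diff): because the builder now embeds a CUDA value with acceptEnvvarUnprivileged defaulting to true, callers that want the stricter behaviour have to opt out explicitly. The env values below are illustrative only.

	// Assumed example: build an image that ignores NVIDIA_VISIBLE_DEVICES for
	// unprivileged containers unless devices are requested via volume mounts.
	i, err := image.New(
		image.WithEnvMap(map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"}),
		image.WithAcceptEnvvarUnprivileged(false), // overrides the default of true
	)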
@@ -25,6 +25,8 @@ import (
	"github.com/opencontainers/runtime-spec/specs-go"
	"golang.org/x/mod/semver"
	"tags.cncf.io/container-device-interface/pkg/parser"

	"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
)

const (
@@ -38,27 +40,37 @@ const (
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type CUDA struct {
	logger logger.Interface

	env map[string]string
	isPrivileged bool
	mounts []specs.Mount

	acceptDeviceListAsVolumeMounts bool
	acceptEnvvarUnprivileged       bool
	preferredVisibleDeviceEnvVars  []string
}

// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construct the CUDA image.
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
func NewCUDAImageFromSpec(spec *specs.Spec, opts ...Option) (CUDA, error) {
	var env []string
	if spec != nil && spec.Process != nil {
		env = spec.Process.Env
	}

	return New(
	specOpts := []Option{
		WithEnv(env),
		WithMounts(spec.Mounts),
	)
		WithPrivileged(IsPrivileged((*OCISpec)(spec))),
	}

// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
	return New(append(opts, specOpts...)...)
}

// newCUDAImageFromEnv creates a CUDA image from the input environment. The environment
// is a list of strings of the form ENVAR=VALUE.
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
func newCUDAImageFromEnv(env []string) (CUDA, error) {
	return New(WithEnv(env))
}

@@ -216,14 +228,53 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
	return hasCDIdevice
}

// VisibleDevicesFromEnvVar returns the set of visible devices requested through
// the NVIDIA_VISIBLE_DEVICES environment variable.
// VisibleDevices returns a list of devices requested in the container image.
// If volume mount requests are enabled these are returned if requested,
// otherwise device requests through environment variables are considered.
// In cases where environment variable requests require privileged containers,
// such device requests are ignored.
func (i CUDA) VisibleDevices() []string {
	// If enabled, try and get the device list from volume mounts first
	if i.acceptDeviceListAsVolumeMounts {
		volumeMountDeviceRequests := i.visibleDevicesFromMounts()
		if len(volumeMountDeviceRequests) > 0 {
			return volumeMountDeviceRequests
		}
	}

	// Fall back to reading from the environment variable if privileges are correct
	envVarDeviceRequests := i.VisibleDevicesFromEnvVar()
	if len(envVarDeviceRequests) == 0 {
		return nil
	}

	// If the container is privileged, or environment variable requests are
	// allowed for unprivileged containers, these devices are returned.
	if i.isPrivileged || i.acceptEnvvarUnprivileged {
		return envVarDeviceRequests
	}

	// We log a warning if we are ignoring the environment variable requests.
	i.logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES in unprivileged container")

	return nil
}

// VisibleDevicesFromEnvVar returns the set of visible devices requested through environment variables.
// If any of the preferredVisibleDeviceEnvVars are present in the image, they
// are used to determine the visible devices. If this is not the case, the
// NVIDIA_VISIBLE_DEVICES environment variable is used.
func (i CUDA) VisibleDevicesFromEnvVar() []string {
	for _, envVar := range i.preferredVisibleDeviceEnvVars {
		if i.HasEnvvar(envVar) {
			return i.DevicesFromEnvvars(i.preferredVisibleDeviceEnvVars...).List()
		}
	}
	return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
}

// VisibleDevicesFromMounts returns the set of visible devices requested as mounts.
func (i CUDA) VisibleDevicesFromMounts() []string {
// visibleDevicesFromMounts returns the set of visible devices requested as mounts.
func (i CUDA) visibleDevicesFromMounts() []string {
	var devices []string
	for _, device := range i.DevicesFromMounts() {
		switch {
@@ -238,7 +289,6 @@ func (i CUDA) VisibleDevicesFromMounts() []string {
}

// DevicesFromMounts returns a list of devices specified as mounts.
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
func (i CUDA) DevicesFromMounts() []string {
	root := filepath.Clean(DeviceListAsVolumeMountsRoot)
	seen := make(map[string]bool)
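To make the precedence implemented by VisibleDevices concrete, here is a small illustrative sketch from a caller's point of view (the device names are made up): mount-based requests win when accepted, then the preferred env vars, then NVIDIA_VISIBLE_DEVICES, and the env-var result is discarded for unprivileged containers unless explicitly accepted.

	// Unprivileged container, env-var requests not accepted: the request is ignored
	// and a warning is logged, so VisibleDevices returns nil.
	i, _ := image.New(
		image.WithEnvMap(map[string]string{"NVIDIA_VISIBLE_DEVICES": "GPU-a,GPU-b"}),
		image.WithPrivileged(false),
		image.WithAcceptEnvvarUnprivileged(false),
	)
	fmt.Println(i.VisibleDevices()) // prints []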
@@ -21,9 +21,91 @@ import (
	"testing"

	"github.com/opencontainers/runtime-spec/specs-go"
	testlog "github.com/sirupsen/logrus/hooks/test"
	"github.com/stretchr/testify/require"
)

func TestNewCUDAImageFromSpec(t *testing.T) {
	logger, _ := testlog.NewNullLogger()

	testCases := []struct {
		description string
		spec *specs.Spec
		options []Option
		expected CUDA
	}{
		{
			description: "no env vars",
			spec: &specs.Spec{
				Process: &specs.Process{
					Env: []string{},
				},
			},
			expected: CUDA{
				logger: logger,
				env: map[string]string{},
				acceptEnvvarUnprivileged: true,
			},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES=all",
			spec: &specs.Spec{
				Process: &specs.Process{
					Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
				},
			},
			expected: CUDA{
				logger: logger,
				env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
				acceptEnvvarUnprivileged: true,
			},
		},
		{
			description: "Spec overrides options",
			spec: &specs.Spec{
				Process: &specs.Process{
					Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
				},
				Mounts: []specs.Mount{
					{
						Source: "/spec-source",
						Destination: "/spec-destination",
					},
				},
			},
			options: []Option{
				WithEnvMap(map[string]string{"OTHER": "value"}),
				WithMounts([]specs.Mount{
					{
						Source: "/option-source",
						Destination: "/option-destination",
					},
				}),
			},
			expected: CUDA{
				logger: logger,
				env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
				mounts: []specs.Mount{
					{
						Source: "/spec-source",
						Destination: "/spec-destination",
					},
				},
				acceptEnvvarUnprivileged: true,
			},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			options := append([]Option{WithLogger(logger)}, tc.options...)
			image, err := NewCUDAImageFromSpec(tc.spec, options...)
			require.NoError(t, err)
			require.EqualValues(t, tc.expected, image)
		})
	}
}

func TestParseMajorMinorVersionValid(t *testing.T) {
	var tests = []struct {
		version string
@@ -122,7 +204,7 @@ func TestGetRequirements(t *testing.T) {

	for _, tc := range testCases {
		t.Run(tc.description, func(t *testing.T) {
			image, err := NewCUDAImageFromEnv(tc.env)
			image, err := newCUDAImageFromEnv(tc.env)
			require.NoError(t, err)

			requirements, err := image.GetRequirements()
@@ -133,6 +215,226 @@ func TestGetRequirements(t *testing.T) {
	}
}

func TestGetDevicesFromEnvvar(t *testing.T) {
	envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
	gpuID := "GPU-12345"
	anotherGPUID := "GPU-67890"
	thirdGPUID := "MIG-12345"

	var tests = []struct {
		description string
		preferredVisibleDeviceEnvVars []string
		env map[string]string
		expectedDevices []string
	}{
		{
			description: "empty env returns nil for non-legacy image",
		},
		{
			description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "",
			},
		},
		{
			description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "void",
			},
		},
		{
			description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "none",
			},
			expectedDevices: []string{""},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "empty env returns all for legacy image",
			env: map[string]string{
				EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{"all"},
		},
		// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
		// not enabled
		{
			description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
			},
		},
		{
			description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "",
				envDockerResourceGPUs: anotherGPUID,
			},
		},
		{
			description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "void",
				envDockerResourceGPUs: anotherGPUID,
			},
		},
		{
			description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: "none",
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{""},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs: anotherGPUID,
				EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "empty env returns all for legacy image",
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
				EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{"all"},
		},
		// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
		// enabled
		{
			description: "empty env returns nil for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
		},
		{
			description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "",
			},
		},
		{
			description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "void",
			},
		},
		{
			description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: "none",
			},
			expectedDevices: []string{""},
		},
		{
			description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: gpuID,
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: gpuID,
				EnvVarCudaVersion: "legacy",
			},
			expectedDevices: []string{gpuID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS is selected if present",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
			preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				envDockerResourceGPUs: anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
			preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
		{
			description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
			preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				"DOCKER_RESOURCE_GPUS": thirdGPUID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{thirdGPUID, anotherGPUID},
		},
		{
			description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
			preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
			env: map[string]string{
				EnvVarNvidiaVisibleDevices: gpuID,
				"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
			},
			expectedDevices: []string{anotherGPUID},
		},
	}

	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			image, err := New(
				WithEnvMap(tc.env),
				WithPrivileged(true),
				WithAcceptDeviceListAsVolumeMounts(false),
				WithAcceptEnvvarUnprivileged(false),
				WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...),
			)

			require.NoError(t, err)
			devices := image.VisibleDevicesFromEnvVar()
			require.EqualValues(t, tc.expectedDevices, devices)
		})
	}
}

func TestGetVisibleDevicesFromMounts(t *testing.T) {
	var tests = []struct {
		description string
@@ -197,8 +499,121 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
	}
	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			image, _ := New(WithMounts(tc.mounts))
			require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts())
			image, err := New(WithMounts(tc.mounts))
			require.NoError(t, err)
			require.Equal(t, tc.expectedDevices, image.visibleDevicesFromMounts())
		})
	}
}

func TestVisibleDevices(t *testing.T) {
	var tests = []struct {
		description string
		mountDevices []specs.Mount
		envvarDevices string
		privileged bool
		acceptUnprivileged bool
		acceptMounts bool
		expectedDevices []string
	}{
		{
			description: "Mount devices, unprivileged, no accept unprivileged",
			mountDevices: []specs.Mount{
				{
					Source: "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source: "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices: "GPU2,GPU3",
			privileged: false,
			acceptUnprivileged: false,
			acceptMounts: true,
			expectedDevices: []string{"GPU0", "GPU1"},
		},
		{
			description: "No mount devices, unprivileged, no accept unprivileged",
			mountDevices: nil,
			envvarDevices: "GPU0,GPU1",
			privileged: false,
			acceptUnprivileged: false,
			acceptMounts: true,
			expectedDevices: nil,
		},
		{
			description: "No mount devices, privileged, no accept unprivileged",
			mountDevices: nil,
			envvarDevices: "GPU0,GPU1",
			privileged: true,
			acceptUnprivileged: false,
			acceptMounts: true,
			expectedDevices: []string{"GPU0", "GPU1"},
		},
		{
			description: "No mount devices, unprivileged, accept unprivileged",
			mountDevices: nil,
			envvarDevices: "GPU0,GPU1",
			privileged: false,
			acceptUnprivileged: true,
			acceptMounts: true,
			expectedDevices: []string{"GPU0", "GPU1"},
		},
		{
			description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
			mountDevices: []specs.Mount{
				{
					Source: "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source: "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices: "GPU2,GPU3",
			privileged: false,
			acceptUnprivileged: true,
			acceptMounts: false,
			expectedDevices: []string{"GPU2", "GPU3"},
		},
		{
			description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
			mountDevices: []specs.Mount{
				{
					Source: "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source: "/dev/null",
					Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices: "GPU2,GPU3",
			privileged: false,
			acceptUnprivileged: false,
			acceptMounts: false,
			expectedDevices: nil,
		},
	}
	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			// Wrap the call to getDevices() in a closure.
			image, err := New(
				WithEnvMap(
					map[string]string{
						EnvVarNvidiaVisibleDevices: tc.envvarDevices,
					},
				),
				WithMounts(tc.mountDevices),
				WithPrivileged(tc.privileged),
				WithAcceptDeviceListAsVolumeMounts(tc.acceptMounts),
				WithAcceptEnvvarUnprivileged(tc.acceptUnprivileged),
			)
			require.NoError(t, err)
			require.Equal(t, tc.expectedDevices, image.VisibleDevices())
		})
	}
}
@@ -224,7 +639,7 @@ func TestImexChannelsFromEnvVar(t *testing.T) {
	for _, tc := range testCases {
		for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
			t.Run(tc.description+id, func(t *testing.T) {
				i, err := NewCUDAImageFromEnv(append(baseEnvvars, tc.env...))
				i, err := newCUDAImageFromEnv(append(baseEnvvars, tc.env...))
				require.NoError(t, err)

				channels := i.ImexChannelsFromEnvVar()
@@ -24,20 +24,39 @@ const (
	capSysAdmin = "CAP_SYS_ADMIN"
)

// IsPrivileged returns true if the container is a privileged container.
func IsPrivileged(s *specs.Spec) bool {
	if s.Process.Capabilities == nil {
		return false
type CapabilitiesGetter interface {
	GetCapabilities() []string
}

	// We only make sure that the bounding capabibility set has
	// CAP_SYS_ADMIN. This allows us to make sure that the container was
	// actually started as '--privileged', but also allow non-root users to
	// access the privileged NVIDIA capabilities.
	for _, c := range s.Process.Capabilities.Bounding {
type OCISpec specs.Spec

type OCISpecCapabilities specs.LinuxCapabilities

// IsPrivileged returns true if the container is a privileged container.
func IsPrivileged(s CapabilitiesGetter) bool {
	if s == nil {
		return false
	}
	for _, c := range s.GetCapabilities() {
		if c == capSysAdmin {
			return true
		}
	}

	return false
}

func (s OCISpec) GetCapabilities() []string {
	if s.Process == nil || s.Process.Capabilities == nil {
		return nil
	}
	return (*OCISpecCapabilities)(s.Process.Capabilities).GetCapabilities()
}

func (c OCISpecCapabilities) GetCapabilities() []string {
	// We only make sure that the bounding capability set has
	// CAP_SYS_ADMIN. This allows us to make sure that the container was
	// actually started as '--privileged', but also allow non-root users to
	// access the privileged NVIDIA capabilities.
	return c.Bounding
}
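The refactored IsPrivileged now accepts anything that can report its capabilities, which makes it easy to adapt other spec representations. A hedged sketch of a custom adapter follows; the staticCaps type is hypothetical and not part of the diff, while OCISpec is the adapter the commit itself introduces.

	// Hypothetical adapter: any type whose GetCapabilities returns its bounding
	// set satisfies image.CapabilitiesGetter and can be passed to IsPrivileged.
	type staticCaps []string

	func (c staticCaps) GetCapabilities() []string { return c }

	// image.IsPrivileged(staticCaps{"CAP_SYS_ADMIN"})      -> true
	// image.IsPrivileged((*image.OCISpec)(runtimeSpec))    -> wraps a runtime-spec Spec, as in the tests below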
internal/config/image/privileged_test.go (new file, 57 lines)
@@ -0,0 +1,57 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/

package image

import (
	"testing"

	"github.com/opencontainers/runtime-spec/specs-go"
	"github.com/stretchr/testify/require"
)

func TestIsPrivileged(t *testing.T) {
	var tests = []struct {
		spec     specs.Spec
		expected bool
	}{
		{
			specs.Spec{
				Process: &specs.Process{
					Capabilities: &specs.LinuxCapabilities{
						Bounding: []string{"CAP_SYS_ADMIN"},
					},
				},
			},
			true,
		},
		{
			specs.Spec{
				Process: &specs.Process{
					Capabilities: &specs.LinuxCapabilities{
						Bounding: []string{"CAP_SYS_FOO"},
					},
				},
			},
			false,
		},
	}
	for i, tc := range tests {
		privileged := IsPrivileged((*OCISpec)(&tc.spec))

		require.Equal(t, tc.expected, privileged, "%d: %v", i, tc)
	}
}
@@ -79,7 +79,10 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
		return annotationDevices, nil
	}

	container, err := image.NewCUDAImageFromSpec(rawSpec)
	container, err := image.NewCUDAImageFromSpec(
		rawSpec,
		image.WithLogger(logger),
	)
	if err != nil {
		return nil, err
	}
@@ -107,7 +110,7 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C
		return nil, nil
	}

	if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
	if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged((*image.OCISpec)(rawSpec)) {
		return devices, nil
	}

@@ -70,7 +70,10 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
		return nil, fmt.Errorf("failed to load OCI spec: %v", err)
	}

	image, err := image.NewCUDAImageFromSpec(rawSpec)
	image, err := image.NewCUDAImageFromSpec(
		rawSpec,
		image.WithLogger(logger),
	)
	if err != nil {
		return nil, err
	}