From dede03f3228748f2fa30b26118e450b1a7ec9e57 Mon Sep 17 00:00:00 2001 From: Carlos Eduardo Arango Gutierrez Date: Wed, 28 May 2025 07:33:27 +0200 Subject: [PATCH] Refactor extracting requested devices from the container image This change consolidates the logic for determining requested devices from the container image. The logic for this has been integrated into the image.CUDA type so that multiple implementations are not required. Signed-off-by: Carlos Eduardo Arango Gutierrez Co-authored-by: Evan Lezar --- .../container_config.go | 83 +--- .../container_config_test.go | 339 +------------- .../hook_config.go | 2 +- internal/config/image/builder.go | 66 ++- internal/config/image/cuda_image.go | 74 ++- internal/config/image/cuda_image_test.go | 423 +++++++++++++++++- internal/config/image/privileged.go | 35 +- internal/config/image/privileged_test.go | 57 +++ internal/modifier/cdi.go | 7 +- internal/runtime/runtime_factory.go | 5 +- 10 files changed, 653 insertions(+), 438 deletions(-) create mode 100644 internal/config/image/privileged_test.go diff --git a/cmd/nvidia-container-runtime-hook/container_config.go b/cmd/nvidia-container-runtime-hook/container_config.go index ae3aca75..cfb84745 100644 --- a/cmd/nvidia-container-runtime-hook/container_config.go +++ b/cmd/nvidia-container-runtime-hook/container_config.go @@ -13,10 +13,6 @@ import ( "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" ) -const ( - capSysAdmin = "CAP_SYS_ADMIN" -) - type nvidiaConfig struct { Devices []string MigConfigDevices string @@ -103,9 +99,9 @@ func loadSpec(path string) (spec *Spec) { return } -func isPrivileged(s *Spec) bool { - if s.Process.Capabilities == nil { - return false +func (s *Spec) GetCapabilities() []string { + if s == nil || s.Process == nil || s.Process.Capabilities == nil { + return nil } var caps []string @@ -118,67 +114,22 @@ func isPrivileged(s *Spec) bool { if err != nil { log.Panicln("could not decode Process.Capabilities in OCI spec:", err) } - for _, c := range caps { - if c == capSysAdmin { - return true - } - } - return false + return caps } // Otherwise, parse s.Process.Capabilities as: // github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54 - process := specs.Process{ - Env: s.Process.Env, - } - - err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities) + capabilities := specs.LinuxCapabilities{} + err := json.Unmarshal(*s.Process.Capabilities, &capabilities) if err != nil { log.Panicln("could not decode Process.Capabilities in OCI spec:", err) } - fullSpec := specs.Spec{ - Version: *s.Version, - Process: &process, - } - - return image.IsPrivileged(&fullSpec) + return image.OCISpecCapabilities(capabilities).GetCapabilities() } -func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string { - // We check if the image has at least one of the Swarm resource envvars defined and use this - // if specified. - for _, envvar := range swarmResourceEnvvars { - if containerImage.HasEnvvar(envvar) { - return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List() - } - } - - return containerImage.VisibleDevicesFromEnvVar() -} - -func (hookConfig *hookConfig) getDevices(image image.CUDA, privileged bool) []string { - // If enabled, try and get the device list from volume mounts first - if hookConfig.AcceptDeviceListAsVolumeMounts { - devices := image.VisibleDevicesFromMounts() - if len(devices) > 0 { - return devices - } - } - - // Fallback to reading from the environment variable if privileges are correct - devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars()) - if len(devices) == 0 { - return nil - } - if privileged || hookConfig.AcceptEnvvarUnprivileged { - return devices - } - - configName := hookConfig.getConfigOption("AcceptEnvvarUnprivileged") - log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, %v=%v) ", privileged, configName, hookConfig.AcceptEnvvarUnprivileged) - - return nil +func isPrivileged(s *Spec) bool { + return image.IsPrivileged(s) } func getMigConfigDevices(i image.CUDA) *string { @@ -225,7 +176,6 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy // We use the default driver capabilities by default. This is filtered to only include the // supported capabilities supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities) - capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities) capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities) @@ -251,7 +201,7 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig { legacyImage := image.IsLegacy() - devices := hookConfig.getDevices(image, privileged) + devices := image.VisibleDevices() if len(devices) == 0 { // empty devices means this is not a GPU container. return nil @@ -306,20 +256,25 @@ func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) { s := loadSpec(path.Join(b, "config.json")) - image, err := image.New( + privileged := isPrivileged(s) + + i, err := image.New( image.WithEnv(s.Process.Env), image.WithMounts(s.Mounts), + image.WithPrivileged(privileged), image.WithDisableRequire(hookConfig.DisableRequire), + image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts), + image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged), + image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...), ) if err != nil { log.Panicln(err) } - privileged := isPrivileged(s) return containerConfig{ Pid: h.Pid, Rootfs: s.Root.Path, - Image: image, - Nvidia: hookConfig.getNvidiaConfig(image, privileged), + Image: i, + Nvidia: hookConfig.getNvidiaConfig(i, privileged), } } diff --git a/cmd/nvidia-container-runtime-hook/container_config_test.go b/cmd/nvidia-container-runtime-hook/container_config_test.go index 1f3858b1..5247e80f 100644 --- a/cmd/nvidia-container-runtime-hook/container_config_test.go +++ b/cmd/nvidia-container-runtime-hook/container_config_test.go @@ -1,10 +1,8 @@ package main import ( - "path/filepath" "testing" - "github.com/opencontainers/runtime-spec/specs-go" "github.com/stretchr/testify/require" "github.com/NVIDIA/nvidia-container-toolkit/internal/config" @@ -479,7 +477,10 @@ func TestGetNvidiaConfig(t *testing.T) { t.Run(tc.description, func(t *testing.T) { image, _ := image.New( image.WithEnvMap(tc.env), + image.WithPrivileged(tc.privileged), + image.WithPreferredVisibleDevicesEnvVars(tc.hookConfig.getSwarmResourceEnvvars()...), ) + // Wrap the call to getNvidiaConfig() in a closure. var cfg *nvidiaConfig getConfig := func() { @@ -518,340 +519,6 @@ func TestGetNvidiaConfig(t *testing.T) { } } -func TestDeviceListSourcePriority(t *testing.T) { - var tests = []struct { - description string - mountDevices []specs.Mount - envvarDevices string - privileged bool - acceptUnprivileged bool - acceptMounts bool - expectedDevices []string - }{ - { - description: "Mount devices, unprivileged, no accept unprivileged", - mountDevices: []specs.Mount{ - { - Source: "/dev/null", - Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"), - }, - { - Source: "/dev/null", - Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"), - }, - }, - envvarDevices: "GPU2,GPU3", - privileged: false, - acceptUnprivileged: false, - acceptMounts: true, - expectedDevices: []string{"GPU0", "GPU1"}, - }, - { - description: "No mount devices, unprivileged, no accept unprivileged", - mountDevices: nil, - envvarDevices: "GPU0,GPU1", - privileged: false, - acceptUnprivileged: false, - acceptMounts: true, - expectedDevices: nil, - }, - { - description: "No mount devices, privileged, no accept unprivileged", - mountDevices: nil, - envvarDevices: "GPU0,GPU1", - privileged: true, - acceptUnprivileged: false, - acceptMounts: true, - expectedDevices: []string{"GPU0", "GPU1"}, - }, - { - description: "No mount devices, unprivileged, accept unprivileged", - mountDevices: nil, - envvarDevices: "GPU0,GPU1", - privileged: false, - acceptUnprivileged: true, - acceptMounts: true, - expectedDevices: []string{"GPU0", "GPU1"}, - }, - { - description: "Mount devices, unprivileged, accept unprivileged, no accept mounts", - mountDevices: []specs.Mount{ - { - Source: "/dev/null", - Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"), - }, - { - Source: "/dev/null", - Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"), - }, - }, - envvarDevices: "GPU2,GPU3", - privileged: false, - acceptUnprivileged: true, - acceptMounts: false, - expectedDevices: []string{"GPU2", "GPU3"}, - }, - { - description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts", - mountDevices: []specs.Mount{ - { - Source: "/dev/null", - Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"), - }, - { - Source: "/dev/null", - Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"), - }, - }, - envvarDevices: "GPU2,GPU3", - privileged: false, - acceptUnprivileged: false, - acceptMounts: false, - expectedDevices: nil, - }, - } - for _, tc := range tests { - t.Run(tc.description, func(t *testing.T) { - // Wrap the call to getDevices() in a closure. - var devices []string - getDevices := func() { - image, _ := image.New( - image.WithEnvMap( - map[string]string{ - image.EnvVarNvidiaVisibleDevices: tc.envvarDevices, - }, - ), - image.WithMounts(tc.mountDevices), - ) - defaultConfig, _ := config.GetDefault() - cfg := &hookConfig{defaultConfig} - cfg.AcceptEnvvarUnprivileged = tc.acceptUnprivileged - cfg.AcceptDeviceListAsVolumeMounts = tc.acceptMounts - devices = cfg.getDevices(image, tc.privileged) - } - - // For all other tests, just grab the devices and check the results - getDevices() - - require.Equal(t, tc.expectedDevices, devices) - }) - } -} - -func TestGetDevicesFromEnvvar(t *testing.T) { - envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS" - gpuID := "GPU-12345" - anotherGPUID := "GPU-67890" - thirdGPUID := "MIG-12345" - - var tests = []struct { - description string - swarmResourceEnvvars []string - env map[string]string - expectedDevices []string - }{ - { - description: "empty env returns nil for non-legacy image", - }, - { - description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: "", - }, - }, - { - description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: "void", - }, - }, - { - description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: "none", - }, - expectedDevices: []string{""}, - }, - { - description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: gpuID, - }, - expectedDevices: []string{gpuID}, - }, - { - description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: gpuID, - image.EnvVarCudaVersion: "legacy", - }, - expectedDevices: []string{gpuID}, - }, - { - description: "empty env returns all for legacy image", - env: map[string]string{ - image.EnvVarCudaVersion: "legacy", - }, - expectedDevices: []string{"all"}, - }, - // Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when - // not enabled - { - description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", - env: map[string]string{ - envDockerResourceGPUs: anotherGPUID, - }, - }, - { - description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: "", - envDockerResourceGPUs: anotherGPUID, - }, - }, - { - description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: "void", - envDockerResourceGPUs: anotherGPUID, - }, - }, - { - description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: "none", - envDockerResourceGPUs: anotherGPUID, - }, - expectedDevices: []string{""}, - }, - { - description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: gpuID, - envDockerResourceGPUs: anotherGPUID, - }, - expectedDevices: []string{gpuID}, - }, - { - description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: gpuID, - envDockerResourceGPUs: anotherGPUID, - image.EnvVarCudaVersion: "legacy", - }, - expectedDevices: []string{gpuID}, - }, - { - description: "empty env returns all for legacy image", - env: map[string]string{ - envDockerResourceGPUs: anotherGPUID, - image.EnvVarCudaVersion: "legacy", - }, - expectedDevices: []string{"all"}, - }, - // Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when - // enabled - { - description: "empty env returns nil for non-legacy image", - swarmResourceEnvvars: []string{envDockerResourceGPUs}, - }, - { - description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image", - swarmResourceEnvvars: []string{envDockerResourceGPUs}, - env: map[string]string{ - envDockerResourceGPUs: "", - }, - }, - { - description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image", - swarmResourceEnvvars: []string{envDockerResourceGPUs}, - env: map[string]string{ - envDockerResourceGPUs: "void", - }, - }, - { - description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image", - swarmResourceEnvvars: []string{envDockerResourceGPUs}, - env: map[string]string{ - envDockerResourceGPUs: "none", - }, - expectedDevices: []string{""}, - }, - { - description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image", - swarmResourceEnvvars: []string{envDockerResourceGPUs}, - env: map[string]string{ - envDockerResourceGPUs: gpuID, - }, - expectedDevices: []string{gpuID}, - }, - { - description: "DOCKER_RESOURCE_GPUS set returns value for legacy image", - swarmResourceEnvvars: []string{envDockerResourceGPUs}, - env: map[string]string{ - envDockerResourceGPUs: gpuID, - image.EnvVarCudaVersion: "legacy", - }, - expectedDevices: []string{gpuID}, - }, - { - description: "DOCKER_RESOURCE_GPUS is selected if present", - swarmResourceEnvvars: []string{envDockerResourceGPUs}, - env: map[string]string{ - envDockerResourceGPUs: anotherGPUID, - }, - expectedDevices: []string{anotherGPUID}, - }, - { - description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present", - swarmResourceEnvvars: []string{envDockerResourceGPUs}, - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: gpuID, - envDockerResourceGPUs: anotherGPUID, - }, - expectedDevices: []string{anotherGPUID}, - }, - { - description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present", - swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"}, - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: gpuID, - "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, - }, - expectedDevices: []string{anotherGPUID}, - }, - { - description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present", - swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"}, - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: gpuID, - "DOCKER_RESOURCE_GPUS": thirdGPUID, - "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, - }, - expectedDevices: []string{thirdGPUID, anotherGPUID}, - }, - { - description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present", - swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"}, - env: map[string]string{ - image.EnvVarNvidiaVisibleDevices: gpuID, - "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, - }, - expectedDevices: []string{anotherGPUID}, - }, - } - - for _, tc := range tests { - t.Run(tc.description, func(t *testing.T) { - image, _ := image.New( - image.WithEnvMap(tc.env), - ) - devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars) - require.EqualValues(t, tc.expectedDevices, devices) - }) - } -} - func TestGetDriverCapabilities(t *testing.T) { supportedCapabilities := "compute,display,utility,video" diff --git a/cmd/nvidia-container-runtime-hook/hook_config.go b/cmd/nvidia-container-runtime-hook/hook_config.go index 3ad8e3b1..ec4e0434 100644 --- a/cmd/nvidia-container-runtime-hook/hook_config.go +++ b/cmd/nvidia-container-runtime-hook/hook_config.go @@ -88,7 +88,7 @@ func (c hookConfig) getConfigOption(fieldName string) string { // getSwarmResourceEnvvars returns the swarm resource envvars for the config. func (c *hookConfig) getSwarmResourceEnvvars() []string { - if c.SwarmResource == "" { + if c == nil || c.SwarmResource == "" { return nil } diff --git a/internal/config/image/builder.go b/internal/config/image/builder.go index 332d017a..7acd3ba0 100644 --- a/internal/config/image/builder.go +++ b/internal/config/image/builder.go @@ -21,22 +21,36 @@ import ( "strings" "github.com/opencontainers/runtime-spec/specs-go" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" ) type builder struct { - env map[string]string - mounts []specs.Mount + CUDA + disableRequire bool } +// Option is a functional option for creating a CUDA image. +type Option func(*builder) error + // New creates a new CUDA image from the input options. func New(opt ...Option) (CUDA, error) { - b := &builder{} + b := &builder{ + CUDA: CUDA{ + acceptEnvvarUnprivileged: true, + }, + } for _, o := range opt { if err := o(b); err != nil { return CUDA{}, err } } + + if b.logger == nil { + b.logger = logger.New() + } + if b.env == nil { b.env = make(map[string]string) } @@ -50,15 +64,22 @@ func (b builder) build() (CUDA, error) { b.env[EnvVarNvidiaDisableRequire] = "true" } - c := CUDA{ - env: b.env, - mounts: b.mounts, - } - return c, nil + return b.CUDA, nil } -// Option is a functional option for creating a CUDA image. -type Option func(*builder) error +func WithAcceptDeviceListAsVolumeMounts(acceptDeviceListAsVolumeMounts bool) Option { + return func(b *builder) error { + b.acceptDeviceListAsVolumeMounts = acceptDeviceListAsVolumeMounts + return nil + } +} + +func WithAcceptEnvvarUnprivileged(acceptEnvvarUnprivileged bool) Option { + return func(b *builder) error { + b.acceptEnvvarUnprivileged = acceptEnvvarUnprivileged + return nil + } +} // WithDisableRequire sets the disable require option. func WithDisableRequire(disableRequire bool) Option { @@ -93,6 +114,14 @@ func WithEnvMap(env map[string]string) Option { } } +// WithLogger sets the logger to use when creating the CUDA image. +func WithLogger(logger logger.Interface) Option { + return func(b *builder) error { + b.logger = logger + return nil + } +} + // WithMounts sets the mounts associated with the CUDA image. func WithMounts(mounts []specs.Mount) Option { return func(b *builder) error { @@ -100,3 +129,20 @@ func WithMounts(mounts []specs.Mount) Option { return nil } } + +// WithPreferredVisibleDevicesEnvVars sets the environment variables that +// should take precedence over the default NVIDIA_VISIBLE_DEVICES. +func WithPreferredVisibleDevicesEnvVars(preferredVisibleDeviceEnvVars ...string) Option { + return func(b *builder) error { + b.preferredVisibleDeviceEnvVars = preferredVisibleDeviceEnvVars + return nil + } +} + +// WithPrivileged sets whether an image is privileged or not. +func WithPrivileged(isPrivileged bool) Option { + return func(b *builder) error { + b.isPrivileged = isPrivileged + return nil + } +} diff --git a/internal/config/image/cuda_image.go b/internal/config/image/cuda_image.go index d5bbc224..b16e3fed 100644 --- a/internal/config/image/cuda_image.go +++ b/internal/config/image/cuda_image.go @@ -25,6 +25,8 @@ import ( "github.com/opencontainers/runtime-spec/specs-go" "golang.org/x/mod/semver" "tags.cncf.io/container-device-interface/pkg/parser" + + "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" ) const ( @@ -38,27 +40,37 @@ const ( // a map of environment variable to values that can be used to perform lookups // such as requirements. type CUDA struct { - env map[string]string - mounts []specs.Mount + logger logger.Interface + + env map[string]string + isPrivileged bool + mounts []specs.Mount + + acceptDeviceListAsVolumeMounts bool + acceptEnvvarUnprivileged bool + preferredVisibleDeviceEnvVars []string } // NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec. // The process environment is read (if present) to construc the CUDA Image. -func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) { +func NewCUDAImageFromSpec(spec *specs.Spec, opts ...Option) (CUDA, error) { var env []string if spec != nil && spec.Process != nil { env = spec.Process.Env } - return New( + specOpts := []Option{ WithEnv(env), WithMounts(spec.Mounts), - ) + WithPrivileged(IsPrivileged((*OCISpec)(spec))), + } + + return New(append(opts, specOpts...)...) } -// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment +// newCUDAImageFromEnv creates a CUDA image from the input environment. The environment // is a list of strings of the form ENVAR=VALUE. -func NewCUDAImageFromEnv(env []string) (CUDA, error) { +func newCUDAImageFromEnv(env []string) (CUDA, error) { return New(WithEnv(env)) } @@ -216,14 +228,53 @@ func (i CUDA) OnlyFullyQualifiedCDIDevices() bool { return hasCDIdevice } -// VisibleDevicesFromEnvVar returns the set of visible devices requested through -// the NVIDIA_VISIBLE_DEVICES environment variable. +// VisibleDevices returns a list of devices requested in the container image. +// If volume mount requests are enabled these are returned if requested, +// otherwise device requests through environment variables are considered. +// In cases where environment variable requests required privileged containers, +// such devices requests are ignored. +func (i CUDA) VisibleDevices() []string { + // If enabled, try and get the device list from volume mounts first + if i.acceptDeviceListAsVolumeMounts { + volumeMountDeviceRequests := i.visibleDevicesFromMounts() + if len(volumeMountDeviceRequests) > 0 { + return volumeMountDeviceRequests + } + } + + // Get the Fallback to reading from the environment variable if privileges are correct + envVarDeviceRequests := i.VisibleDevicesFromEnvVar() + if len(envVarDeviceRequests) == 0 { + return nil + } + + // If the container is privileged, or environment variable requests are + // allowed for unprivileged containers, these devices are returned. + if i.isPrivileged || i.acceptEnvvarUnprivileged { + return envVarDeviceRequests + } + + // We log a warning if we are ignoring the environment variable requests. + i.logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES in unprivileged container") + + return nil +} + +// VisibleDevicesFromEnvVar returns the set of visible devices requested through environment variables. +// If any of the preferredVisibleDeviceEnvVars are present in the image, they +// are used to determine the visible devices. If this is not the case, the +// NVIDIA_VISIBLE_DEVICES environment variable is used. func (i CUDA) VisibleDevicesFromEnvVar() []string { + for _, envVar := range i.preferredVisibleDeviceEnvVars { + if i.HasEnvvar(envVar) { + return i.DevicesFromEnvvars(i.preferredVisibleDeviceEnvVars...).List() + } + } return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List() } -// VisibleDevicesFromMounts returns the set of visible devices requested as mounts. -func (i CUDA) VisibleDevicesFromMounts() []string { +// visibleDevicesFromMounts returns the set of visible devices requested as mounts. +func (i CUDA) visibleDevicesFromMounts() []string { var devices []string for _, device := range i.DevicesFromMounts() { switch { @@ -238,7 +289,6 @@ func (i CUDA) VisibleDevicesFromMounts() []string { } // DevicesFromMounts returns a list of device specified as mounts. -// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime func (i CUDA) DevicesFromMounts() []string { root := filepath.Clean(DeviceListAsVolumeMountsRoot) seen := make(map[string]bool) diff --git a/internal/config/image/cuda_image_test.go b/internal/config/image/cuda_image_test.go index 3b77d333..044466d3 100644 --- a/internal/config/image/cuda_image_test.go +++ b/internal/config/image/cuda_image_test.go @@ -21,9 +21,91 @@ import ( "testing" "github.com/opencontainers/runtime-spec/specs-go" + testlog "github.com/sirupsen/logrus/hooks/test" "github.com/stretchr/testify/require" ) +func TestNewCUDAImageFromSpec(t *testing.T) { + logger, _ := testlog.NewNullLogger() + + testCases := []struct { + description string + spec *specs.Spec + options []Option + expected CUDA + }{ + { + description: "no env vars", + spec: &specs.Spec{ + Process: &specs.Process{ + Env: []string{}, + }, + }, + expected: CUDA{ + logger: logger, + env: map[string]string{}, + acceptEnvvarUnprivileged: true, + }, + }, + { + description: "NVIDIA_VISIBLE_DEVICES=all", + spec: &specs.Spec{ + Process: &specs.Process{ + Env: []string{"NVIDIA_VISIBLE_DEVICES=all"}, + }, + }, + expected: CUDA{ + logger: logger, + env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"}, + acceptEnvvarUnprivileged: true, + }, + }, + { + description: "Spec overrides options", + spec: &specs.Spec{ + Process: &specs.Process{ + Env: []string{"NVIDIA_VISIBLE_DEVICES=all"}, + }, + Mounts: []specs.Mount{ + { + Source: "/spec-source", + Destination: "/spec-destination", + }, + }, + }, + options: []Option{ + WithEnvMap(map[string]string{"OTHER": "value"}), + WithMounts([]specs.Mount{ + { + Source: "/option-source", + Destination: "/option-destination", + }, + }), + }, + expected: CUDA{ + logger: logger, + env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"}, + mounts: []specs.Mount{ + { + Source: "/spec-source", + Destination: "/spec-destination", + }, + }, + acceptEnvvarUnprivileged: true, + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.description, func(t *testing.T) { + options := append([]Option{WithLogger(logger)}, tc.options...) + image, err := NewCUDAImageFromSpec(tc.spec, options...) + require.NoError(t, err) + require.EqualValues(t, tc.expected, image) + }) + } +} + func TestParseMajorMinorVersionValid(t *testing.T) { var tests = []struct { version string @@ -122,7 +204,7 @@ func TestGetRequirements(t *testing.T) { for _, tc := range testCases { t.Run(tc.description, func(t *testing.T) { - image, err := NewCUDAImageFromEnv(tc.env) + image, err := newCUDAImageFromEnv(tc.env) require.NoError(t, err) requirements, err := image.GetRequirements() @@ -133,6 +215,226 @@ func TestGetRequirements(t *testing.T) { } } +func TestGetDevicesFromEnvvar(t *testing.T) { + envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS" + gpuID := "GPU-12345" + anotherGPUID := "GPU-67890" + thirdGPUID := "MIG-12345" + + var tests = []struct { + description string + preferredVisibleDeviceEnvVars []string + env map[string]string + expectedDevices []string + }{ + { + description: "empty env returns nil for non-legacy image", + }, + { + description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: "", + }, + }, + { + description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: "void", + }, + }, + { + description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: "none", + }, + expectedDevices: []string{""}, + }, + { + description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: gpuID, + }, + expectedDevices: []string{gpuID}, + }, + { + description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: gpuID, + EnvVarCudaVersion: "legacy", + }, + expectedDevices: []string{gpuID}, + }, + { + description: "empty env returns all for legacy image", + env: map[string]string{ + EnvVarCudaVersion: "legacy", + }, + expectedDevices: []string{"all"}, + }, + // Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when + // not enabled + { + description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + envDockerResourceGPUs: anotherGPUID, + }, + }, + { + description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: "", + envDockerResourceGPUs: anotherGPUID, + }, + }, + { + description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: "void", + envDockerResourceGPUs: anotherGPUID, + }, + }, + { + description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: "none", + envDockerResourceGPUs: anotherGPUID, + }, + expectedDevices: []string{""}, + }, + { + description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, + }, + expectedDevices: []string{gpuID}, + }, + { + description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image", + env: map[string]string{ + EnvVarNvidiaVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, + EnvVarCudaVersion: "legacy", + }, + expectedDevices: []string{gpuID}, + }, + { + description: "empty env returns all for legacy image", + env: map[string]string{ + envDockerResourceGPUs: anotherGPUID, + EnvVarCudaVersion: "legacy", + }, + expectedDevices: []string{"all"}, + }, + // Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when + // enabled + { + description: "empty env returns nil for non-legacy image", + preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs}, + }, + { + description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image", + preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs}, + env: map[string]string{ + envDockerResourceGPUs: "", + }, + }, + { + description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image", + preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs}, + env: map[string]string{ + envDockerResourceGPUs: "void", + }, + }, + { + description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image", + preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs}, + env: map[string]string{ + envDockerResourceGPUs: "none", + }, + expectedDevices: []string{""}, + }, + { + description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image", + preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs}, + env: map[string]string{ + envDockerResourceGPUs: gpuID, + }, + expectedDevices: []string{gpuID}, + }, + { + description: "DOCKER_RESOURCE_GPUS set returns value for legacy image", + preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs}, + env: map[string]string{ + envDockerResourceGPUs: gpuID, + EnvVarCudaVersion: "legacy", + }, + expectedDevices: []string{gpuID}, + }, + { + description: "DOCKER_RESOURCE_GPUS is selected if present", + preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs}, + env: map[string]string{ + envDockerResourceGPUs: anotherGPUID, + }, + expectedDevices: []string{anotherGPUID}, + }, + { + description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present", + preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs}, + env: map[string]string{ + EnvVarNvidiaVisibleDevices: gpuID, + envDockerResourceGPUs: anotherGPUID, + }, + expectedDevices: []string{anotherGPUID}, + }, + { + description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present", + preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"}, + env: map[string]string{ + EnvVarNvidiaVisibleDevices: gpuID, + "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, + }, + expectedDevices: []string{anotherGPUID}, + }, + { + description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present", + preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"}, + env: map[string]string{ + EnvVarNvidiaVisibleDevices: gpuID, + "DOCKER_RESOURCE_GPUS": thirdGPUID, + "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, + }, + expectedDevices: []string{thirdGPUID, anotherGPUID}, + }, + { + description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present", + preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"}, + env: map[string]string{ + EnvVarNvidiaVisibleDevices: gpuID, + "DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID, + }, + expectedDevices: []string{anotherGPUID}, + }, + } + + for _, tc := range tests { + t.Run(tc.description, func(t *testing.T) { + image, err := New( + WithEnvMap(tc.env), + WithPrivileged(true), + WithAcceptDeviceListAsVolumeMounts(false), + WithAcceptEnvvarUnprivileged(false), + WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...), + ) + + require.NoError(t, err) + devices := image.VisibleDevicesFromEnvVar() + require.EqualValues(t, tc.expectedDevices, devices) + }) + } +} + func TestGetVisibleDevicesFromMounts(t *testing.T) { var tests = []struct { description string @@ -197,8 +499,121 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) { } for _, tc := range tests { t.Run(tc.description, func(t *testing.T) { - image, _ := New(WithMounts(tc.mounts)) - require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts()) + image, err := New(WithMounts(tc.mounts)) + require.NoError(t, err) + require.Equal(t, tc.expectedDevices, image.visibleDevicesFromMounts()) + }) + } +} + +func TestVisibleDevices(t *testing.T) { + var tests = []struct { + description string + mountDevices []specs.Mount + envvarDevices string + privileged bool + acceptUnprivileged bool + acceptMounts bool + expectedDevices []string + }{ + { + description: "Mount devices, unprivileged, no accept unprivileged", + mountDevices: []specs.Mount{ + { + Source: "/dev/null", + Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"), + }, + { + Source: "/dev/null", + Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"), + }, + }, + envvarDevices: "GPU2,GPU3", + privileged: false, + acceptUnprivileged: false, + acceptMounts: true, + expectedDevices: []string{"GPU0", "GPU1"}, + }, + { + description: "No mount devices, unprivileged, no accept unprivileged", + mountDevices: nil, + envvarDevices: "GPU0,GPU1", + privileged: false, + acceptUnprivileged: false, + acceptMounts: true, + expectedDevices: nil, + }, + { + description: "No mount devices, privileged, no accept unprivileged", + mountDevices: nil, + envvarDevices: "GPU0,GPU1", + privileged: true, + acceptUnprivileged: false, + acceptMounts: true, + expectedDevices: []string{"GPU0", "GPU1"}, + }, + { + description: "No mount devices, unprivileged, accept unprivileged", + mountDevices: nil, + envvarDevices: "GPU0,GPU1", + privileged: false, + acceptUnprivileged: true, + acceptMounts: true, + expectedDevices: []string{"GPU0", "GPU1"}, + }, + { + description: "Mount devices, unprivileged, accept unprivileged, no accept mounts", + mountDevices: []specs.Mount{ + { + Source: "/dev/null", + Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"), + }, + { + Source: "/dev/null", + Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"), + }, + }, + envvarDevices: "GPU2,GPU3", + privileged: false, + acceptUnprivileged: true, + acceptMounts: false, + expectedDevices: []string{"GPU2", "GPU3"}, + }, + { + description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts", + mountDevices: []specs.Mount{ + { + Source: "/dev/null", + Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"), + }, + { + Source: "/dev/null", + Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"), + }, + }, + envvarDevices: "GPU2,GPU3", + privileged: false, + acceptUnprivileged: false, + acceptMounts: false, + expectedDevices: nil, + }, + } + for _, tc := range tests { + t.Run(tc.description, func(t *testing.T) { + // Wrap the call to getDevices() in a closure. + image, err := New( + WithEnvMap( + map[string]string{ + EnvVarNvidiaVisibleDevices: tc.envvarDevices, + }, + ), + WithMounts(tc.mountDevices), + WithPrivileged(tc.privileged), + WithAcceptDeviceListAsVolumeMounts(tc.acceptMounts), + WithAcceptEnvvarUnprivileged(tc.acceptUnprivileged), + ) + require.NoError(t, err) + require.Equal(t, tc.expectedDevices, image.VisibleDevices()) }) } } @@ -224,7 +639,7 @@ func TestImexChannelsFromEnvVar(t *testing.T) { for _, tc := range testCases { for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} { t.Run(tc.description+id, func(t *testing.T) { - i, err := NewCUDAImageFromEnv(append(baseEnvvars, tc.env...)) + i, err := newCUDAImageFromEnv(append(baseEnvvars, tc.env...)) require.NoError(t, err) channels := i.ImexChannelsFromEnvVar() diff --git a/internal/config/image/privileged.go b/internal/config/image/privileged.go index a54598d6..04f32cf5 100644 --- a/internal/config/image/privileged.go +++ b/internal/config/image/privileged.go @@ -24,20 +24,39 @@ const ( capSysAdmin = "CAP_SYS_ADMIN" ) +type CapabilitiesGetter interface { + GetCapabilities() []string +} + +type OCISpec specs.Spec + +type OCISpecCapabilities specs.LinuxCapabilities + // IsPrivileged returns true if the container is a privileged container. -func IsPrivileged(s *specs.Spec) bool { - if s.Process.Capabilities == nil { +func IsPrivileged(s CapabilitiesGetter) bool { + if s == nil { return false } - - // We only make sure that the bounding capabibility set has - // CAP_SYS_ADMIN. This allows us to make sure that the container was - // actually started as '--privileged', but also allow non-root users to - // access the privileged NVIDIA capabilities. - for _, c := range s.Process.Capabilities.Bounding { + for _, c := range s.GetCapabilities() { if c == capSysAdmin { return true } } + return false } + +func (s OCISpec) GetCapabilities() []string { + if s.Process == nil || s.Process.Capabilities == nil { + return nil + } + return (*OCISpecCapabilities)(s.Process.Capabilities).GetCapabilities() +} + +func (c OCISpecCapabilities) GetCapabilities() []string { + // We only make sure that the bounding capability set has + // CAP_SYS_ADMIN. This allows us to make sure that the container was + // actually started as '--privileged', but also allow non-root users to + // access the privileged NVIDIA capabilities. + return c.Bounding +} diff --git a/internal/config/image/privileged_test.go b/internal/config/image/privileged_test.go new file mode 100644 index 00000000..328c496a --- /dev/null +++ b/internal/config/image/privileged_test.go @@ -0,0 +1,57 @@ +/** +# Copyright (c) NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +**/ + +package image + +import ( + "testing" + + "github.com/opencontainers/runtime-spec/specs-go" + "github.com/stretchr/testify/require" +) + +func TestIsPrivileged(t *testing.T) { + var tests = []struct { + spec specs.Spec + expected bool + }{ + { + specs.Spec{ + Process: &specs.Process{ + Capabilities: &specs.LinuxCapabilities{ + Bounding: []string{"CAP_SYS_ADMIN"}, + }, + }, + }, + true, + }, + { + specs.Spec{ + Process: &specs.Process{ + Capabilities: &specs.LinuxCapabilities{ + Bounding: []string{"CAP_SYS_FOO"}, + }, + }, + }, + false, + }, + } + for i, tc := range tests { + privileged := IsPrivileged((*OCISpec)(&tc.spec)) + + require.Equal(t, tc.expected, privileged, "%d: %v", i, tc) + } +} diff --git a/internal/modifier/cdi.go b/internal/modifier/cdi.go index 90cd481b..6c7286af 100644 --- a/internal/modifier/cdi.go +++ b/internal/modifier/cdi.go @@ -79,7 +79,10 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C return annotationDevices, nil } - container, err := image.NewCUDAImageFromSpec(rawSpec) + container, err := image.NewCUDAImageFromSpec( + rawSpec, + image.WithLogger(logger), + ) if err != nil { return nil, err } @@ -107,7 +110,7 @@ func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.C return nil, nil } - if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) { + if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged((*image.OCISpec)(rawSpec)) { return devices, nil } diff --git a/internal/runtime/runtime_factory.go b/internal/runtime/runtime_factory.go index 9386c6ac..a7b454a9 100644 --- a/internal/runtime/runtime_factory.go +++ b/internal/runtime/runtime_factory.go @@ -70,7 +70,10 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp return nil, fmt.Errorf("failed to load OCI spec: %v", err) } - image, err := image.NewCUDAImageFromSpec(rawSpec) + image, err := image.NewCUDAImageFromSpec( + rawSpec, + image.WithLogger(logger), + ) if err != nil { return nil, err }