Merge branch 'CNT-3998/cdi-accept-visible-devices-when-privileged' into 'main'

Honor accept-nvidia-visible-devices-envvar-when-unprivileged setting in CDI mode

See merge request nvidia/container-toolkit/container-toolkit!331
This commit is contained in:
Evan Lezar 2023-03-09 15:59:08 +00:00
commit 4c2eff4865
5 changed files with 90 additions and 23 deletions

View File

@ -10,6 +10,7 @@ import (
"strings" "strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver" "golang.org/x/mod/semver"
) )
@ -130,7 +131,7 @@ func isPrivileged(s *Spec) bool {
} }
var caps []string var caps []string
// If v1.1.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as: // If v1.0.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54 // github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54
rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1") rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1")
rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5") rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5")
@ -139,28 +140,31 @@ func isPrivileged(s *Spec) bool {
if err != nil { if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err) log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
} }
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
} else {
var lc LinuxCapabilities
err := json.Unmarshal(*s.Process.Capabilities, &lc)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
// We only make sure that the bounding capabibility set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the privileged NVIDIA capabilities.
caps = lc.Bounding
}
for _, c := range caps { for _, c := range caps {
if c == capSysAdmin { if c == capSysAdmin {
return true return true
} }
} }
return false return false
}
// Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
process := specs.Process{
Env: s.Process.Env,
}
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
}
fullSpec := specs.Spec{
Version: *s.Version,
Process: &process,
}
return image.IsPrivileged(&fullSpec)
} }
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string { func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {

View File

@ -45,6 +45,8 @@ var (
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit // Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go // Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
type Config struct { type Config struct {
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"` NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"` NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"` NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
@ -91,6 +93,8 @@ func getConfigFrom(toml *toml.Tree) (*Config, error) {
return cfg, nil return cfg, nil
} }
cfg.AcceptEnvvarUnprivileged = toml.GetDefault("accept-nvidia-visible-devices-envvar-when-unprivileged", cfg.AcceptEnvvarUnprivileged).(bool)
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml) cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml) cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
runtimeConfig, err := getRuntimeConfigFrom(toml) runtimeConfig, err := getRuntimeConfigFrom(toml)
@ -105,6 +109,7 @@ func getConfigFrom(toml *toml.Tree) (*Config, error) {
// getDefaultConfig defines the default values for the config // getDefaultConfig defines the default values for the config
func getDefaultConfig() *Config { func getDefaultConfig() *Config {
c := Config{ c := Config{
AcceptEnvvarUnprivileged: true,
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(), NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
NVIDIACTKConfig: *getDefaultCTKConfig(), NVIDIACTKConfig: *getDefaultCTKConfig(),
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(), NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),

View File

@ -57,6 +57,7 @@ func TestGetConfig(t *testing.T) {
{ {
description: "empty config is default", description: "empty config is default",
expectedConfig: &Config{ expectedConfig: &Config{
AcceptEnvvarUnprivileged: true,
NVIDIAContainerCLIConfig: ContainerCLIConfig{ NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "", Root: "",
}, },
@ -82,6 +83,7 @@ func TestGetConfig(t *testing.T) {
{ {
description: "config options set inline", description: "config options set inline",
contents: []string{ contents: []string{
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
"nvidia-container-cli.root = \"/bar/baz\"", "nvidia-container-cli.root = \"/bar/baz\"",
"nvidia-container-runtime.debug = \"/foo/bar\"", "nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.experimental = true", "nvidia-container-runtime.experimental = true",
@ -94,6 +96,7 @@ func TestGetConfig(t *testing.T) {
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"", "nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
}, },
expectedConfig: &Config{ expectedConfig: &Config{
AcceptEnvvarUnprivileged: false,
NVIDIAContainerCLIConfig: ContainerCLIConfig{ NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz", Root: "/bar/baz",
}, },
@ -119,6 +122,7 @@ func TestGetConfig(t *testing.T) {
{ {
description: "config options set in section", description: "config options set in section",
contents: []string{ contents: []string{
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
"[nvidia-container-cli]", "[nvidia-container-cli]",
"root = \"/bar/baz\"", "root = \"/bar/baz\"",
"[nvidia-container-runtime]", "[nvidia-container-runtime]",
@ -136,6 +140,7 @@ func TestGetConfig(t *testing.T) {
"path = \"/foo/bar/nvidia-ctk\"", "path = \"/foo/bar/nvidia-ctk\"",
}, },
expectedConfig: &Config{ expectedConfig: &Config{
AcceptEnvvarUnprivileged: false,
NVIDIAContainerCLIConfig: ContainerCLIConfig{ NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz", Root: "/bar/baz",
}, },

View File

@ -0,0 +1,43 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"github.com/opencontainers/runtime-spec/specs-go"
)
const (
// capSysAdmin is the capability name that IsPrivileged searches for in the
// bounding capability set of a container's process.
capSysAdmin = "CAP_SYS_ADMIN"
)
// IsPrivileged returns true if the container is a privileged container.
//
// A container is treated as privileged when CAP_SYS_ADMIN is present in the
// bounding capability set of its process. A nil spec, a spec without a
// process section, or a process without capabilities is reported as not
// privileged.
func IsPrivileged(s *specs.Spec) bool {
	// The process section of an OCI spec is an optional pointer, so guard
	// against nil before reading its capabilities instead of panicking.
	if s == nil || s.Process == nil || s.Process.Capabilities == nil {
		return false
	}

	// We only make sure that the bounding capability set has
	// CAP_SYS_ADMIN. This allows us to make sure that the container was
	// actually started as '--privileged', but also allow non-root users to
	// access the privileged NVIDIA capabilities.
	for _, c := range s.Process.Capabilities.Bounding {
		if c == capSysAdmin {
			return true
		}
	}

	return false
}

View File

@ -37,7 +37,7 @@ type cdiModifier struct {
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is // CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is
// used to select the devices to include. // used to select the devices to include.
func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) { func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
devices, err := getDevicesFromSpec(logger, ociSpec, cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind) devices, err := getDevicesFromSpec(logger, ociSpec, cfg)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err) return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
} }
@ -61,7 +61,7 @@ func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
return m, nil return m, nil
} }
func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, defaultKind string) ([]string, error) { func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Config) ([]string, error) {
rawSpec, err := ociSpec.Load() rawSpec, err := ociSpec.Load()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err) return nil, fmt.Errorf("failed to load OCI spec: %v", err)
@ -75,17 +75,17 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, defaultKind str
return annotationDevices, nil return annotationDevices, nil
} }
image, err := image.NewCUDAImageFromSpec(rawSpec) container, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil { if err != nil {
return nil, err return nil, err
} }
envDevices := image.DevicesFromEnvvars(visibleDevicesEnvvar) envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar)
var devices []string var devices []string
seen := make(map[string]bool) seen := make(map[string]bool)
for _, name := range envDevices.List() { for _, name := range envDevices.List() {
if !cdi.IsQualifiedName(name) { if !cdi.IsQualifiedName(name) {
name = fmt.Sprintf("%s=%s", defaultKind, name) name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name)
} }
if seen[name] { if seen[name] {
logger.Debugf("Ignoring duplicate device %q", name) logger.Debugf("Ignoring duplicate device %q", name)
@ -94,6 +94,16 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, defaultKind str
devices = append(devices, name) devices = append(devices, name)
} }
if len(devices) == 0 {
return nil, nil
}
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
return devices, nil
}
logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES: %v", devices)
return devices, nil return devices, nil
} }