mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-04-23 07:34:23 +00:00
Merge branch 'CNT-3998/cdi-accept-visible-devices-when-privileged' into 'main'
Honor accept-nvidia-visible-devices-envvar-when-unprivileged setting in CDI mode See merge request nvidia/container-toolkit/container-toolkit!331
This commit is contained in:
commit
4c2eff4865
@ -10,6 +10,7 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"golang.org/x/mod/semver"
|
"golang.org/x/mod/semver"
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -130,7 +131,7 @@ func isPrivileged(s *Spec) bool {
|
|||||||
}
|
}
|
||||||
|
|
||||||
var caps []string
|
var caps []string
|
||||||
// If v1.1.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
|
// If v1.0.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
|
||||||
// github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54
|
// github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54
|
||||||
rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1")
|
rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1")
|
||||||
rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5")
|
rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5")
|
||||||
@ -139,28 +140,31 @@ func isPrivileged(s *Spec) bool {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
||||||
}
|
}
|
||||||
// Otherwise, parse s.Process.Capabilities as:
|
|
||||||
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
|
|
||||||
} else {
|
|
||||||
var lc LinuxCapabilities
|
|
||||||
err := json.Unmarshal(*s.Process.Capabilities, &lc)
|
|
||||||
if err != nil {
|
|
||||||
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
|
||||||
}
|
|
||||||
// We only make sure that the bounding capability set has
|
|
||||||
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
|
||||||
// actually started as '--privileged', but also allow non-root users to
|
|
||||||
// access the privileged NVIDIA capabilities.
|
|
||||||
caps = lc.Bounding
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, c := range caps {
|
for _, c := range caps {
|
||||||
if c == capSysAdmin {
|
if c == capSysAdmin {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise, parse s.Process.Capabilities as:
|
||||||
|
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
|
||||||
|
process := specs.Process{
|
||||||
|
Env: s.Process.Env,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
|
||||||
|
if err != nil {
|
||||||
|
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
fullSpec := specs.Spec{
|
||||||
|
Version: *s.Version,
|
||||||
|
Process: &process,
|
||||||
|
}
|
||||||
|
|
||||||
|
return image.IsPrivileged(&fullSpec)
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
||||||
|
@ -45,6 +45,8 @@ var (
|
|||||||
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
|
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
|
||||||
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
|
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
|
||||||
type Config struct {
|
type Config struct {
|
||||||
|
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
|
||||||
|
|
||||||
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
|
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
|
||||||
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
|
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
|
||||||
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
|
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||||
@ -91,6 +93,8 @@ func getConfigFrom(toml *toml.Tree) (*Config, error) {
|
|||||||
return cfg, nil
|
return cfg, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
cfg.AcceptEnvvarUnprivileged = toml.GetDefault("accept-nvidia-visible-devices-envvar-when-unprivileged", cfg.AcceptEnvvarUnprivileged).(bool)
|
||||||
|
|
||||||
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
|
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
|
||||||
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
|
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
|
||||||
runtimeConfig, err := getRuntimeConfigFrom(toml)
|
runtimeConfig, err := getRuntimeConfigFrom(toml)
|
||||||
@ -105,6 +109,7 @@ func getConfigFrom(toml *toml.Tree) (*Config, error) {
|
|||||||
// getDefaultConfig defines the default values for the config
|
// getDefaultConfig defines the default values for the config
|
||||||
func getDefaultConfig() *Config {
|
func getDefaultConfig() *Config {
|
||||||
c := Config{
|
c := Config{
|
||||||
|
AcceptEnvvarUnprivileged: true,
|
||||||
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
|
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
|
||||||
NVIDIACTKConfig: *getDefaultCTKConfig(),
|
NVIDIACTKConfig: *getDefaultCTKConfig(),
|
||||||
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
|
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
|
||||||
|
@ -57,6 +57,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "empty config is default",
|
description: "empty config is default",
|
||||||
expectedConfig: &Config{
|
expectedConfig: &Config{
|
||||||
|
AcceptEnvvarUnprivileged: true,
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Root: "",
|
Root: "",
|
||||||
},
|
},
|
||||||
@ -82,6 +83,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "config options set inline",
|
description: "config options set inline",
|
||||||
contents: []string{
|
contents: []string{
|
||||||
|
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
|
||||||
"nvidia-container-cli.root = \"/bar/baz\"",
|
"nvidia-container-cli.root = \"/bar/baz\"",
|
||||||
"nvidia-container-runtime.debug = \"/foo/bar\"",
|
"nvidia-container-runtime.debug = \"/foo/bar\"",
|
||||||
"nvidia-container-runtime.experimental = true",
|
"nvidia-container-runtime.experimental = true",
|
||||||
@ -94,6 +96,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
||||||
},
|
},
|
||||||
expectedConfig: &Config{
|
expectedConfig: &Config{
|
||||||
|
AcceptEnvvarUnprivileged: false,
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Root: "/bar/baz",
|
Root: "/bar/baz",
|
||||||
},
|
},
|
||||||
@ -119,6 +122,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
{
|
{
|
||||||
description: "config options set in section",
|
description: "config options set in section",
|
||||||
contents: []string{
|
contents: []string{
|
||||||
|
"accept-nvidia-visible-devices-envvar-when-unprivileged = false",
|
||||||
"[nvidia-container-cli]",
|
"[nvidia-container-cli]",
|
||||||
"root = \"/bar/baz\"",
|
"root = \"/bar/baz\"",
|
||||||
"[nvidia-container-runtime]",
|
"[nvidia-container-runtime]",
|
||||||
@ -136,6 +140,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
"path = \"/foo/bar/nvidia-ctk\"",
|
"path = \"/foo/bar/nvidia-ctk\"",
|
||||||
},
|
},
|
||||||
expectedConfig: &Config{
|
expectedConfig: &Config{
|
||||||
|
AcceptEnvvarUnprivileged: false,
|
||||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||||
Root: "/bar/baz",
|
Root: "/bar/baz",
|
||||||
},
|
},
|
||||||
|
43
internal/config/image/privileged.go
Normal file
43
internal/config/image/privileged.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
/**
|
||||||
|
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
**/
|
||||||
|
|
||||||
|
package image
|
||||||
|
|
||||||
|
import (
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
)
|
||||||
|
|
||||||
|
// capSysAdmin is the capability name that IsPrivileged looks for in the
// bounding capability set of a container process.
const capSysAdmin = "CAP_SYS_ADMIN"
|
||||||
|
|
||||||
|
// IsPrivileged returns true if the container is a privileged container.
|
||||||
|
func IsPrivileged(s *specs.Spec) bool {
|
||||||
|
if s.Process.Capabilities == nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
|
// We only make sure that the bounding capabibility set has
|
||||||
|
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
||||||
|
// actually started as '--privileged', but also allow non-root users to
|
||||||
|
// access the privileged NVIDIA capabilities.
|
||||||
|
for _, c := range s.Process.Capabilities.Bounding {
|
||||||
|
if c == capSysAdmin {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
@ -37,7 +37,7 @@ type cdiModifier struct {
|
|||||||
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is
|
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is
|
||||||
// used to select the devices to include.
|
// used to select the devices to include.
|
||||||
func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||||
devices, err := getDevicesFromSpec(logger, ociSpec, cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind)
|
devices, err := getDevicesFromSpec(logger, ociSpec, cfg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
|
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
|
||||||
}
|
}
|
||||||
@ -61,7 +61,7 @@ func NewCDIModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec)
|
|||||||
return m, nil
|
return m, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, defaultKind string) ([]string, error) {
|
func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, cfg *config.Config) ([]string, error) {
|
||||||
rawSpec, err := ociSpec.Load()
|
rawSpec, err := ociSpec.Load()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||||
@ -75,17 +75,17 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, defaultKind str
|
|||||||
return annotationDevices, nil
|
return annotationDevices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
container, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
envDevices := image.DevicesFromEnvvars(visibleDevicesEnvvar)
|
envDevices := container.DevicesFromEnvvars(visibleDevicesEnvvar)
|
||||||
|
|
||||||
var devices []string
|
var devices []string
|
||||||
seen := make(map[string]bool)
|
seen := make(map[string]bool)
|
||||||
for _, name := range envDevices.List() {
|
for _, name := range envDevices.List() {
|
||||||
if !cdi.IsQualifiedName(name) {
|
if !cdi.IsQualifiedName(name) {
|
||||||
name = fmt.Sprintf("%s=%s", defaultKind, name)
|
name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name)
|
||||||
}
|
}
|
||||||
if seen[name] {
|
if seen[name] {
|
||||||
logger.Debugf("Ignoring duplicate device %q", name)
|
logger.Debugf("Ignoring duplicate device %q", name)
|
||||||
@ -94,6 +94,16 @@ func getDevicesFromSpec(logger *logrus.Logger, ociSpec oci.Spec, defaultKind str
|
|||||||
devices = append(devices, name)
|
devices = append(devices, name)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if len(devices) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
|
||||||
|
return devices, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES: %v", devices)
|
||||||
|
|
||||||
return devices, nil
|
return devices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user