mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 00:08:11 +00:00
f6b1b1afad
This change ignores the value of NVIDIA_VISIBLE_DEVICES instead of raising an error when launching a container with insufficient permissions. This changes the behaviour under the following conditions: NVIDIA_VISIBLE_DEVICES is set and accept-nvidia-visible-devices-envvar-when-unprivileged = false (default: true) or privileged = false (default: false) This means that a user need not explicitly clear the NVIDIA_VISIBLE_DEVICES environment variable if no GPUs are to be used in unprivileged containers. Note that this envvar is set to 'all' by default in many CUDA images that are used as base images. Signed-off-by: Evan Lezar <elezar@nvidia.com>
434 lines
12 KiB
Go
434 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"encoding/json"
|
|
"fmt"
|
|
"log"
|
|
"os"
|
|
"path"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"golang.org/x/mod/semver"
|
|
)
|
|
|
|
// envSwarmGPU holds the name of an additional (Docker-Swarm resource)
// environment variable to consult for the device list; nil when not
// configured. It is assigned from hook.SwarmResource in getContainerConfig
// before devices are resolved.
var envSwarmGPU *string
|
|
|
|
// Names of the environment variables this hook inspects in the container's
// process environment (see getNvidiaConfig and its helpers).
const (
	// envCUDAVersion is set by legacy CUDA base images (see isLegacyCUDAImage).
	envCUDAVersion = "CUDA_VERSION"
	// envNVRequirePrefix prefixes constraint expressions collected by
	// getRequirements.
	envNVRequirePrefix = "NVIDIA_REQUIRE_"
	// envNVRequireCUDA carries the CUDA version constraint.
	envNVRequireCUDA = envNVRequirePrefix + "CUDA"
	// envNVDisableRequire is parsed as a boolean into
	// nvidiaConfig.DisableRequire.
	envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
	// envNVVisibleDevices selects the devices to expose to the container.
	envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
	// envNVMigConfigDevices lists MIG devices for config access
	// (privileged containers only; see getNvidiaConfig).
	envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
	// envNVMigMonitorDevices lists MIG devices for monitor access
	// (privileged containers only; see getNvidiaConfig).
	envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
	// envNVDriverCapabilities selects the driver capability sets to enable.
	envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
|
|
|
|
const (
	// allDriverCapabilities is substituted when NVIDIA_DRIVER_CAPABILITIES
	// is "all", or is unset on a legacy image (see getDriverCapabilities).
	allDriverCapabilities = "compute,compat32,graphics,utility,video,display,ngx"
	// defaultDriverCapabilities is used when NVIDIA_DRIVER_CAPABILITIES is
	// unset or empty on a non-legacy image.
	defaultDriverCapabilities = "utility,compute"
)
|
|
|
|
const (
	// capSysAdmin is the Linux capability whose presence in the bounding
	// set marks the container as privileged (see isPrivileged).
	capSysAdmin = "CAP_SYS_ADMIN"
)
|
|
|
|
const (
	// deviceListAsVolumeMountsRoot is the in-container directory under which
	// /dev/null mounts are interpreted as device requests
	// (see getDevicesFromMounts).
	deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
)
|
|
|
|
// nvidiaConfig holds the GPU-related settings resolved from the container's
// environment and mounts. A nil *nvidiaConfig means the container is not a
// GPU container (see getNvidiaConfig).
type nvidiaConfig struct {
	// Devices is the comma-separated list of requested devices
	// (e.g. "all", "none" resolved to "", or explicit identifiers).
	Devices string
	// MigConfigDevices lists MIG devices for config access; only allowed in
	// privileged containers.
	MigConfigDevices string
	// MigMonitorDevices lists MIG devices for monitor access; only allowed
	// in privileged containers.
	MigMonitorDevices string
	// DriverCapabilities is the comma-separated driver capability list.
	DriverCapabilities string
	// Requirements holds the values of all NVIDIA_REQUIRE_* variables
	// (plus a synthesized cuda>= constraint for legacy images).
	Requirements []string
	// DisableRequire is the boolean value of NVIDIA_DISABLE_REQUIRE.
	DisableRequire bool
}
|
|
|
|
// containerConfig is the hook's consolidated view of the container being
// created, built by getContainerConfig.
type containerConfig struct {
	// Pid of the container process, from the runtime hook state.
	Pid int
	// Rootfs is the container root filesystem path from the OCI spec.
	Rootfs string
	// Env is the container process environment as a name-to-value map.
	Env map[string]string
	// Nvidia holds the resolved GPU settings; nil for non-GPU containers.
	Nvidia *nvidiaConfig
}
|
|
|
|
// Root from OCI runtime spec
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L94-L100
type Root struct {
	// Path to the container's root filesystem.
	Path string `json:"path"`
}
|
|
|
|
// Process from OCI runtime spec
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L57
type Process struct {
	// Env holds the container process environment as "NAME=value" entries.
	Env []string `json:"env,omitempty"`
	// Capabilities is kept as raw JSON because its shape differs between
	// OCI spec versions; it is decoded on demand in isPrivileged.
	Capabilities *json.RawMessage `json:"capabilities,omitempty" platform:"linux"`
}
|
|
|
|
// LinuxCapabilities from OCI runtime spec
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L61
// Only the Bounding set is consulted by this hook (see isPrivileged).
type LinuxCapabilities struct {
	Bounding    []string `json:"bounding,omitempty" platform:"linux"`
	Effective   []string `json:"effective,omitempty" platform:"linux"`
	Inheritable []string `json:"inheritable,omitempty" platform:"linux"`
	Permitted   []string `json:"permitted,omitempty" platform:"linux"`
	Ambient     []string `json:"ambient,omitempty" platform:"linux"`
}
|
|
|
|
// Mount from OCI runtime spec
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
type Mount struct {
	// Destination is the mount point inside the container.
	Destination string `json:"destination"`
	Type        string `json:"type,omitempty" platform:"linux,solaris"`
	// Source is the host-side path being mounted.
	Source  string   `json:"source,omitempty"`
	Options []string `json:"options,omitempty"`
}
|
|
|
|
// Spec from OCI runtime spec
// We use pointers to structs, similarly to the latest version of runtime-spec:
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
// Pointer fields let loadSpec distinguish missing sections from empty ones.
type Spec struct {
	Version *string  `json:"ociVersion"`
	Process *Process `json:"process,omitempty"`
	Root    *Root    `json:"root,omitempty"`
	Mounts  []Mount  `json:"mounts,omitempty"`
}
|
|
|
|
// HookState holds state information about the hook, as received on stdin
// from the container runtime (see getContainerConfig).
type HookState struct {
	// Pid of the container process.
	Pid int `json:"pid,omitempty"`
	// After 17.06, runc is using the runtime spec:
	// github.com/docker/runc/blob/17.06/libcontainer/configs/config.go#L262-L263
	// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/state.go#L3-L17
	Bundle string `json:"bundle"`
	// Before 17.06, runc used a custom struct that didn't conform to the spec:
	// github.com/docker/runc/blob/17.03.x/libcontainer/configs/config.go#L245-L252
	BundlePath string `json:"bundlePath"`
}
|
|
|
|
// parseCudaVersion splits a CUDA version string ("11", "11.4", or "11.4.1")
// into its numeric components; components absent from the string default to
// zero. It panics (via log.Panicln) when not even a major version can be read.
func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
	// Try the most specific format first and fall back one component at a
	// time, zeroing the components the shorter formats do not provide.
	if _, err := fmt.Sscanf(cudaVersion, "%d.%d.%d\n", &vmaj, &vmin, &vpatch); err == nil {
		return vmaj, vmin, vpatch
	}
	vpatch = 0
	if _, err := fmt.Sscanf(cudaVersion, "%d.%d\n", &vmaj, &vmin); err == nil {
		return vmaj, vmin, vpatch
	}
	vmin = 0
	if _, err := fmt.Sscanf(cudaVersion, "%d\n", &vmaj); err != nil {
		log.Panicln("invalid CUDA version:", cudaVersion)
	}
	return vmaj, vmin, vpatch
}
|
|
|
|
// getEnvMap converts a list of "NAME=value" environment entries into a map.
// It panics (via log.Panicln) on any entry without an '=' separator.
func getEnvMap(e []string) map[string]string {
	env := make(map[string]string, len(e))
	for _, entry := range e {
		name, value, ok := strings.Cut(entry, "=")
		if !ok {
			log.Panicln("environment error")
		}
		env[name] = value
	}
	return env
}
|
|
|
|
func loadSpec(path string) (spec *Spec) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
log.Panicln("could not open OCI spec:", err)
|
|
}
|
|
defer f.Close()
|
|
|
|
if err = json.NewDecoder(f).Decode(&spec); err != nil {
|
|
log.Panicln("could not decode OCI spec:", err)
|
|
}
|
|
if spec.Version == nil {
|
|
log.Panicln("Version is empty in OCI spec")
|
|
}
|
|
if spec.Process == nil {
|
|
log.Panicln("Process is empty in OCI spec")
|
|
}
|
|
if spec.Root == nil {
|
|
log.Panicln("Root is empty in OCI spec")
|
|
}
|
|
return
|
|
}
|
|
|
|
// isPrivileged reports whether the container was started with elevated
// privileges, determined by the presence of CAP_SYS_ADMIN in the process
// capabilities of the OCI spec.
//
// The on-the-wire shape of the capabilities field changed between OCI spec
// versions, so the raw JSON is decoded differently depending on the version
// declared in the spec.
func isPrivileged(s *Spec) bool {
	if s.Process.Capabilities == nil {
		return false
	}

	var caps []string
	// If v1.0.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
	// github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54
	// (in that range the field is a flat list of capability strings)
	rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1")
	rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5")
	if (rc1cmp == 1 || rc1cmp == 0) && (rc5cmp == -1) {
		err := json.Unmarshal(*s.Process.Capabilities, &caps)
		if err != nil {
			log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
		}
		// Otherwise, parse s.Process.Capabilities as:
		// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
	} else {
		var lc LinuxCapabilities
		err := json.Unmarshal(*s.Process.Capabilities, &lc)
		if err != nil {
			log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
		}
		// We only make sure that the bounding capability set has
		// CAP_SYS_ADMIN. This allows us to make sure that the container was
		// actually started as '--privileged', but also allow non-root users to
		// access the privileged NVIDIA capabilities.
		caps = lc.Bounding
	}

	for _, c := range caps {
		if c == capSysAdmin {
			return true
		}
	}

	return false
}
|
|
|
|
func isLegacyCUDAImage(env map[string]string) bool {
|
|
legacyCudaVersion := env[envCUDAVersion]
|
|
cudaRequire := env[envNVRequireCUDA]
|
|
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
|
}
|
|
|
|
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
|
// Build a list of envvars to consider.
|
|
envVars := []string{envNVVisibleDevices}
|
|
if envSwarmGPU != nil {
|
|
// The Swarm envvar has higher precedence.
|
|
envVars = append([]string{*envSwarmGPU}, envVars...)
|
|
}
|
|
|
|
// Grab a reference to devices from the first envvar
|
|
// in the list that actually exists in the environment.
|
|
var devices *string
|
|
for _, envVar := range envVars {
|
|
if devs, ok := env[envVar]; ok {
|
|
devices = &devs
|
|
}
|
|
}
|
|
|
|
// Environment variable unset with legacy image: default to "all".
|
|
if devices == nil && legacyImage {
|
|
all := "all"
|
|
return &all
|
|
}
|
|
|
|
// Environment variable unset or empty or "void": return nil
|
|
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
|
return nil
|
|
}
|
|
|
|
// Environment variable set to "none": reset to "".
|
|
if *devices == "none" {
|
|
empty := ""
|
|
return &empty
|
|
}
|
|
|
|
// Any other value.
|
|
return devices
|
|
}
|
|
|
|
func getDevicesFromMounts(mounts []Mount) *string {
|
|
var devices []string
|
|
for _, m := range mounts {
|
|
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
|
source := filepath.Clean(m.Source)
|
|
destination := filepath.Clean(m.Destination)
|
|
|
|
// Only consider mounts who's host volume is /dev/null
|
|
if source != "/dev/null" {
|
|
continue
|
|
}
|
|
// Only consider container mount points that begin with 'root'
|
|
if len(destination) < len(root) {
|
|
continue
|
|
}
|
|
if destination[:len(root)] != root {
|
|
continue
|
|
}
|
|
// Grab the full path beyond 'root' and add it to the list of devices
|
|
device := destination[len(root):]
|
|
if len(device) > 0 && device[0] == '/' {
|
|
device = device[1:]
|
|
}
|
|
if len(device) == 0 {
|
|
continue
|
|
}
|
|
devices = append(devices, device)
|
|
}
|
|
|
|
if devices == nil {
|
|
return nil
|
|
}
|
|
|
|
ret := strings.Join(devices, ",")
|
|
return &ret
|
|
}
|
|
|
|
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
|
|
// If enabled, try and get the device list from volume mounts first
|
|
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
|
devices := getDevicesFromMounts(mounts)
|
|
if devices != nil {
|
|
return devices
|
|
}
|
|
}
|
|
|
|
// Fallback to reading from the environment variable if privileges are correct
|
|
devices := getDevicesFromEnvvar(env, legacyImage)
|
|
if devices == nil {
|
|
return nil
|
|
}
|
|
if privileged || hookConfig.AcceptEnvvarUnprivileged {
|
|
return devices
|
|
}
|
|
|
|
log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, config.accept-nvidia-visible-devices-envvar-when-unprivileged=%v) ", privileged, hookConfig.AcceptEnvvarUnprivileged)
|
|
|
|
return nil
|
|
}
|
|
|
|
func getMigConfigDevices(env map[string]string) *string {
|
|
if devices, ok := env[envNVMigConfigDevices]; ok {
|
|
return &devices
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func getMigMonitorDevices(env map[string]string) *string {
|
|
if devices, ok := env[envNVMigMonitorDevices]; ok {
|
|
return &devices
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func getDriverCapabilities(env map[string]string, legacyImage bool) *string {
|
|
// Grab a reference to the capabilities from the envvar
|
|
// if it actually exists in the environment.
|
|
var capabilities *string
|
|
if caps, ok := env[envNVDriverCapabilities]; ok {
|
|
capabilities = &caps
|
|
}
|
|
|
|
// Environment variable unset with legacy image: set all capabilities.
|
|
if capabilities == nil && legacyImage {
|
|
allCaps := allDriverCapabilities
|
|
return &allCaps
|
|
}
|
|
|
|
// Environment variable unset or set but empty: set default capabilities.
|
|
if capabilities == nil || len(*capabilities) == 0 {
|
|
defaultCaps := defaultDriverCapabilities
|
|
return &defaultCaps
|
|
}
|
|
|
|
// Environment variable set to "all": set all capabilities.
|
|
if *capabilities == "all" {
|
|
allCaps := allDriverCapabilities
|
|
return &allCaps
|
|
}
|
|
|
|
// Any other value
|
|
return capabilities
|
|
}
|
|
|
|
func getRequirements(env map[string]string, legacyImage bool) []string {
|
|
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
|
var requirements []string
|
|
for name, value := range env {
|
|
if strings.HasPrefix(name, envNVRequirePrefix) {
|
|
requirements = append(requirements, value)
|
|
}
|
|
}
|
|
if legacyImage {
|
|
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
|
|
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
|
requirements = append(requirements, cudaRequire)
|
|
}
|
|
return requirements
|
|
}
|
|
|
|
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
|
legacyImage := isLegacyCUDAImage(env)
|
|
|
|
var devices string
|
|
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
|
devices = *d
|
|
} else {
|
|
// 'nil' devices means this is not a GPU container.
|
|
return nil
|
|
}
|
|
|
|
var migConfigDevices string
|
|
if d := getMigConfigDevices(env); d != nil {
|
|
migConfigDevices = *d
|
|
}
|
|
if !privileged && migConfigDevices != "" {
|
|
log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container")
|
|
}
|
|
|
|
var migMonitorDevices string
|
|
if d := getMigMonitorDevices(env); d != nil {
|
|
migMonitorDevices = *d
|
|
}
|
|
if !privileged && migMonitorDevices != "" {
|
|
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
|
}
|
|
|
|
var driverCapabilities string
|
|
if c := getDriverCapabilities(env, legacyImage); c != nil {
|
|
driverCapabilities = *c
|
|
}
|
|
|
|
requirements := getRequirements(env, legacyImage)
|
|
|
|
// Don't fail on invalid values.
|
|
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
|
|
|
return &nvidiaConfig{
|
|
Devices: devices,
|
|
MigConfigDevices: migConfigDevices,
|
|
MigMonitorDevices: migMonitorDevices,
|
|
DriverCapabilities: driverCapabilities,
|
|
Requirements: requirements,
|
|
DisableRequire: disableRequire,
|
|
}
|
|
}
|
|
|
|
func getContainerConfig(hook HookConfig) (config containerConfig) {
|
|
var h HookState
|
|
d := json.NewDecoder(os.Stdin)
|
|
if err := d.Decode(&h); err != nil {
|
|
log.Panicln("could not decode container state:", err)
|
|
}
|
|
|
|
b := h.Bundle
|
|
if len(b) == 0 {
|
|
b = h.BundlePath
|
|
}
|
|
|
|
s := loadSpec(path.Join(b, "config.json"))
|
|
|
|
env := getEnvMap(s.Process.Env)
|
|
privileged := isPrivileged(s)
|
|
envSwarmGPU = hook.SwarmResource
|
|
return containerConfig{
|
|
Pid: h.Pid,
|
|
Rootfs: s.Root.Path,
|
|
Env: env,
|
|
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
|
}
|
|
}
|