mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2024-11-22 00:08:11 +00:00
Merge branch '1.1.0-staging' into 'master'
1.1.0 staging See merge request nvidia/container-toolkit/container-toolkit!7
This commit is contained in:
commit
976428af2c
2
Makefile
2
Makefile
@ -5,7 +5,7 @@ MKDIR ?= mkdir
|
||||
DIST_DIR ?= $(CURDIR)/dist
|
||||
|
||||
LIB_NAME := nvidia-container-toolkit
|
||||
LIB_VERSION := 1.0.5
|
||||
LIB_VERSION := 1.1.0
|
||||
|
||||
GOLANG_VERSION := 1.14.2
|
||||
GOLANG_PKG_PATH := github.com/NVIDIA/container-toolkit/pkg
|
||||
|
@ -11,6 +11,7 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
@ -11,6 +11,7 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
@ -11,6 +11,7 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
@ -11,6 +11,7 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
@ -11,6 +11,7 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig.real"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
131
container_config_test.go
Normal file
131
container_config_test.go
Normal file
@ -0,0 +1,131 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/require"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMergeVisibleDevicesEnvvars(t *testing.T) {
|
||||
var tests = []struct {
|
||||
name string
|
||||
input []string
|
||||
expected string
|
||||
enableMerge bool
|
||||
}{
|
||||
{
|
||||
"Simple Merge Enabled",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"0,1,2,3,4,5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Simple Merge Disabled",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"Merge No Override (Enabled)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
"all",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Merge No Override (Disabled)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
"all",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"Merge Override (Enabled, Before)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"0,1,2,3,4,5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Merge Override (Enabled, After)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
"0,1,2,3,4,5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Merge Override (Enabled, In Between)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"0,1,2,3,4,5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Merge Override (Disabled, Before)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"all",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"Merge Override (Disabled, After)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
"all",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"Merge Override (Disabled, In Between)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"all",
|
||||
false,
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
config := CLIConfig{
|
||||
AlphaMergeVisibleDevicesEnvvars: tc.enableMerge,
|
||||
}
|
||||
envvars := getEnvMap(tc.input, config)
|
||||
devices := strings.Split(envvars[envNVVisibleDevices], ",")
|
||||
sort.Strings(devices)
|
||||
require.Equal(t, tc.expected, strings.Join(devices, ","))
|
||||
})
|
||||
}
|
||||
}
|
@ -1,4 +1,12 @@
|
||||
nvidia-container-toolkit (@VERSION@) UNRELEASED; urgency=medium
|
||||
nvidia-container-toolkit (1.1.0-1) UNRELEASED; urgency=medium
|
||||
|
||||
* Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_* (Closes: #XXXXXX)
|
||||
* Extend fields we inspect in the runc spec to include linux capabilities (Closes: #XXXXXX)
|
||||
* Add support for MIG (Closes: #XXXXXX)
|
||||
|
||||
-- NVIDIA CORPORATION <cudatools@nvidia.com> Wed, 07 Mar 2018 05:47:37 +0000
|
||||
|
||||
nvidia-container-toolkit (1.0.5-1) UNRELEASED; urgency=medium
|
||||
|
||||
* Initial release. Replaces older package nvidia-container-runtime-hook. (Closes: #XXXXXX)
|
||||
|
||||
|
@ -53,3 +53,7 @@ rm -f %{_bindir}/nvidia-container-runtime-hook
|
||||
/usr/share/containers/oci/hooks.d/oci-nvidia-hook.json
|
||||
|
||||
%changelog
|
||||
* Fri May 15 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.1.0-1
|
||||
- Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*
|
||||
- Extend fields we inspect in the runc spec to include linux capabilities
|
||||
- Add support for MIG
|
||||
|
@ -18,6 +18,8 @@ const (
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
|
||||
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
|
||||
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
|
||||
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
|
||||
)
|
||||
|
||||
@ -26,8 +28,14 @@ const (
|
||||
defaultDriverCapabilities = "utility"
|
||||
)
|
||||
|
||||
const (
|
||||
capSysAdmin = "CAP_SYS_ADMIN"
|
||||
)
|
||||
|
||||
type nvidiaConfig struct {
|
||||
Devices string
|
||||
MigConfigDevices string
|
||||
MigMonitorDevices string
|
||||
DriverCapabilities string
|
||||
Requirements []string
|
||||
DisableRequire bool
|
||||
@ -47,7 +55,17 @@ type Root struct {
|
||||
|
||||
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L57
|
||||
type Process struct {
|
||||
Env []string `json:"env,omitempty"`
|
||||
Env []string `json:"env,omitempty"`
|
||||
Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L61
|
||||
type LinuxCapabilities struct {
|
||||
Bounding []string `json:"bounding,omitempty" platform:"linux"`
|
||||
Effective []string `json:"effective,omitempty" platform:"linux"`
|
||||
Inheritable []string `json:"inheritable,omitempty" platform:"linux"`
|
||||
Permitted []string `json:"permitted,omitempty" platform:"linux"`
|
||||
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// We use pointers to structs, similarly to the latest version of runtime-spec:
|
||||
@ -82,7 +100,7 @@ func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
|
||||
return
|
||||
}
|
||||
|
||||
func getEnvMap(e []string) (m map[string]string) {
|
||||
func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
|
||||
m = make(map[string]string)
|
||||
for _, s := range e {
|
||||
p := strings.SplitN(s, "=", 2)
|
||||
@ -91,6 +109,17 @@ func getEnvMap(e []string) (m map[string]string) {
|
||||
}
|
||||
m[p[0]] = p[1]
|
||||
}
|
||||
if config.AlphaMergeVisibleDevicesEnvvars {
|
||||
var mergable []string
|
||||
for k, v := range m {
|
||||
if strings.HasPrefix(k, envNVVisibleDevices+"_") {
|
||||
mergable = append(mergable, v)
|
||||
}
|
||||
}
|
||||
if len(mergable) > 0 {
|
||||
m[envNVVisibleDevices] = strings.Join(mergable, ",")
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@ -113,6 +142,31 @@ func loadSpec(path string) (spec *Spec) {
|
||||
return
|
||||
}
|
||||
|
||||
func isPrivileged(caps *LinuxCapabilities) bool {
|
||||
if caps == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
hasCapSysAdmin := func(caps []string) bool {
|
||||
for _, c := range caps {
|
||||
if c == capSysAdmin {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// We only make sure that the bounding capabibility set has
|
||||
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
||||
// actually started as '--privileged', but also allow non-root users to
|
||||
// access the priviliged NVIDIA capabilities.
|
||||
if !hasCapSysAdmin(caps.Bounding) {
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
func getDevices(env map[string]string) *string {
|
||||
gpuVars := []string{envNVVisibleDevices}
|
||||
if envSwarmGPU != nil {
|
||||
@ -128,6 +182,26 @@ func getDevices(env map[string]string) *string {
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMigConfigDevices(env map[string]string) *string {
|
||||
gpuVars := []string{envNVMigConfigDevices}
|
||||
for _, gpuVar := range gpuVars {
|
||||
if devices, ok := env[gpuVar]; ok {
|
||||
return &devices
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMigMonitorDevices(env map[string]string) *string {
|
||||
gpuVars := []string{envNVMigMonitorDevices}
|
||||
for _, gpuVar := range gpuVars {
|
||||
if devices, ok := env[gpuVar]; ok {
|
||||
return &devices
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getDriverCapabilities(env map[string]string) *string {
|
||||
if capabilities, ok := env[envNVDriverCapabilities]; ok {
|
||||
return &capabilities
|
||||
@ -147,7 +221,7 @@ func getRequirements(env map[string]string) []string {
|
||||
}
|
||||
|
||||
// Mimic the new CUDA images if no capabilities or devices are specified.
|
||||
func getNvidiaConfigLegacy(env map[string]string) *nvidiaConfig {
|
||||
func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig {
|
||||
var devices string
|
||||
if d := getDevices(env); d == nil {
|
||||
// Environment variable unset: default to "all".
|
||||
@ -163,6 +237,22 @@ func getNvidiaConfigLegacy(env map[string]string) *nvidiaConfig {
|
||||
devices = ""
|
||||
}
|
||||
|
||||
var migConfigDevices string
|
||||
if d := getMigConfigDevices(env); d != nil {
|
||||
migConfigDevices = *d
|
||||
}
|
||||
if !privileged && migConfigDevices != "" {
|
||||
log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
var migMonitorDevices string
|
||||
if d := getMigMonitorDevices(env); d != nil {
|
||||
migMonitorDevices = *d
|
||||
}
|
||||
if !privileged && migMonitorDevices != "" {
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
var driverCapabilities string
|
||||
if c := getDriverCapabilities(env); c == nil {
|
||||
// Environment variable unset: default to "all".
|
||||
@ -189,18 +279,20 @@ func getNvidiaConfigLegacy(env map[string]string) *nvidiaConfig {
|
||||
|
||||
return &nvidiaConfig{
|
||||
Devices: devices,
|
||||
MigConfigDevices: migConfigDevices,
|
||||
MigMonitorDevices: migMonitorDevices,
|
||||
DriverCapabilities: driverCapabilities,
|
||||
Requirements: requirements,
|
||||
DisableRequire: disableRequire,
|
||||
}
|
||||
}
|
||||
|
||||
func getNvidiaConfig(env map[string]string) *nvidiaConfig {
|
||||
func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig {
|
||||
legacyCudaVersion := env[envCUDAVersion]
|
||||
cudaRequire := env[envNVRequireCUDA]
|
||||
if len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 {
|
||||
// Legacy CUDA image detected.
|
||||
return getNvidiaConfigLegacy(env)
|
||||
return getNvidiaConfigLegacy(env, privileged)
|
||||
}
|
||||
|
||||
var devices string
|
||||
@ -215,6 +307,22 @@ func getNvidiaConfig(env map[string]string) *nvidiaConfig {
|
||||
devices = ""
|
||||
}
|
||||
|
||||
var migConfigDevices string
|
||||
if d := getMigConfigDevices(env); d != nil {
|
||||
migConfigDevices = *d
|
||||
}
|
||||
if !privileged && migConfigDevices != "" {
|
||||
log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
var migMonitorDevices string
|
||||
if d := getMigMonitorDevices(env); d != nil {
|
||||
migMonitorDevices = *d
|
||||
}
|
||||
if !privileged && migMonitorDevices != "" {
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
var driverCapabilities string
|
||||
if c := getDriverCapabilities(env); c == nil || len(*c) == 0 {
|
||||
// Environment variable unset or set but empty: use default capability.
|
||||
@ -234,6 +342,8 @@ func getNvidiaConfig(env map[string]string) *nvidiaConfig {
|
||||
|
||||
return &nvidiaConfig{
|
||||
Devices: devices,
|
||||
MigConfigDevices: migConfigDevices,
|
||||
MigMonitorDevices: migMonitorDevices,
|
||||
DriverCapabilities: driverCapabilities,
|
||||
Requirements: requirements,
|
||||
DisableRequire: disableRequire,
|
||||
@ -254,12 +364,13 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
|
||||
s := loadSpec(path.Join(b, "config.json"))
|
||||
|
||||
env := getEnvMap(s.Process.Env)
|
||||
env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI)
|
||||
privileged := isPrivileged(s.Process.Capabilities)
|
||||
envSwarmGPU = hook.SwarmResource
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
Env: env,
|
||||
Nvidia: getNvidiaConfig(env),
|
||||
Nvidia: getNvidiaConfig(env, privileged),
|
||||
}
|
||||
}
|
||||
|
@ -20,16 +20,17 @@ var defaultPaths = [...]string{
|
||||
|
||||
// CLIConfig: options for nvidia-container-cli.
|
||||
type CLIConfig struct {
|
||||
Root *string `toml:"root"`
|
||||
Path *string `toml:"path"`
|
||||
Environment []string `toml:"environment"`
|
||||
Debug *string `toml:"debug"`
|
||||
Ldcache *string `toml:"ldcache"`
|
||||
LoadKmods bool `toml:"load-kmods"`
|
||||
NoPivot bool `toml:"no-pivot"`
|
||||
NoCgroups bool `toml:"no-cgroups"`
|
||||
User *string `toml:"user"`
|
||||
Ldconfig *string `toml:"ldconfig"`
|
||||
Root *string `toml:"root"`
|
||||
Path *string `toml:"path"`
|
||||
Environment []string `toml:"environment"`
|
||||
Debug *string `toml:"debug"`
|
||||
Ldcache *string `toml:"ldcache"`
|
||||
LoadKmods bool `toml:"load-kmods"`
|
||||
NoPivot bool `toml:"no-pivot"`
|
||||
NoCgroups bool `toml:"no-cgroups"`
|
||||
User *string `toml:"user"`
|
||||
Ldconfig *string `toml:"ldconfig"`
|
||||
AlphaMergeVisibleDevicesEnvvars bool `toml:"alpha-merge-visible-devices-envvars"`
|
||||
}
|
||||
|
||||
type HookConfig struct {
|
||||
@ -44,16 +45,17 @@ func getDefaultHookConfig() (config HookConfig) {
|
||||
DisableRequire: false,
|
||||
SwarmResource: nil,
|
||||
NvidiaContainerCLI: CLIConfig{
|
||||
Root: nil,
|
||||
Path: nil,
|
||||
Environment: []string{},
|
||||
Debug: nil,
|
||||
Ldcache: nil,
|
||||
LoadKmods: true,
|
||||
NoPivot: false,
|
||||
NoCgroups: false,
|
||||
User: nil,
|
||||
Ldconfig: nil,
|
||||
Root: nil,
|
||||
Path: nil,
|
||||
Environment: []string{},
|
||||
Debug: nil,
|
||||
Ldcache: nil,
|
||||
LoadKmods: true,
|
||||
NoPivot: false,
|
||||
NoCgroups: false,
|
||||
User: nil,
|
||||
Ldconfig: nil,
|
||||
AlphaMergeVisibleDevicesEnvvars: false,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
@ -126,6 +126,12 @@ func doPrestart() {
|
||||
if len(nvidia.Devices) > 0 {
|
||||
args = append(args, fmt.Sprintf("--device=%s", nvidia.Devices))
|
||||
}
|
||||
if len(nvidia.MigConfigDevices) > 0 {
|
||||
args = append(args, fmt.Sprintf("--mig-config=%s", nvidia.MigConfigDevices))
|
||||
}
|
||||
if len(nvidia.MigMonitorDevices) > 0 {
|
||||
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
|
||||
}
|
||||
|
||||
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
|
||||
if len(cap) == 0 {
|
||||
|
Loading…
Reference in New Issue
Block a user