Merge branch 'internal-add-mig-config-monitor' into 'master'

Add support for mig-config and mig-monitor as privileged capabilities

See merge request dl/container-dev/nvidia-container-toolkit!3
This commit is contained in:
Kevin Klues 2020-05-04 11:49:35 -07:00
commit fcc1d116f0
2 changed files with 111 additions and 5 deletions

View File

@ -18,6 +18,8 @@ const (
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"
envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
@ -26,8 +28,14 @@ const (
defaultDriverCapabilities = "utility"
)
const (
capSysAdmin = "CAP_SYS_ADMIN"
)
type nvidiaConfig struct {
Devices string
MigConfigDevices string
MigMonitorDevices string
DriverCapabilities string
Requirements []string
DisableRequire bool
@ -47,7 +55,17 @@ type Root struct {
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L57
type Process struct {
Env []string `json:"env,omitempty"`
Env []string `json:"env,omitempty"`
Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"`
}
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L61
type LinuxCapabilities struct {
Bounding []string `json:"bounding,omitempty" platform:"linux"`
Effective []string `json:"effective,omitempty" platform:"linux"`
Inheritable []string `json:"inheritable,omitempty" platform:"linux"`
Permitted []string `json:"permitted,omitempty" platform:"linux"`
Ambient []string `json:"ambient,omitempty" platform:"linux"`
}
// We use pointers to structs, similarly to the latest version of runtime-spec:
@ -124,6 +142,31 @@ func loadSpec(path string) (spec *Spec) {
return
}
func isPrivileged(caps *LinuxCapabilities) bool {
if caps == nil {
return false
}
hasCapSysAdmin := func(caps []string) bool {
for _, c := range caps {
if c == capSysAdmin {
return true
}
}
return false
}
// We only make sure that the bounding capabibility set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the priviliged NVIDIA capabilities.
if !hasCapSysAdmin(caps.Bounding) {
return false
}
return true
}
func getDevices(env map[string]string) *string {
gpuVars := []string{envNVVisibleDevices}
if envSwarmGPU != nil {
@ -139,6 +182,26 @@ func getDevices(env map[string]string) *string {
return nil
}
func getMigConfigDevices(env map[string]string) *string {
gpuVars := []string{envNVMigConfigDevices}
for _, gpuVar := range gpuVars {
if devices, ok := env[gpuVar]; ok {
return &devices
}
}
return nil
}
func getMigMonitorDevices(env map[string]string) *string {
gpuVars := []string{envNVMigMonitorDevices}
for _, gpuVar := range gpuVars {
if devices, ok := env[gpuVar]; ok {
return &devices
}
}
return nil
}
func getDriverCapabilities(env map[string]string) *string {
if capabilities, ok := env[envNVDriverCapabilities]; ok {
return &capabilities
@ -158,7 +221,7 @@ func getRequirements(env map[string]string) []string {
}
// Mimic the new CUDA images if no capabilities or devices are specified.
func getNvidiaConfigLegacy(env map[string]string) *nvidiaConfig {
func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig {
var devices string
if d := getDevices(env); d == nil {
// Environment variable unset: default to "all".
@ -174,6 +237,22 @@ func getNvidiaConfigLegacy(env map[string]string) *nvidiaConfig {
devices = ""
}
var migConfigDevices string
if d := getMigConfigDevices(env); d != nil {
migConfigDevices = *d
}
if !privileged && migConfigDevices != "" {
log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container")
}
var migMonitorDevices string
if d := getMigMonitorDevices(env); d != nil {
migMonitorDevices = *d
}
if !privileged && migMonitorDevices != "" {
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
var driverCapabilities string
if c := getDriverCapabilities(env); c == nil {
// Environment variable unset: default to "all".
@ -200,18 +279,20 @@ func getNvidiaConfigLegacy(env map[string]string) *nvidiaConfig {
return &nvidiaConfig{
Devices: devices,
MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices,
DriverCapabilities: driverCapabilities,
Requirements: requirements,
DisableRequire: disableRequire,
}
}
func getNvidiaConfig(env map[string]string) *nvidiaConfig {
func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig {
legacyCudaVersion := env[envCUDAVersion]
cudaRequire := env[envNVRequireCUDA]
if len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 {
// Legacy CUDA image detected.
return getNvidiaConfigLegacy(env)
return getNvidiaConfigLegacy(env, privileged)
}
var devices string
@ -226,6 +307,22 @@ func getNvidiaConfig(env map[string]string) *nvidiaConfig {
devices = ""
}
var migConfigDevices string
if d := getMigConfigDevices(env); d != nil {
migConfigDevices = *d
}
if !privileged && migConfigDevices != "" {
log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container")
}
var migMonitorDevices string
if d := getMigMonitorDevices(env); d != nil {
migMonitorDevices = *d
}
if !privileged && migMonitorDevices != "" {
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
var driverCapabilities string
if c := getDriverCapabilities(env); c == nil || len(*c) == 0 {
// Environment variable unset or set but empty: use default capability.
@ -245,6 +342,8 @@ func getNvidiaConfig(env map[string]string) *nvidiaConfig {
return &nvidiaConfig{
Devices: devices,
MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices,
DriverCapabilities: driverCapabilities,
Requirements: requirements,
DisableRequire: disableRequire,
@ -266,11 +365,12 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
s := loadSpec(path.Join(b, "config.json"))
env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI)
privileged := isPrivileged(s.Process.Capabilities)
envSwarmGPU = hook.SwarmResource
return containerConfig{
Pid: h.Pid,
Rootfs: s.Root.Path,
Env: env,
Nvidia: getNvidiaConfig(env),
Nvidia: getNvidiaConfig(env, privileged),
}
}

View File

@ -126,6 +126,12 @@ func doPrestart() {
if len(nvidia.Devices) > 0 {
args = append(args, fmt.Sprintf("--device=%s", nvidia.Devices))
}
if len(nvidia.MigConfigDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-config=%s", nvidia.MigConfigDevices))
}
if len(nvidia.MigMonitorDevices) > 0 {
args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices))
}
for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") {
if len(cap) == 0 {