From 05012e7b7f0e8e947762b8498751ef62fafd2737 Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Fri, 20 Dec 2019 21:19:00 +0000 Subject: [PATCH 1/2] Extend fields we inspect in the runc spec to include linux capabilities This also includes a helper to look through the capabilities contained in the spec to determine if the container is privileged or not. Signed-off-by: Kevin Klues --- pkg/container_config.go | 52 +++++++++++++++++++++++++++++++++++++---- 1 file changed, 47 insertions(+), 5 deletions(-) diff --git a/pkg/container_config.go b/pkg/container_config.go index ac2ab7fa..bc23661f 100644 --- a/pkg/container_config.go +++ b/pkg/container_config.go @@ -26,6 +26,10 @@ const ( defaultDriverCapabilities = "utility" ) +const ( + capSysAdmin = "CAP_SYS_ADMIN" +) + type nvidiaConfig struct { Devices string DriverCapabilities string @@ -47,7 +51,17 @@ type Root struct { // github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L57 type Process struct { - Env []string `json:"env,omitempty"` + Env []string `json:"env,omitempty"` + Capabilities *LinuxCapabilities `json:"capabilities,omitempty" platform:"linux"` +} + +// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L61 +type LinuxCapabilities struct { + Bounding []string `json:"bounding,omitempty" platform:"linux"` + Effective []string `json:"effective,omitempty" platform:"linux"` + Inheritable []string `json:"inheritable,omitempty" platform:"linux"` + Permitted []string `json:"permitted,omitempty" platform:"linux"` + Ambient []string `json:"ambient,omitempty" platform:"linux"` } // We use pointers to structs, similarly to the latest version of runtime-spec: @@ -124,6 +138,31 @@ func loadSpec(path string) (spec *Spec) { return } +func isPrivileged(caps *LinuxCapabilities) bool { + if caps == nil { + return false + } + + hasCapSysAdmin := func(caps []string) bool { + for _, c := range caps { + if c == capSysAdmin { + return true + } + } + return false + } + + // We only 
make sure that the bounding capability set has + // CAP_SYS_ADMIN. This allows us to make sure that the container was + // actually started as '--privileged', but also allow non-root users to + // access the privileged NVIDIA capabilities. + if !hasCapSysAdmin(caps.Bounding) { + return false + } + + return true +} + func getDevices(env map[string]string) *string { gpuVars := []string{envNVVisibleDevices} if envSwarmGPU != nil { @@ -158,7 +197,7 @@ func getRequirements(env map[string]string) []string { } // Mimic the new CUDA images if no capabilities or devices are specified. -func getNvidiaConfigLegacy(env map[string]string) *nvidiaConfig { +func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig { var devices string if d := getDevices(env); d == nil { // Environment variable unset: default to "all". @@ -200,18 +239,20 @@ func getNvidiaConfigLegacy(env map[string]string) *nvidiaConfig { return &nvidiaConfig{ Devices: devices, + MigConfigDevices: migConfigDevices, + MigMonitorDevices: migMonitorDevices, DriverCapabilities: driverCapabilities, Requirements: requirements, DisableRequire: disableRequire, } } -func getNvidiaConfig(env map[string]string) *nvidiaConfig { +func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig { legacyCudaVersion := env[envCUDAVersion] cudaRequire := env[envNVRequireCUDA] if len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 { // Legacy CUDA image detected. 
- return getNvidiaConfigLegacy(env) + return getNvidiaConfigLegacy(env, privileged) } var devices string @@ -266,11 +307,12 @@ func getContainerConfig(hook HookConfig) (config containerConfig) { s := loadSpec(path.Join(b, "config.json")) env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI) + privileged := isPrivileged(s.Process.Capabilities) envSwarmGPU = hook.SwarmResource return containerConfig{ Pid: h.Pid, Rootfs: s.Root.Path, Env: env, - Nvidia: getNvidiaConfig(env), + Nvidia: getNvidiaConfig(env, privileged), } } From 8f387816bc4d7b899e7c8e3d656b29f4cf8f3b0d Mon Sep 17 00:00:00 2001 From: Kevin Klues Date: Fri, 20 Dec 2019 21:22:08 +0000 Subject: [PATCH 2/2] Add support for mig-config and mig-monitor as privileged flags These flags can only be injected into privileged containers. If the container is unprivileged, and one of these flags is specified, then we exit with an error. Signed-off-by: Kevin Klues --- pkg/container_config.go | 58 +++++++++++++++++++++++++++++++++++++++++ pkg/main.go | 6 +++++ 2 files changed, 64 insertions(+) diff --git a/pkg/container_config.go b/pkg/container_config.go index bc23661f..3c87c307 100644 --- a/pkg/container_config.go +++ b/pkg/container_config.go @@ -18,6 +18,8 @@ const ( envNVRequireCUDA = envNVRequirePrefix + "CUDA" envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE" envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES" + envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES" + envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES" envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES" ) @@ -32,6 +34,8 @@ const ( type nvidiaConfig struct { Devices string + MigConfigDevices string + MigMonitorDevices string DriverCapabilities string Requirements []string DisableRequire bool @@ -178,6 +182,26 @@ func getDevices(env map[string]string) *string { return nil } +func getMigConfigDevices(env map[string]string) *string { + gpuVars := []string{envNVMigConfigDevices} + for _, gpuVar := range gpuVars { + if devices, ok := env[gpuVar]; ok { + 
return &devices + } + } + return nil +} + +func getMigMonitorDevices(env map[string]string) *string { + gpuVars := []string{envNVMigMonitorDevices} + for _, gpuVar := range gpuVars { + if devices, ok := env[gpuVar]; ok { + return &devices + } + } + return nil +} + func getDriverCapabilities(env map[string]string) *string { if capabilities, ok := env[envNVDriverCapabilities]; ok { return &capabilities @@ -213,6 +237,22 @@ func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig devices = "" } + var migConfigDevices string + if d := getMigConfigDevices(env); d != nil { + migConfigDevices = *d + } + if !privileged && migConfigDevices != "" { + log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container") + } + + var migMonitorDevices string + if d := getMigMonitorDevices(env); d != nil { + migMonitorDevices = *d + } + if !privileged && migMonitorDevices != "" { + log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container") + } + var driverCapabilities string if c := getDriverCapabilities(env); c == nil { // Environment variable unset: default to "all". @@ -267,6 +307,22 @@ func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig { devices = "" } + var migConfigDevices string + if d := getMigConfigDevices(env); d != nil { + migConfigDevices = *d + } + if !privileged && migConfigDevices != "" { + log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container") + } + + var migMonitorDevices string + if d := getMigMonitorDevices(env); d != nil { + migMonitorDevices = *d + } + if !privileged && migMonitorDevices != "" { + log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container") + } + var driverCapabilities string if c := getDriverCapabilities(env); c == nil || len(*c) == 0 { // Environment variable unset or set but empty: use default capability. 
@@ -286,6 +342,8 @@ func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig { return &nvidiaConfig{ Devices: devices, + MigConfigDevices: migConfigDevices, + MigMonitorDevices: migMonitorDevices, DriverCapabilities: driverCapabilities, Requirements: requirements, DisableRequire: disableRequire, diff --git a/pkg/main.go b/pkg/main.go index 010ff359..13f8197c 100644 --- a/pkg/main.go +++ b/pkg/main.go @@ -126,6 +126,12 @@ func doPrestart() { if len(nvidia.Devices) > 0 { args = append(args, fmt.Sprintf("--device=%s", nvidia.Devices)) } + if len(nvidia.MigConfigDevices) > 0 { + args = append(args, fmt.Sprintf("--mig-config=%s", nvidia.MigConfigDevices)) + } + if len(nvidia.MigMonitorDevices) > 0 { + args = append(args, fmt.Sprintf("--mig-monitor=%s", nvidia.MigMonitorDevices)) + } for _, cap := range strings.Split(nvidia.DriverCapabilities, ",") { if len(cap) == 0 {