2019-10-22 21:36:22 +00:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"encoding/json"
|
|
|
|
"fmt"
|
|
|
|
"log"
|
|
|
|
"os"
|
|
|
|
"path"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capabilities in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
|
|
|
|
"golang.org/x/mod/semver"
|
2019-10-22 21:36:22 +00:00
|
|
|
)
|
|
|
|
|
|
|
|
// envSwarmGPU optionally holds the name of an additional environment variable
// to consult when selecting devices. It is assigned from the hook
// configuration's SwarmResource field in getContainerConfig and, when non-nil,
// is considered ahead of NVIDIA_VISIBLE_DEVICES by getDevicesFromEnvvar.
var envSwarmGPU *string
|
|
|
|
|
|
|
|
// Names of the environment variables that are looked up in the container's
// process environment.
const (
	// envCUDAVersion is used, together with envNVRequireCUDA, to detect a
	// legacy CUDA image and to derive its CUDA requirement.
	envCUDAVersion = "CUDA_VERSION"

	// envNVRequirePrefix is the common prefix of all requirement variables.
	envNVRequirePrefix = "NVIDIA_REQUIRE_"
	// envNVRequireCUDA is the requirement variable for the CUDA version.
	envNVRequireCUDA = envNVRequirePrefix + "CUDA"
	// envNVDisableRequire holds a boolean that is parsed into
	// nvidiaConfig.DisableRequire.
	envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"

	// envNVVisibleDevices selects the devices for the container.
	envNVVisibleDevices = "NVIDIA_VISIBLE_DEVICES"
	// envNVMigConfigDevices may only be set in privileged containers.
	envNVMigConfigDevices = "NVIDIA_MIG_CONFIG_DEVICES"
	// envNVMigMonitorDevices may only be set in privileged containers.
	envNVMigMonitorDevices = "NVIDIA_MIG_MONITOR_DEVICES"

	// envNVDriverCapabilities selects the driver capabilities for the container.
	envNVDriverCapabilities = "NVIDIA_DRIVER_CAPABILITIES"
)
|
|
|
|
|
|
|
|
// Driver capability sets substituted by getDriverCapabilities.
const (
	// allDriverCapabilities is the comma-separated list used when "all"
	// capabilities are requested (or implied by a legacy image).
	allDriverCapabilities = "compute,compat32,graphics,utility,video,display,ngx"

	// defaultDriverCapabilities is used when the capabilities variable is
	// unset or empty.
	defaultDriverCapabilities = "utility"
)
|
|
|
|
|
2019-12-20 21:19:00 +00:00
|
|
|
// capSysAdmin is the Linux capability name that isPrivileged searches for in
// the container's capability list.
const capSysAdmin = "CAP_SYS_ADMIN"
|
|
|
|
|
2019-10-22 21:36:22 +00:00
|
|
|
// nvidiaConfig collects the NVIDIA-specific settings derived from a
// container's environment by getNvidiaConfig.
type nvidiaConfig struct {
	// Devices is the device selection resolved by getDevices.
	Devices string
	// MigConfigDevices is the value of NVIDIA_MIG_CONFIG_DEVICES, if set.
	MigConfigDevices string
	// MigMonitorDevices is the value of NVIDIA_MIG_MONITOR_DEVICES, if set.
	MigMonitorDevices string
	// DriverCapabilities is the capability list resolved by
	// getDriverCapabilities.
	DriverCapabilities string
	// Requirements holds the values of all NVIDIA_REQUIRE_* variables, plus
	// a derived CUDA requirement for legacy images.
	Requirements []string
	// DisableRequire is the parsed value of NVIDIA_DISABLE_REQUIRE.
	DisableRequire bool
}
|
|
|
|
|
|
|
|
type containerConfig struct {
|
|
|
|
Pid int
|
|
|
|
Rootfs string
|
|
|
|
Env map[string]string
|
|
|
|
Nvidia *nvidiaConfig
|
|
|
|
}
|
|
|
|
|
2020-07-24 11:41:38 +00:00
|
|
|
// Root is the subset of the OCI runtime spec "root" object used by this hook.
// See:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L94-L100
type Root struct {
	// Path is decoded from the "path" key.
	Path string `json:"path"`
}
|
|
|
|
|
2020-07-24 11:41:38 +00:00
|
|
|
// Process is the subset of the OCI runtime spec "process" object used by this
// hook. See:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L57
type Process struct {
	// Env lists the process environment as "KEY=VALUE" strings.
	Env []string `json:"env,omitempty"`
	// Capabilities is kept as raw JSON because its layout depends on the
	// OCI spec version; isPrivileged decodes it.
	Capabilities *json.RawMessage `json:"capabilities,omitempty" platform:"linux"`
}
|
|
|
|
|
2020-07-24 11:41:38 +00:00
|
|
|
// LinuxCapabilities mirrors the capability sets of the OCI runtime spec. See:
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L61
type LinuxCapabilities struct {
	// Bounding is the set inspected by isPrivileged.
	Bounding []string `json:"bounding,omitempty" platform:"linux"`
	// Effective is decoded from the "effective" key.
	Effective []string `json:"effective,omitempty" platform:"linux"`
	// Inheritable is decoded from the "inheritable" key.
	Inheritable []string `json:"inheritable,omitempty" platform:"linux"`
	// Permitted is decoded from the "permitted" key.
	Permitted []string `json:"permitted,omitempty" platform:"linux"`
	// Ambient is decoded from the "ambient" key.
	Ambient []string `json:"ambient,omitempty" platform:"linux"`
}
|
|
|
|
|
2020-07-23 13:50:42 +00:00
|
|
|
// Mount mirrors a single entry of the OCI runtime spec "mounts" list. See:
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
type Mount struct {
	// Destination is decoded from the "destination" key.
	Destination string `json:"destination"`
	// Type is decoded from the "type" key.
	Type string `json:"type,omitempty" platform:"linux,solaris"`
	// Source is decoded from the "source" key.
	Source string `json:"source,omitempty"`
	// Options is decoded from the "options" key.
	Options []string `json:"options,omitempty"`
}
|
|
|
|
|
2020-07-24 11:41:38 +00:00
|
|
|
// Spec from OCI runtime spec
|
2019-10-22 21:36:22 +00:00
|
|
|
// We use pointers to structs, similarly to the latest version of runtime-spec:
|
|
|
|
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
|
|
|
|
type Spec struct {
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capaibilites in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
Version *string `json:"ociVersion"`
|
2019-10-22 21:36:22 +00:00
|
|
|
Process *Process `json:"process,omitempty"`
|
|
|
|
Root *Root `json:"root,omitempty"`
|
2020-07-23 13:50:42 +00:00
|
|
|
Mounts []Mount `json:"mounts,omitempty"`
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
|
|
|
|
2020-07-24 11:41:38 +00:00
|
|
|
// HookState is the container state that getContainerConfig decodes from the
// hook's standard input.
type HookState struct {
	// Pid is decoded from the "pid" key.
	Pid int `json:"pid,omitempty"`

	// Bundle is the field used after 17.06, when runc follows the runtime
	// spec:
	// github.com/docker/runc/blob/17.06/libcontainer/configs/config.go#L262-L263
	// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/state.go#L3-L17
	Bundle string `json:"bundle"`

	// BundlePath is the field used before 17.06, when runc used a custom
	// struct that didn't conform to the spec:
	// github.com/docker/runc/blob/17.03.x/libcontainer/configs/config.go#L245-L252
	BundlePath string `json:"bundlePath"`
}
|
|
|
|
|
|
|
|
// parseCudaVersion splits a CUDA version string into its numeric components.
//
// Three layouts are tried in turn: "major.minor.patch", "major.minor" and
// "major". Components that are missing from the matched layout are reported
// as 0. If none of the layouts matches, the function panics via log.Panicln.
func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
	if _, err := fmt.Sscanf(cudaVersion, "%d.%d.%d\n", &vmaj, &vmin, &vpatch); err == nil {
		return vmaj, vmin, vpatch
	}

	// A failed scan may have stored a partial result; discard it.
	vpatch = 0
	if _, err := fmt.Sscanf(cudaVersion, "%d.%d\n", &vmaj, &vmin); err == nil {
		return vmaj, vmin, vpatch
	}

	vmin = 0
	if _, err := fmt.Sscanf(cudaVersion, "%d\n", &vmaj); err != nil {
		log.Panicln("invalid CUDA version:", cudaVersion)
	}
	return vmaj, vmin, vpatch
}
|
|
|
|
|
2020-07-20 14:13:49 +00:00
|
|
|
// getEnvMap converts a list of "KEY=VALUE" strings into a map.
//
// Each entry is split at its first "="; anything after it, including further
// "=" characters, becomes the value. An entry without any "=" causes a panic
// via log.Panicln. The returned map is never nil.
func getEnvMap(e []string) (m map[string]string) {
	m = make(map[string]string)
	for _, entry := range e {
		sep := strings.Index(entry, "=")
		if sep < 0 {
			log.Panicln("environment error")
		}
		m[entry[:sep]] = entry[sep+1:]
	}
	return m
}
|
|
|
|
|
|
|
|
func loadSpec(path string) (spec *Spec) {
|
|
|
|
f, err := os.Open(path)
|
|
|
|
if err != nil {
|
|
|
|
log.Panicln("could not open OCI spec:", err)
|
|
|
|
}
|
|
|
|
defer f.Close()
|
|
|
|
|
|
|
|
if err = json.NewDecoder(f).Decode(&spec); err != nil {
|
|
|
|
log.Panicln("could not decode OCI spec:", err)
|
|
|
|
}
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capaibilites in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
if spec.Version == nil {
|
|
|
|
log.Panicln("Version is empty in OCI spec")
|
|
|
|
}
|
2019-10-22 21:36:22 +00:00
|
|
|
if spec.Process == nil {
|
|
|
|
log.Panicln("Process is empty in OCI spec")
|
|
|
|
}
|
|
|
|
if spec.Root == nil {
|
|
|
|
log.Panicln("Root is empty in OCI spec")
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|
|
|
|
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capabilities in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
func isPrivileged(s *Spec) bool {
|
|
|
|
if s.Process.Capabilities == nil {
|
2019-12-20 21:19:00 +00:00
|
|
|
return false
|
|
|
|
}
|
|
|
|
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capaibilites in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
var caps []string
|
|
|
|
// If v1.1.0-rc1 <= OCI version < v1.0.0-rc5 parse s.Process.Capabilities as:
|
|
|
|
// github.com/opencontainers/runtime-spec/blob/v1.0.0-rc1/specs-go/config.go#L30-L54
|
|
|
|
rc1cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc1")
|
|
|
|
rc5cmp := semver.Compare("v"+*s.Version, "v1.0.0-rc5")
|
|
|
|
if (rc1cmp == 1 || rc1cmp == 0) && (rc5cmp == -1) {
|
|
|
|
err := json.Unmarshal(*s.Process.Capabilities, &caps)
|
|
|
|
if err != nil {
|
|
|
|
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
2019-12-20 21:19:00 +00:00
|
|
|
}
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capaibilites in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
// Otherwise, parse s.Process.Capabilities as:
|
|
|
|
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
|
|
|
|
} else {
|
|
|
|
var lc LinuxCapabilities
|
|
|
|
err := json.Unmarshal(*s.Process.Capabilities, &lc)
|
|
|
|
if err != nil {
|
|
|
|
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
|
|
|
}
|
|
|
|
// We only make sure that the bounding capabibility set has
|
|
|
|
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
|
|
|
// actually started as '--privileged', but also allow non-root users to
|
2020-07-24 11:41:38 +00:00
|
|
|
// access the privileged NVIDIA capabilities.
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capaibilites in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
caps = lc.Bounding
|
2019-12-20 21:19:00 +00:00
|
|
|
}
|
|
|
|
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capaibilites in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
for _, c := range caps {
|
|
|
|
if c == capSysAdmin {
|
|
|
|
return true
|
|
|
|
}
|
2019-12-20 21:19:00 +00:00
|
|
|
}
|
|
|
|
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capaibilites in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
return false
|
2019-12-20 21:19:00 +00:00
|
|
|
}
|
|
|
|
|
2020-07-22 18:47:20 +00:00
|
|
|
func isLegacyCUDAImage(env map[string]string) bool {
|
|
|
|
legacyCudaVersion := env[envCUDAVersion]
|
|
|
|
cudaRequire := env[envNVRequireCUDA]
|
|
|
|
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
|
|
|
}
|
|
|
|
|
2020-07-23 14:18:11 +00:00
|
|
|
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
2020-07-23 11:04:03 +00:00
|
|
|
// Build a list of envvars to consider.
|
|
|
|
envVars := []string{envNVVisibleDevices}
|
2019-10-22 21:36:22 +00:00
|
|
|
if envSwarmGPU != nil {
|
2020-07-23 11:04:03 +00:00
|
|
|
// The Swarm envvar has higher precedence.
|
|
|
|
envVars = append([]string{*envSwarmGPU}, envVars...)
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
|
|
|
|
2020-07-23 11:04:03 +00:00
|
|
|
// Grab a reference to devices from the first envvar
|
|
|
|
// in the list that actually exists in the environment.
|
|
|
|
var devices *string
|
|
|
|
for _, envVar := range envVars {
|
|
|
|
if devs, ok := env[envVar]; ok {
|
|
|
|
devices = &devs
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
|
|
|
}
|
2020-07-23 11:04:03 +00:00
|
|
|
|
|
|
|
// Environment variable unset with legacy image: default to "all".
|
|
|
|
if devices == nil && legacyImage {
|
|
|
|
all := "all"
|
|
|
|
return &all
|
|
|
|
}
|
|
|
|
|
|
|
|
// Environment variable unset or empty or "void": return nil
|
|
|
|
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Environment variable set to "none": reset to "".
|
|
|
|
if *devices == "none" {
|
|
|
|
empty := ""
|
|
|
|
return &empty
|
|
|
|
}
|
|
|
|
|
|
|
|
// Any other value.
|
|
|
|
return devices
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
|
|
|
|
2020-07-23 14:18:11 +00:00
|
|
|
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
|
|
|
|
return getDevicesFromEnvvar(env, legacyImage)
|
|
|
|
}
|
|
|
|
|
2019-12-20 21:22:08 +00:00
|
|
|
func getMigConfigDevices(env map[string]string) *string {
|
2020-07-23 10:36:38 +00:00
|
|
|
if devices, ok := env[envNVMigConfigDevices]; ok {
|
|
|
|
return &devices
|
2019-12-20 21:22:08 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func getMigMonitorDevices(env map[string]string) *string {
|
2020-07-23 10:36:38 +00:00
|
|
|
if devices, ok := env[envNVMigMonitorDevices]; ok {
|
|
|
|
return &devices
|
2019-12-20 21:22:08 +00:00
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2020-07-23 11:04:03 +00:00
|
|
|
func getDriverCapabilities(env map[string]string, legacyImage bool) *string {
|
|
|
|
// Grab a reference to the capabilities from the envvar
|
|
|
|
// if it actually exists in the environment.
|
|
|
|
var capabilities *string
|
|
|
|
if caps, ok := env[envNVDriverCapabilities]; ok {
|
|
|
|
capabilities = &caps
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
2020-07-23 11:04:03 +00:00
|
|
|
|
|
|
|
// Environment variable unset with legacy image: set all capabilities.
|
|
|
|
if capabilities == nil && legacyImage {
|
|
|
|
allCaps := allDriverCapabilities
|
|
|
|
return &allCaps
|
|
|
|
}
|
|
|
|
|
|
|
|
// Environment variable unset or set but empty: set default capabilities.
|
|
|
|
if capabilities == nil || len(*capabilities) == 0 {
|
|
|
|
defaultCaps := defaultDriverCapabilities
|
|
|
|
return &defaultCaps
|
|
|
|
}
|
|
|
|
|
|
|
|
// Environment variable set to "all": set all capabilities.
|
|
|
|
if *capabilities == "all" {
|
|
|
|
allCaps := allDriverCapabilities
|
|
|
|
return &allCaps
|
|
|
|
}
|
|
|
|
|
|
|
|
// Any other value
|
|
|
|
return capabilities
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
|
|
|
|
2020-07-23 11:04:03 +00:00
|
|
|
func getRequirements(env map[string]string, legacyImage bool) []string {
|
2019-10-22 21:36:22 +00:00
|
|
|
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
|
|
|
var requirements []string
|
|
|
|
for name, value := range env {
|
|
|
|
if strings.HasPrefix(name, envNVRequirePrefix) {
|
|
|
|
requirements = append(requirements, value)
|
|
|
|
}
|
|
|
|
}
|
2020-07-23 11:04:03 +00:00
|
|
|
if legacyImage {
|
|
|
|
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
|
|
|
|
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
|
|
|
requirements = append(requirements, cudaRequire)
|
|
|
|
}
|
2019-10-22 21:36:22 +00:00
|
|
|
return requirements
|
|
|
|
}
|
|
|
|
|
2020-07-23 14:13:33 +00:00
|
|
|
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
2020-07-23 11:04:03 +00:00
|
|
|
legacyImage := isLegacyCUDAImage(env)
|
|
|
|
|
2019-10-22 21:36:22 +00:00
|
|
|
var devices string
|
2020-07-23 14:13:33 +00:00
|
|
|
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
2020-07-23 11:04:03 +00:00
|
|
|
devices = *d
|
|
|
|
} else {
|
|
|
|
// 'nil' devices means this is not a GPU container.
|
2019-10-22 21:36:22 +00:00
|
|
|
return nil
|
|
|
|
}
|
2020-07-24 11:41:38 +00:00
|
|
|
|
2019-12-20 21:22:08 +00:00
|
|
|
var migConfigDevices string
|
|
|
|
if d := getMigConfigDevices(env); d != nil {
|
|
|
|
migConfigDevices = *d
|
|
|
|
}
|
|
|
|
if !privileged && migConfigDevices != "" {
|
|
|
|
log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container")
|
|
|
|
}
|
|
|
|
|
|
|
|
var migMonitorDevices string
|
|
|
|
if d := getMigMonitorDevices(env); d != nil {
|
|
|
|
migMonitorDevices = *d
|
|
|
|
}
|
|
|
|
if !privileged && migMonitorDevices != "" {
|
|
|
|
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
|
|
|
}
|
|
|
|
|
2019-12-20 16:07:43 +00:00
|
|
|
var driverCapabilities string
|
2020-07-23 11:04:03 +00:00
|
|
|
if c := getDriverCapabilities(env, legacyImage); c != nil {
|
2019-12-20 16:07:43 +00:00
|
|
|
driverCapabilities = *c
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
|
|
|
|
2020-07-23 11:04:03 +00:00
|
|
|
requirements := getRequirements(env, legacyImage)
|
2019-10-22 21:36:22 +00:00
|
|
|
|
|
|
|
// Don't fail on invalid values.
|
|
|
|
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
|
|
|
|
|
|
|
return &nvidiaConfig{
|
2019-12-20 16:07:43 +00:00
|
|
|
Devices: devices,
|
2019-12-20 21:22:08 +00:00
|
|
|
MigConfigDevices: migConfigDevices,
|
|
|
|
MigMonitorDevices: migMonitorDevices,
|
2019-12-20 16:07:43 +00:00
|
|
|
DriverCapabilities: driverCapabilities,
|
|
|
|
Requirements: requirements,
|
|
|
|
DisableRequire: disableRequire,
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
func getContainerConfig(hook HookConfig) (config containerConfig) {
|
|
|
|
var h HookState
|
|
|
|
d := json.NewDecoder(os.Stdin)
|
|
|
|
if err := d.Decode(&h); err != nil {
|
|
|
|
log.Panicln("could not decode container state:", err)
|
|
|
|
}
|
|
|
|
|
|
|
|
b := h.Bundle
|
|
|
|
if len(b) == 0 {
|
|
|
|
b = h.BundlePath
|
|
|
|
}
|
|
|
|
|
|
|
|
s := loadSpec(path.Join(b, "config.json"))
|
|
|
|
|
2020-07-20 14:13:49 +00:00
|
|
|
env := getEnvMap(s.Process.Env)
|
Add support for parsing Linux Capabilities for older OCI specs
This was added to fix a regression with support for the default runc
shipped with CentOS 7.
The version of runc that is installed by default on CentOS 7 is
1.0.0-rc2 which uses OCI spec 1.0.0-rc2-dev.
This is a prerelease of the OCI spec, which defines the capabilities
section of a process configuration to be a flat list of capabilities
(e.g. SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0-rc2/config.md#process-configuration
By the time the official 1.0.0 version of the OCI spec came out, the
capabilities section of a process configuration was expanded to include
embedded fields for effective, bounding, inheritable, permitted and
ambient (each of which can contain a flat list of capabilities of the
form SYS_ADMIN, SYS_PTRACE, SYS_RAWIO, etc.)
https://github.com/opencontainers/runtime-spec/blob/v1.0.0/config.md#linux-process
Previously, we only inspected the capabilities section of a process
configuration assuming it was in the format of OCI spec 1.0.0.
This patch makes sure we can parse the capaibilites in either format.
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-06-03 19:19:31 +00:00
|
|
|
privileged := isPrivileged(s)
|
2019-10-22 21:36:22 +00:00
|
|
|
envSwarmGPU = hook.SwarmResource
|
|
|
|
return containerConfig{
|
|
|
|
Pid: h.Pid,
|
|
|
|
Rootfs: s.Root.Path,
|
|
|
|
Env: env,
|
2020-07-23 14:13:33 +00:00
|
|
|
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
2019-10-22 21:36:22 +00:00
|
|
|
}
|
|
|
|
}
|