mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
41 Commits
v1.2.0
...
v1.3.0-rc.
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
26668097c4 | ||
|
|
caf2792463 | ||
|
|
b2be0b08ac | ||
|
|
edc5041636 | ||
|
|
2c1809475c | ||
|
|
7c00385797 | ||
|
|
322006c361 | ||
|
|
a25017fb8a | ||
|
|
928905ce94 | ||
|
|
7ed17bb9ca | ||
|
|
b50d86c174 | ||
|
|
bf342fb4c9 | ||
|
|
1791372f22 | ||
|
|
4448319605 | ||
|
|
2ea3150b60 | ||
|
|
32b4b09bc9 | ||
|
|
cc0a22a6d9 | ||
|
|
e48d23d107 | ||
|
|
430dda41e9 | ||
|
|
8bcd02ee5d | ||
|
|
4791fab747 | ||
|
|
7313069d4c | ||
|
|
a24b0c8b4e | ||
|
|
f46d1861d3 | ||
|
|
0a9dc3c653 | ||
|
|
889ebae1fe | ||
|
|
e4b9318de3 | ||
|
|
aec9a28bc3 | ||
|
|
2ae7cb07cf | ||
|
|
da36874e91 | ||
|
|
b9ef2db205 | ||
|
|
da6fbb343a | ||
|
|
647a805341 | ||
|
|
fe65573bdf | ||
|
|
a7fb33301c | ||
|
|
8b248b6631 | ||
|
|
d10144b3b1 | ||
|
|
ba9758c7ff | ||
|
|
d467b87ef9 | ||
|
|
2f4af74320 | ||
|
|
4e6e0ed4f1 |
@@ -1,9 +1,24 @@
|
||||
# Build packages for all supported OS / ARCH combinations
|
||||
|
||||
stages:
|
||||
- tests
|
||||
- build-one
|
||||
- build-all
|
||||
|
||||
.tests-setup: &tests-setup
|
||||
image: golang:1.14.4
|
||||
|
||||
rules:
|
||||
- when: always
|
||||
|
||||
variables:
|
||||
GITHUB_ROOT: "github.com/NVIDIA"
|
||||
PROJECT_GOPATH: "${GITHUB_ROOT}/nvidia-container-toolkit"
|
||||
|
||||
before_script:
|
||||
- mkdir -p ${GOPATH}/src/${GITHUB_ROOT}
|
||||
- ln -s ${CI_PROJECT_DIR} ${GOPATH}/src/${PROJECT_GOPATH}
|
||||
|
||||
.build-setup: &build-setup
|
||||
image: docker:19.03.8
|
||||
|
||||
@@ -17,6 +32,48 @@ stages:
|
||||
- apk add coreutils build-base sed git bash make
|
||||
- docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -c yes
|
||||
|
||||
# Run a series of sanity-check tests over the code
|
||||
lint:
|
||||
<<: *tests-setup
|
||||
stage: tests
|
||||
script:
|
||||
- go get -u golang.org/x/lint/golint
|
||||
- golint -set_exit_status ${PROJECT_GOPATH}/pkg
|
||||
|
||||
vet:
|
||||
<<: *tests-setup
|
||||
stage: tests
|
||||
script:
|
||||
- go vet ${PROJECT_GOPATH}/pkg
|
||||
|
||||
unit_test:
|
||||
<<: *tests-setup
|
||||
stage: tests
|
||||
script:
|
||||
- go test ${PROJECT_GOPATH}/pkg
|
||||
|
||||
fmt:
|
||||
<<: *tests-setup
|
||||
stage: tests
|
||||
script:
|
||||
- res=$(gofmt -l pkg/*.go)
|
||||
- echo "$res"
|
||||
- test -z "$res"
|
||||
|
||||
ineffassign:
|
||||
<<: *tests-setup
|
||||
stage: tests
|
||||
script:
|
||||
- go get -u github.com/gordonklaus/ineffassign
|
||||
- ineffassign pkg/*.go
|
||||
|
||||
misspell:
|
||||
<<: *tests-setup
|
||||
stage: tests
|
||||
script:
|
||||
- go get -u github.com/client9/misspell/cmd/misspell
|
||||
- misspell pkg/*.go
|
||||
|
||||
# build-one jobs build packages for a single OS / ARCH combination.
|
||||
#
|
||||
# They are run during the first stage of the pipeline as a smoke test to ensure
|
||||
@@ -47,7 +104,7 @@ stages:
|
||||
- if: $CI_COMMIT_TAG
|
||||
when: always
|
||||
- if: $CI_MERGE_REQUEST_ID
|
||||
when: manual
|
||||
when: always
|
||||
|
||||
variables:
|
||||
ARTIFACTS_NAME: "${CI_PROJECT_NAME}-${CI_COMMIT_REF_SLUG}-${CI_JOB_NAME}-artifacts-${CI_PIPELINE_ID}"
|
||||
|
||||
5
Makefile
5
Makefile
@@ -5,10 +5,11 @@ MKDIR ?= mkdir
|
||||
DIST_DIR ?= $(CURDIR)/dist
|
||||
|
||||
LIB_NAME := nvidia-container-toolkit
|
||||
LIB_VERSION := 1.2.0
|
||||
LIB_VERSION := 1.3.0
|
||||
LIB_TAG ?= rc.2
|
||||
|
||||
GOLANG_VERSION := 1.14.2
|
||||
GOLANG_PKG_PATH := github.com/NVIDIA/container-toolkit/pkg
|
||||
GOLANG_PKG_PATH := github.com/NVIDIA/nvidia-container-toolkit/pkg
|
||||
|
||||
# By default run all native docker-based targets
|
||||
docker-native:
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
@@ -11,7 +13,6 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
@@ -11,7 +13,6 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
@@ -11,7 +13,6 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
@@ -11,7 +13,6 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
||||
@@ -1,5 +1,7 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
@@ -11,7 +13,6 @@ load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig.real"
|
||||
#alpha-merge-visible-devices-envvars = false
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
|
||||
@@ -1,131 +0,0 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/stretchr/testify/require"
|
||||
"sort"
|
||||
"strings"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestMergeVisibleDevicesEnvvars(t *testing.T) {
|
||||
var tests = []struct {
|
||||
name string
|
||||
input []string
|
||||
expected string
|
||||
enableMerge bool
|
||||
}{
|
||||
{
|
||||
"Simple Merge Enabled",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"0,1,2,3,4,5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Simple Merge Disabled",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"Merge No Override (Enabled)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
"all",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Merge No Override (Disabled)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
"all",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"Merge Override (Enabled, Before)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"0,1,2,3,4,5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Merge Override (Enabled, After)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
"0,1,2,3,4,5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Merge Override (Enabled, In Between)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"0,1,2,3,4,5",
|
||||
true,
|
||||
},
|
||||
{
|
||||
"Merge Override (Disabled, Before)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"all",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"Merge Override (Disabled, After)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
"all",
|
||||
false,
|
||||
},
|
||||
{
|
||||
"Merge Override (Disabled, In Between)",
|
||||
[]string{
|
||||
"NVIDIA_VISIBLE_DEVICES_0=0,1",
|
||||
"NVIDIA_VISIBLE_DEVICES_1=2,3",
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
|
||||
},
|
||||
"all",
|
||||
false,
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
config := CLIConfig{
|
||||
AlphaMergeVisibleDevicesEnvvars: tc.enableMerge,
|
||||
}
|
||||
envvars := getEnvMap(tc.input, config)
|
||||
devices := strings.Split(envvars[envNVVisibleDevices], ",")
|
||||
sort.Strings(devices)
|
||||
require.Equal(t, tc.expected, strings.Join(devices, ","))
|
||||
})
|
||||
}
|
||||
}
|
||||
10
docker.mk
10
docker.mk
@@ -75,28 +75,30 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
||||
|
||||
# private ubuntu target
|
||||
--ubuntu%: OS := ubuntu
|
||||
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--ubuntu%: PKG_REV := 1
|
||||
|
||||
# private debian target
|
||||
--debian%: OS := debian
|
||||
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--debian%: PKG_REV := 1
|
||||
|
||||
# private centos target
|
||||
--centos%: OS := centos
|
||||
--centos%: PKG_REV := 2
|
||||
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),2)
|
||||
|
||||
# private amazonlinux target
|
||||
--amazonlinux%: OS := amazonlinux
|
||||
--amazonlinux%: PKG_REV = 2.amzn$(VERSION)
|
||||
--amazonlinux%: PKG_REV = $(if $(LIB_TAG),0.1.$(LIB_TAG).amzn$(VERSION),2.amzn$(VERSION))
|
||||
|
||||
# private opensuse-leap target
|
||||
--opensuse-leap%: OS = opensuse-leap
|
||||
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
|
||||
--opensuse-leap%: PKG_REV := 1
|
||||
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
# private rhel target (actually built on centos)
|
||||
--rhel%: OS := centos
|
||||
--rhel%: PKG_REV := 2
|
||||
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),2)
|
||||
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
|
||||
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
|
||||
|
||||
|
||||
2
go.mod
2
go.mod
@@ -1,4 +1,4 @@
|
||||
module github.com/NVIDIA/container-toolkit
|
||||
module github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
go 1.14
|
||||
|
||||
|
||||
@@ -1,3 +1,28 @@
|
||||
nvidia-container-toolkit (1.3.0~rc.2-1) experimental; urgency=medium
|
||||
|
||||
* 2c180947 Add more tests for new semantics with device list from volume mounts
|
||||
* 7c003857 Refactor accepting device lists from volume mounts as a boolean
|
||||
|
||||
-- NVIDIA CORPORATION <cudatools@nvidia.com> Mon, 10 Aug 2020 15:05:34 -0700
|
||||
|
||||
nvidia-container-toolkit (1.3.0~rc.1-1) experimental; urgency=medium
|
||||
|
||||
* b50d86c1 Update build system to accept a TAG variable for things like rc.x
|
||||
* fe65573b Add common CI tests for things like golint, gofmt, unit tests, etc.
|
||||
* da6fbb34 Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
|
||||
* a7fb3330 Flip build-all targets to run automatically on merge requests
|
||||
* 8b248b66 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
|
||||
* da36874e Add new config options to pull device list from mounted files instead of ENVVAR
|
||||
|
||||
-- NVIDIA CORPORATION <cudatools@nvidia.com> Fri, 24 Jul 2020 22:21:49 -0700
|
||||
|
||||
nvidia-container-toolkit (1.2.1-1) UNRELEASED; urgency=medium
|
||||
|
||||
* 4e6e0ed4 Add 'ngx' to list of *all* driver capabilities
|
||||
* 2f4af743 List config.toml as a config file in the RPM SPEC
|
||||
|
||||
-- NVIDIA CORPORATION <cudatools@nvidia.com> Wed, 22 Jul 2020 15:36:12 -0700
|
||||
|
||||
nvidia-container-toolkit (1.2.0-1) UNRELEASED; urgency=medium
|
||||
|
||||
* 8e0aab46 Fix repo listed in changelog for debian distributions
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
|
||||
Upstream-Name: nvidia-container-toolkit
|
||||
Source: https://github.com/NVIDIA/container-toolkit
|
||||
Source: https://github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
Files: *
|
||||
Copyright: 2017-2020 NVIDIA CORPORATION <cudatools@nvidia.com>
|
||||
|
||||
@@ -48,11 +48,27 @@ rm -f %{_bindir}/nvidia-container-runtime-hook
|
||||
%files
|
||||
%license LICENSE
|
||||
%{_bindir}/nvidia-container-toolkit
|
||||
/etc/nvidia-container-runtime/config.toml
|
||||
%config /etc/nvidia-container-runtime/config.toml
|
||||
/usr/libexec/oci/hooks.d/oci-nvidia-hook
|
||||
/usr/share/containers/oci/hooks.d/oci-nvidia-hook.json
|
||||
|
||||
%changelog
|
||||
* Mon Aug 10 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.3.0-0.1.rc.2
|
||||
- 2c180947 Add more tests for new semantics with device list from volume mounts
|
||||
- 7c003857 Refactor accepting device lists from volume mounts as a boolean
|
||||
|
||||
* Fri Jul 24 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.3.0-0.1.rc.1
|
||||
- b50d86c1 Update build system to accept a TAG variable for things like rc.x
|
||||
- fe65573b Add common CI tests for things like golint, gofmt, unit tests, etc.
|
||||
- da6fbb34 Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
|
||||
- a7fb3330 Flip build-all targets to run automatically on merge requests
|
||||
- 8b248b66 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
|
||||
- da36874e Add new config options to pull device list from mounted files instead of ENVVAR
|
||||
|
||||
* Wed Jul 22 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.2.1-1
|
||||
- 4e6e0ed4 Add 'ngx' to list of *all* driver capabilities
|
||||
- 2f4af743 List config.toml as a config file in the RPM SPEC
|
||||
|
||||
* Wed Jul 08 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.2.0-1
|
||||
- 8e0aab46 Fix repo listed in changelog for debian distributions
|
||||
- 320bb6e4 Update dependence on libnvidia-container to 1.2.0
|
||||
|
||||
12
pkg/Godeps/Godeps.json
generated
12
pkg/Godeps/Godeps.json
generated
@@ -1,12 +0,0 @@
|
||||
{
|
||||
"ImportPath": "github.com/nvidia/nvidia-container-runtime/toolkit/nvidia-container-toolkit",
|
||||
"GoVersion": "go1.9",
|
||||
"GodepVersion": "v80",
|
||||
"Deps": [
|
||||
{
|
||||
"ImportPath": "github.com/BurntSushi/toml",
|
||||
"Comment": "v0.3.0-7-ga368813",
|
||||
"Rev": "a368813c5e648fee92e5f6c30e3944ff9d5e8895"
|
||||
}
|
||||
]
|
||||
}
|
||||
Binary file not shown.
@@ -6,6 +6,7 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
@@ -26,7 +27,7 @@ const (
|
||||
)
|
||||
|
||||
const (
|
||||
allDriverCapabilities = "compute,compat32,graphics,utility,video,display"
|
||||
allDriverCapabilities = "compute,compat32,graphics,utility,video,display,ngx"
|
||||
defaultDriverCapabilities = "utility"
|
||||
)
|
||||
|
||||
@@ -34,6 +35,10 @@ const (
|
||||
capSysAdmin = "CAP_SYS_ADMIN"
|
||||
)
|
||||
|
||||
const (
|
||||
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
|
||||
)
|
||||
|
||||
type nvidiaConfig struct {
|
||||
Devices string
|
||||
MigConfigDevices string
|
||||
@@ -50,17 +55,20 @@ type containerConfig struct {
|
||||
Nvidia *nvidiaConfig
|
||||
}
|
||||
|
||||
// Root from OCI runtime spec
|
||||
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L94-L100
|
||||
type Root struct {
|
||||
Path string `json:"path"`
|
||||
}
|
||||
|
||||
// Process from OCI runtime spec
|
||||
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L57
|
||||
type Process struct {
|
||||
Env []string `json:"env,omitempty"`
|
||||
Capabilities *json.RawMessage `json:"capabilities,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// LinuxCapabilities from OCI runtime spec
|
||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L61
|
||||
type LinuxCapabilities struct {
|
||||
Bounding []string `json:"bounding,omitempty" platform:"linux"`
|
||||
@@ -70,14 +78,26 @@ type LinuxCapabilities struct {
|
||||
Ambient []string `json:"ambient,omitempty" platform:"linux"`
|
||||
}
|
||||
|
||||
// Mount from OCI runtime spec
|
||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
|
||||
type Mount struct {
|
||||
Destination string `json:"destination"`
|
||||
Type string `json:"type,omitempty" platform:"linux,solaris"`
|
||||
Source string `json:"source,omitempty"`
|
||||
Options []string `json:"options,omitempty"`
|
||||
}
|
||||
|
||||
// Spec from OCI runtime spec
|
||||
// We use pointers to structs, similarly to the latest version of runtime-spec:
|
||||
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
|
||||
type Spec struct {
|
||||
Version *string `json:"ociVersion"`
|
||||
Process *Process `json:"process,omitempty"`
|
||||
Root *Root `json:"root,omitempty"`
|
||||
Mounts []Mount `json:"mounts,omitempty"`
|
||||
}
|
||||
|
||||
// HookState holds state information about the hook
|
||||
type HookState struct {
|
||||
Pid int `json:"pid,omitempty"`
|
||||
// After 17.06, runc is using the runtime spec:
|
||||
@@ -103,7 +123,7 @@ func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
|
||||
return
|
||||
}
|
||||
|
||||
func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
|
||||
func getEnvMap(e []string) (m map[string]string) {
|
||||
m = make(map[string]string)
|
||||
for _, s := range e {
|
||||
p := strings.SplitN(s, "=", 2)
|
||||
@@ -112,17 +132,6 @@ func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
|
||||
}
|
||||
m[p[0]] = p[1]
|
||||
}
|
||||
if config.AlphaMergeVisibleDevicesEnvvars {
|
||||
var mergable []string
|
||||
for k, v := range m {
|
||||
if strings.HasPrefix(k, envNVVisibleDevices+"_") {
|
||||
mergable = append(mergable, v)
|
||||
}
|
||||
}
|
||||
if len(mergable) > 0 {
|
||||
m[envNVVisibleDevices] = strings.Join(mergable, ",")
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -174,7 +183,7 @@ func isPrivileged(s *Spec) bool {
|
||||
// We only make sure that the bounding capability set has
|
||||
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
||||
// actually started as '--privileged', but also allow non-root users to
|
||||
// access the priviliged NVIDIA capabilities.
|
||||
// access the privileged NVIDIA capabilities.
|
||||
caps = lc.Bounding
|
||||
}
|
||||
|
||||
@@ -187,49 +196,156 @@ func isPrivileged(s *Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func getDevices(env map[string]string) *string {
|
||||
gpuVars := []string{envNVVisibleDevices}
|
||||
func isLegacyCUDAImage(env map[string]string) bool {
|
||||
legacyCudaVersion := env[envCUDAVersion]
|
||||
cudaRequire := env[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
||||
// Build a list of envvars to consider.
|
||||
envVars := []string{envNVVisibleDevices}
|
||||
if envSwarmGPU != nil {
|
||||
// The Swarm resource has higher precedence.
|
||||
gpuVars = append([]string{*envSwarmGPU}, gpuVars...)
|
||||
// The Swarm envvar has higher precedence.
|
||||
envVars = append([]string{*envSwarmGPU}, envVars...)
|
||||
}
|
||||
|
||||
for _, gpuVar := range gpuVars {
|
||||
if devices, ok := env[gpuVar]; ok {
|
||||
return &devices
|
||||
// Grab a reference to devices from the first envvar
|
||||
// in the list that actually exists in the environment.
|
||||
var devices *string
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := env[envVar]; ok {
|
||||
devices = &devs
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if devices == nil && legacyImage {
|
||||
all := "all"
|
||||
return &all
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Environment variable set to "none": reset to "".
|
||||
if *devices == "none" {
|
||||
empty := ""
|
||||
return &empty
|
||||
}
|
||||
|
||||
// Any other value.
|
||||
return devices
|
||||
}
|
||||
|
||||
func getDevicesFromMounts(mounts []Mount) *string {
|
||||
var devices []string
|
||||
for _, m := range mounts {
|
||||
root := filepath.Clean(deviceListAsVolumeMountsRoot)
|
||||
source := filepath.Clean(m.Source)
|
||||
destination := filepath.Clean(m.Destination)
|
||||
|
||||
// Only consider mounts who's host volume is /dev/null
|
||||
if source != "/dev/null" {
|
||||
continue
|
||||
}
|
||||
// Only consider container mount points that begin with 'root'
|
||||
if len(destination) < len(root) {
|
||||
continue
|
||||
}
|
||||
if destination[:len(root)] != root {
|
||||
continue
|
||||
}
|
||||
// Grab the full path beyond 'root' and add it to the list of devices
|
||||
device := destination[len(root):]
|
||||
if len(device) > 0 && device[0] == '/' {
|
||||
device = device[1:]
|
||||
}
|
||||
if len(device) == 0 {
|
||||
continue
|
||||
}
|
||||
devices = append(devices, device)
|
||||
}
|
||||
|
||||
if devices == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
ret := strings.Join(devices, ",")
|
||||
return &ret
|
||||
}
|
||||
|
||||
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
|
||||
// If enabled, try and get the device list from volume mounts first
|
||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||
devices := getDevicesFromMounts(mounts)
|
||||
if devices != nil {
|
||||
return devices
|
||||
}
|
||||
}
|
||||
|
||||
// Fallback to reading from the environment variable if privileges are correct
|
||||
devices := getDevicesFromEnvvar(env, legacyImage)
|
||||
if devices == nil {
|
||||
return nil
|
||||
}
|
||||
if privileged || hookConfig.AcceptEnvvarUnprivileged {
|
||||
return devices
|
||||
}
|
||||
|
||||
// Error out otherwise
|
||||
log.Panicln("insufficient privileges to read device list from NVIDIA_VISIBLE_DEVICES envvar")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMigConfigDevices(env map[string]string) *string {
|
||||
gpuVars := []string{envNVMigConfigDevices}
|
||||
for _, gpuVar := range gpuVars {
|
||||
if devices, ok := env[gpuVar]; ok {
|
||||
return &devices
|
||||
}
|
||||
if devices, ok := env[envNVMigConfigDevices]; ok {
|
||||
return &devices
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getMigMonitorDevices(env map[string]string) *string {
|
||||
gpuVars := []string{envNVMigMonitorDevices}
|
||||
for _, gpuVar := range gpuVars {
|
||||
if devices, ok := env[gpuVar]; ok {
|
||||
return &devices
|
||||
}
|
||||
if devices, ok := env[envNVMigMonitorDevices]; ok {
|
||||
return &devices
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getDriverCapabilities(env map[string]string) *string {
|
||||
if capabilities, ok := env[envNVDriverCapabilities]; ok {
|
||||
return &capabilities
|
||||
func getDriverCapabilities(env map[string]string, legacyImage bool) *string {
|
||||
// Grab a reference to the capabilities from the envvar
|
||||
// if it actually exists in the environment.
|
||||
var capabilities *string
|
||||
if caps, ok := env[envNVDriverCapabilities]; ok {
|
||||
capabilities = &caps
|
||||
}
|
||||
return nil
|
||||
|
||||
// Environment variable unset with legacy image: set all capabilities.
|
||||
if capabilities == nil && legacyImage {
|
||||
allCaps := allDriverCapabilities
|
||||
return &allCaps
|
||||
}
|
||||
|
||||
// Environment variable unset or set but empty: set default capabilities.
|
||||
if capabilities == nil || len(*capabilities) == 0 {
|
||||
defaultCaps := defaultDriverCapabilities
|
||||
return &defaultCaps
|
||||
}
|
||||
|
||||
// Environment variable set to "all": set all capabilities.
|
||||
if *capabilities == "all" {
|
||||
allCaps := allDriverCapabilities
|
||||
return &allCaps
|
||||
}
|
||||
|
||||
// Any other value
|
||||
return capabilities
|
||||
}
|
||||
|
||||
func getRequirements(env map[string]string) []string {
|
||||
func getRequirements(env map[string]string, legacyImage bool) []string {
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range env {
|
||||
@@ -237,24 +353,23 @@ func getRequirements(env map[string]string) []string {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
if legacyImage {
|
||||
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
|
||||
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
}
|
||||
return requirements
|
||||
}
|
||||
|
||||
// Mimic the new CUDA images if no capabilities or devices are specified.
|
||||
func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig {
|
||||
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := isLegacyCUDAImage(env)
|
||||
|
||||
var devices string
|
||||
if d := getDevices(env); d == nil {
|
||||
// Environment variable unset: default to "all".
|
||||
devices = "all"
|
||||
} else if len(*d) == 0 || *d == "void" {
|
||||
// Environment variable empty or "void": not a GPU container.
|
||||
return nil
|
||||
} else {
|
||||
// Environment variable non-empty and not "void".
|
||||
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
||||
devices = *d
|
||||
}
|
||||
if devices == "none" {
|
||||
devices = ""
|
||||
} else {
|
||||
// 'nil' devices means this is not a GPU container.
|
||||
return nil
|
||||
}
|
||||
|
||||
var migConfigDevices string
|
||||
@@ -274,88 +389,11 @@ func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig
|
||||
}
|
||||
|
||||
var driverCapabilities string
|
||||
if c := getDriverCapabilities(env); c == nil {
|
||||
// Environment variable unset: default to "all".
|
||||
driverCapabilities = allDriverCapabilities
|
||||
} else if len(*c) == 0 {
|
||||
// Environment variable empty: use default capability.
|
||||
driverCapabilities = defaultDriverCapabilities
|
||||
} else {
|
||||
// Environment variable non-empty.
|
||||
if c := getDriverCapabilities(env, legacyImage); c != nil {
|
||||
driverCapabilities = *c
|
||||
}
|
||||
if driverCapabilities == "all" {
|
||||
driverCapabilities = allDriverCapabilities
|
||||
}
|
||||
|
||||
requirements := getRequirements(env)
|
||||
|
||||
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
|
||||
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
|
||||
// Don't fail on invalid values.
|
||||
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
||||
|
||||
return &nvidiaConfig{
|
||||
Devices: devices,
|
||||
MigConfigDevices: migConfigDevices,
|
||||
MigMonitorDevices: migMonitorDevices,
|
||||
DriverCapabilities: driverCapabilities,
|
||||
Requirements: requirements,
|
||||
DisableRequire: disableRequire,
|
||||
}
|
||||
}
|
||||
|
||||
func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig {
|
||||
legacyCudaVersion := env[envCUDAVersion]
|
||||
cudaRequire := env[envNVRequireCUDA]
|
||||
if len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 {
|
||||
// Legacy CUDA image detected.
|
||||
return getNvidiaConfigLegacy(env, privileged)
|
||||
}
|
||||
|
||||
var devices string
|
||||
if d := getDevices(env); d == nil || len(*d) == 0 || *d == "void" {
|
||||
// Environment variable unset or empty or "void": not a GPU container.
|
||||
return nil
|
||||
} else {
|
||||
// Environment variable non-empty and not "void".
|
||||
devices = *d
|
||||
}
|
||||
if devices == "none" {
|
||||
devices = ""
|
||||
}
|
||||
|
||||
var migConfigDevices string
|
||||
if d := getMigConfigDevices(env); d != nil {
|
||||
migConfigDevices = *d
|
||||
}
|
||||
if !privileged && migConfigDevices != "" {
|
||||
log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
var migMonitorDevices string
|
||||
if d := getMigMonitorDevices(env); d != nil {
|
||||
migMonitorDevices = *d
|
||||
}
|
||||
if !privileged && migMonitorDevices != "" {
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
var driverCapabilities string
|
||||
if c := getDriverCapabilities(env); c == nil || len(*c) == 0 {
|
||||
// Environment variable unset or set but empty: use default capability.
|
||||
driverCapabilities = defaultDriverCapabilities
|
||||
} else {
|
||||
// Environment variable set and non-empty.
|
||||
driverCapabilities = *c
|
||||
}
|
||||
if driverCapabilities == "all" {
|
||||
driverCapabilities = allDriverCapabilities
|
||||
}
|
||||
|
||||
requirements := getRequirements(env)
|
||||
requirements := getRequirements(env, legacyImage)
|
||||
|
||||
// Don't fail on invalid values.
|
||||
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
||||
@@ -384,13 +422,13 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
|
||||
s := loadSpec(path.Join(b, "config.json"))
|
||||
|
||||
env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI)
|
||||
env := getEnvMap(s.Process.Env)
|
||||
privileged := isPrivileged(s)
|
||||
envSwarmGPU = hook.SwarmResource
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
Env: env,
|
||||
Nvidia: getNvidiaConfig(env, privileged),
|
||||
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
||||
}
|
||||
}
|
||||
|
||||
681
pkg/container_test.go
Normal file
681
pkg/container_test.go
Normal file
@@ -0,0 +1,681 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"reflect"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestGetNvidiaConfig(t *testing.T) {
|
||||
var tests = []struct {
|
||||
description string
|
||||
env map[string]string
|
||||
privileged bool
|
||||
expectedConfig *nvidiaConfig
|
||||
expectedPanic bool
|
||||
}{
|
||||
{
|
||||
description: "No environment, unprivileged",
|
||||
env: map[string]string{},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
},
|
||||
{
|
||||
description: "No environment, privileged",
|
||||
env: map[string]string{},
|
||||
privileged: true,
|
||||
expectedConfig: nil,
|
||||
},
|
||||
{
|
||||
description: "Legacy image, no devices, no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices 'all', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "all",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices 'empty', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices 'none', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "none",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices set, no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices set, capabilities 'empty', no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices set, capabilities 'all', no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "all",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices set, capabilities set, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices set, capabilities set, requirements set",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVRequirePrefix + "REQ0": "req0=true",
|
||||
envNVRequirePrefix + "REQ1": "req1=false",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Legacy image, devices set, capabilities set, requirements set, disable requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVRequirePrefix + "REQ0": "req0=true",
|
||||
envNVRequirePrefix + "REQ1": "req1=false",
|
||||
envNVDisableRequire: "true",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
|
||||
DisableRequire: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, no devices, no capabilities, no requirements, no envCUDAVersion",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
},
|
||||
{
|
||||
description: "Modern image, no devices, no capabilities, no requirement, envCUDAVersion set",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices 'all', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "all",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices 'empty', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices 'none', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "none",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices set, no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices set, capabilities 'empty', no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices set, capabilities 'all', no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "all",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: allDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices set, capabilities set, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices set, capabilities set, requirements set",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVRequirePrefix + "REQ0": "req0=true",
|
||||
envNVRequirePrefix + "REQ1": "req1=false",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices set, capabilities set, requirements set, disable requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "gpu0,gpu1",
|
||||
envNVDriverCapabilities: "cap0,cap1",
|
||||
envNVRequirePrefix + "REQ0": "req0=true",
|
||||
envNVRequirePrefix + "REQ1": "req1=false",
|
||||
envNVDisableRequire: "true",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "gpu0,gpu1",
|
||||
DriverCapabilities: "cap0,cap1",
|
||||
Requirements: []string{"cuda>=9.0", "req0=true", "req1=false"},
|
||||
DisableRequire: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "No cuda envs, devices 'all'",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
},
|
||||
privileged: false,
|
||||
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
Requirements: []string{},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices 'all', migConfig set, privileged",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "all",
|
||||
envNVMigConfigDevices: "mig0,mig1",
|
||||
},
|
||||
privileged: true,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
MigConfigDevices: "mig0,mig1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices 'all', migConfig set, unprivileged",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "all",
|
||||
envNVMigConfigDevices: "mig0,mig1",
|
||||
},
|
||||
privileged: false,
|
||||
expectedPanic: true,
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices 'all', migMonitor set, privileged",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "all",
|
||||
envNVMigMonitorDevices: "mig0,mig1",
|
||||
},
|
||||
privileged: true,
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "all",
|
||||
MigMonitorDevices: "mig0,mig1",
|
||||
DriverCapabilities: defaultDriverCapabilities,
|
||||
Requirements: []string{"cuda>=9.0"},
|
||||
DisableRequire: false,
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Modern image, devices 'all', migMonitor set, unprivileged",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "all",
|
||||
envNVMigMonitorDevices: "mig0,mig1",
|
||||
},
|
||||
privileged: false,
|
||||
expectedPanic: true,
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
// Wrap the call to getNvidiaConfig() in a closure.
|
||||
var config *nvidiaConfig
|
||||
getConfig := func() {
|
||||
hookConfig := getDefaultHookConfig()
|
||||
config = getNvidiaConfig(&hookConfig, tc.env, nil, tc.privileged)
|
||||
}
|
||||
|
||||
// For any tests that are expected to panic, make sure they do.
|
||||
if tc.expectedPanic {
|
||||
mustPanic(t, getConfig)
|
||||
return
|
||||
}
|
||||
|
||||
// For all other tests, just grab the config
|
||||
getConfig()
|
||||
|
||||
// And start comparing the test results to the expected results.
|
||||
if config == nil && tc.expectedConfig == nil {
|
||||
return
|
||||
}
|
||||
if config != nil && tc.expectedConfig != nil {
|
||||
if !reflect.DeepEqual(config.Devices, tc.expectedConfig.Devices) {
|
||||
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
|
||||
}
|
||||
if !reflect.DeepEqual(config.MigConfigDevices, tc.expectedConfig.MigConfigDevices) {
|
||||
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
|
||||
}
|
||||
if !reflect.DeepEqual(config.MigMonitorDevices, tc.expectedConfig.MigMonitorDevices) {
|
||||
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
|
||||
}
|
||||
if !reflect.DeepEqual(config.DriverCapabilities, tc.expectedConfig.DriverCapabilities) {
|
||||
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
|
||||
}
|
||||
if !elementsMatch(config.Requirements, tc.expectedConfig.Requirements) {
|
||||
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
|
||||
}
|
||||
if !reflect.DeepEqual(config.DisableRequire, tc.expectedConfig.DisableRequire) {
|
||||
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
|
||||
}
|
||||
return
|
||||
}
|
||||
t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetDevicesFromMounts(t *testing.T) {
|
||||
var tests = []struct {
|
||||
description string
|
||||
mounts []Mount
|
||||
expectedDevices *string
|
||||
}{
|
||||
{
|
||||
description: "No mounts",
|
||||
mounts: nil,
|
||||
expectedDevices: nil,
|
||||
},
|
||||
{
|
||||
description: "Host path is not /dev/null",
|
||||
mounts: []Mount{
|
||||
{
|
||||
Source: "/not/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
||||
},
|
||||
},
|
||||
expectedDevices: nil,
|
||||
},
|
||||
{
|
||||
description: "Container path is not prefixed by 'root'",
|
||||
mounts: []Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join("/other/prefix", "GPU0"),
|
||||
},
|
||||
},
|
||||
expectedDevices: nil,
|
||||
},
|
||||
{
|
||||
description: "Container path is only 'root'",
|
||||
mounts: []Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: deviceListAsVolumeMountsRoot,
|
||||
},
|
||||
},
|
||||
expectedDevices: nil,
|
||||
},
|
||||
{
|
||||
description: "Discover 2 devices",
|
||||
mounts: []Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
||||
},
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
|
||||
},
|
||||
},
|
||||
expectedDevices: &[]string{"GPU0,GPU1"}[0],
|
||||
},
|
||||
{
|
||||
description: "Discover 2 devices with slashes in the name",
|
||||
mounts: []Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0-MIG0/0/1"),
|
||||
},
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1-MIG0/0/1"),
|
||||
},
|
||||
},
|
||||
expectedDevices: &[]string{"GPU0-MIG0/0/1,GPU1-MIG0/0/1"}[0],
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
devices := getDevicesFromMounts(tc.mounts)
|
||||
if !reflect.DeepEqual(devices, tc.expectedDevices) {
|
||||
t.Errorf("Unexpected devices (got: %v, wanted: %v)", *devices, *tc.expectedDevices)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestDeviceListSourcePriority(t *testing.T) {
|
||||
var tests = []struct {
|
||||
description string
|
||||
mountDevices []Mount
|
||||
envvarDevices string
|
||||
privileged bool
|
||||
acceptUnprivileged bool
|
||||
acceptMounts bool
|
||||
expectedDevices *string
|
||||
expectedPanic bool
|
||||
}{
|
||||
{
|
||||
description: "Mount devices, unprivileged, no accept unprivileged",
|
||||
mountDevices: []Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
||||
},
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
|
||||
},
|
||||
},
|
||||
envvarDevices: "GPU2,GPU3",
|
||||
privileged: false,
|
||||
acceptUnprivileged: false,
|
||||
acceptMounts: true,
|
||||
expectedDevices: &[]string{"GPU0,GPU1"}[0],
|
||||
},
|
||||
{
|
||||
description: "No mount devices, unprivileged, no accept unprivileged",
|
||||
mountDevices: nil,
|
||||
envvarDevices: "GPU0,GPU1",
|
||||
privileged: false,
|
||||
acceptUnprivileged: false,
|
||||
acceptMounts: true,
|
||||
expectedPanic: true,
|
||||
},
|
||||
{
|
||||
description: "No mount devices, privileged, no accept unprivileged",
|
||||
mountDevices: nil,
|
||||
envvarDevices: "GPU0,GPU1",
|
||||
privileged: true,
|
||||
acceptUnprivileged: false,
|
||||
acceptMounts: true,
|
||||
expectedDevices: &[]string{"GPU0,GPU1"}[0],
|
||||
},
|
||||
{
|
||||
description: "No mount devices, unprivileged, accept unprivileged",
|
||||
mountDevices: nil,
|
||||
envvarDevices: "GPU0,GPU1",
|
||||
privileged: false,
|
||||
acceptUnprivileged: true,
|
||||
acceptMounts: true,
|
||||
expectedDevices: &[]string{"GPU0,GPU1"}[0],
|
||||
},
|
||||
{
|
||||
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
|
||||
mountDevices: []Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
||||
},
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
|
||||
},
|
||||
},
|
||||
envvarDevices: "GPU2,GPU3",
|
||||
privileged: false,
|
||||
acceptUnprivileged: true,
|
||||
acceptMounts: false,
|
||||
expectedDevices: &[]string{"GPU2,GPU3"}[0],
|
||||
},
|
||||
{
|
||||
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
|
||||
mountDevices: []Mount{
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
|
||||
},
|
||||
{
|
||||
Source: "/dev/null",
|
||||
Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
|
||||
},
|
||||
},
|
||||
envvarDevices: "GPU2,GPU3",
|
||||
privileged: false,
|
||||
acceptUnprivileged: false,
|
||||
acceptMounts: false,
|
||||
expectedPanic: true,
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
// Wrap the call to getDevices() in a closure.
|
||||
var devices *string
|
||||
getDevices := func() {
|
||||
env := map[string]string{
|
||||
envNVVisibleDevices: tc.envvarDevices,
|
||||
}
|
||||
hookConfig := getDefaultHookConfig()
|
||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false)
|
||||
}
|
||||
|
||||
// For any tests that are expected to panic, make sure they do.
|
||||
if tc.expectedPanic {
|
||||
mustPanic(t, getDevices)
|
||||
return
|
||||
}
|
||||
|
||||
// For all other tests, just grab the devices and check the results
|
||||
getDevices()
|
||||
if !reflect.DeepEqual(devices, tc.expectedDevices) {
|
||||
t.Errorf("Unexpected devices (got: %v, wanted: %v)", *devices, *tc.expectedDevices)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// elementsMatch reports whether slice0 and slice1 hold the same strings with
// the same multiplicities, irrespective of order. Nil and empty slices are
// treated alike.
func elementsMatch(slice0, slice1 []string) bool {
	// Two multisets of different sizes can never be equal.
	if len(slice0) != len(slice1) {
		return false
	}

	// Tally every element of slice0 ...
	remaining := make(map[string]int, len(slice0))
	for _, item := range slice0 {
		remaining[item]++
	}

	// ... then consume the tally with slice1. With equal lengths, running out
	// of a value is the only way the two multisets can differ.
	for _, item := range slice1 {
		if remaining[item] == 0 {
			return false
		}
		remaining[item]--
	}

	return true
}
|
||||
@@ -18,44 +18,47 @@ var defaultPaths = [...]string{
|
||||
configPath,
|
||||
}
|
||||
|
||||
// CLIConfig: options for nvidia-container-cli.
|
||||
// CLIConfig : options for nvidia-container-cli.
|
||||
type CLIConfig struct {
|
||||
Root *string `toml:"root"`
|
||||
Path *string `toml:"path"`
|
||||
Environment []string `toml:"environment"`
|
||||
Debug *string `toml:"debug"`
|
||||
Ldcache *string `toml:"ldcache"`
|
||||
LoadKmods bool `toml:"load-kmods"`
|
||||
NoPivot bool `toml:"no-pivot"`
|
||||
NoCgroups bool `toml:"no-cgroups"`
|
||||
User *string `toml:"user"`
|
||||
Ldconfig *string `toml:"ldconfig"`
|
||||
AlphaMergeVisibleDevicesEnvvars bool `toml:"alpha-merge-visible-devices-envvars"`
|
||||
Root *string `toml:"root"`
|
||||
Path *string `toml:"path"`
|
||||
Environment []string `toml:"environment"`
|
||||
Debug *string `toml:"debug"`
|
||||
Ldcache *string `toml:"ldcache"`
|
||||
LoadKmods bool `toml:"load-kmods"`
|
||||
NoPivot bool `toml:"no-pivot"`
|
||||
NoCgroups bool `toml:"no-cgroups"`
|
||||
User *string `toml:"user"`
|
||||
Ldconfig *string `toml:"ldconfig"`
|
||||
}
|
||||
|
||||
// HookConfig : options for the nvidia-container-toolkit.
|
||||
type HookConfig struct {
|
||||
DisableRequire bool `toml:"disable-require"`
|
||||
SwarmResource *string `toml:"swarm-resource"`
|
||||
DisableRequire bool `toml:"disable-require"`
|
||||
SwarmResource *string `toml:"swarm-resource"`
|
||||
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
|
||||
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
|
||||
|
||||
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
|
||||
}
|
||||
|
||||
func getDefaultHookConfig() (config HookConfig) {
|
||||
return HookConfig{
|
||||
DisableRequire: false,
|
||||
SwarmResource: nil,
|
||||
DisableRequire: false,
|
||||
SwarmResource: nil,
|
||||
AcceptEnvvarUnprivileged: true,
|
||||
AcceptDeviceListAsVolumeMounts: false,
|
||||
NvidiaContainerCLI: CLIConfig{
|
||||
Root: nil,
|
||||
Path: nil,
|
||||
Environment: []string{},
|
||||
Debug: nil,
|
||||
Ldcache: nil,
|
||||
LoadKmods: true,
|
||||
NoPivot: false,
|
||||
NoCgroups: false,
|
||||
User: nil,
|
||||
Ldconfig: nil,
|
||||
AlphaMergeVisibleDevicesEnvvars: false,
|
||||
Root: nil,
|
||||
Path: nil,
|
||||
Environment: []string{},
|
||||
Debug: nil,
|
||||
Ldcache: nil,
|
||||
LoadKmods: true,
|
||||
NoPivot: false,
|
||||
NoCgroups: false,
|
||||
User: nil,
|
||||
Ldconfig: nil,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,8 +1,8 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"encoding/json"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestParseCudaVersionValid(t *testing.T) {
|
||||
|
||||
Reference in New Issue
Block a user