Compare commits

...

41 Commits

Author SHA1 Message Date
Kevin Klues
26668097c4 Merge branch 'upstream-bump-1.3.0-rc.2' into 'master'
Bump to version 1.3.0 rc.2

See merge request nvidia/container-toolkit/container-toolkit!21
2020-08-10 15:33:25 +00:00
Kevin Klues
caf2792463 Update changelogs for 1.3.0-rc.2
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-08-10 13:08:17 +00:00
Kevin Klues
b2be0b08ac Bump version to 1.3.0-rc.2
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-08-10 13:03:00 +00:00
Kevin Klues
edc5041636 Merge branch 'upstream-update-devices-from-volume-mounts-semantics' into 'master'
Refactor accepting device lists from volume mounts as a boolean

See merge request nvidia/container-toolkit/container-toolkit!20
2020-08-07 18:40:56 +00:00
Kevin Klues
2c1809475c Add more tests for new semantics with device list from volume mounts
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-08-07 16:30:31 +00:00
Kevin Klues
7c00385797 Refactor accepting device lists from volume mounts as a boolean
Also hard code the "root" path where these volume mounts will be looked
for rather than making it configurable.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-08-07 16:30:19 +00:00
Kevin Klues
322006c361 Merge branch 'upstream-bump-1.3.0-rc.1' into 'master'
Bump version to 1.3.0-rc.1

See merge request nvidia/container-toolkit/container-toolkit!19
2020-07-24 20:36:38 +00:00
Kevin Klues
a25017fb8a Merge branch 'upstream-build-prerelease' into 'master'
Update build system to accept a TAG variable for things like rc.x

See merge request nvidia/container-toolkit/container-toolkit!18
2020-07-24 20:22:00 +00:00
Kevin Klues
928905ce94 Update changelogs for 1.3.0-rc.1
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 20:10:42 +00:00
Kevin Klues
7ed17bb9ca Bump version to 1.3.0-rc.1
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 20:03:48 +00:00
Kevin Klues
b50d86c174 Update build system to accept a TAG variable for things like rc.x
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 19:54:29 +00:00
Kevin Klues
bf342fb4c9 Merge branch 'upstream-fix-ci' into 'master'
Generalize CI variables

See merge request nvidia/container-toolkit/container-toolkit!17
2020-07-24 14:28:49 +00:00
Kevin Klues
1791372f22 Generalize CI variables
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 14:01:39 +00:00
Kevin Klues
4448319605 Merge branch 'upstream-add-alternate-device-list' into 'master'
Add the ability to pull the device list from mounted files instead of just Envvars

See merge request nvidia/container-toolkit/container-toolkit!15
2020-07-24 13:18:53 +00:00
Kevin Klues
2ea3150b60 Merge branch 'upstream-simplify-nvidia-config-generation' into 'master'
Simplify logic for `nvidiaConfig` generation

See merge request nvidia/container-toolkit/container-toolkit!14
2020-07-24 13:18:35 +00:00
Kevin Klues
32b4b09bc9 Add tests to verify priority of device list from mounts vs. envvar
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
cc0a22a6d9 Consolidate logic for building nvidiaConfig into a single function
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
e48d23d107 Add test for getDevicesFromMounts()
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
430dda41e9 Remove getNvidiaConfigLegacy() function
A subsequent commit will add equivalent functionality back in

Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
8bcd02ee5d Add logic implementing getDevicesFromMounts()
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
4791fab747 Simplify getMigConfigDevices() and getMigMonitorDevices()
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
7313069d4c Update getDevices() to account for getting the devices list from mounts
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
a24b0c8b4e Split isLegacyCUDAImage() into its own helper function
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
f46d1861d3 Add stub implementation for getDevicesFromMounts()
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
0a9dc3c653 Add test to make sure that getNvidiaConfig() operates as expected
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
889ebae1fe Pull logic to get the device list from ENVVARs out to its own function
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
e4b9318de3 Only run gofmt over go files under pkg/ in CI
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
aec9a28bc3 Push HookConfig and privileged flags down to getDevices() call
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
2ae7cb07cf Add ability to consider container mounts to generate nvidiaConfig
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
da36874e91 Add new config options to pull device list from mounted files not ENVVAR
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
b9ef2db205 Remove unnecessary files from version control
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:50:05 +00:00
Kevin Klues
da6fbb343a Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
This reverts commit 01b4381282.
2020-07-24 12:50:05 +00:00
Kevin Klues
647a805341 Merge branch 'upstream-add-ci-tests' into 'master'
Add common CI tests for things like golint, gofmt, unit tests, etc.

See merge request nvidia/container-toolkit/container-toolkit!16
2020-07-24 12:39:45 +00:00
Kevin Klues
fe65573bdf Add common CI tests for things like golint, gofmt, unit tests, etc
This commit also fixes the minor issues uncovered while running these
tests locally.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:14:26 +00:00
Kevin Klues
a7fb33301c Flip build-all targets to run automatically on merge requests
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 12:14:26 +00:00
Kevin Klues
8b248b6631 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
The repo name on github recently changed, so all references here should
as well.

Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-24 11:40:45 +00:00
Kevin Klues
d10144b3b1 Merge branch 'upstream-add-ngx-all-driver-caps' into 'master'
Add 'ngx' to list of *all* driver capabilities -- Prepare patch release for 1.2.1

See merge request nvidia/container-toolkit/container-toolkit!13
2020-07-22 15:21:11 +00:00
Kevin Klues
ba9758c7ff Update changelogs for 1.2.1
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-22 13:41:44 +00:00
Kevin Klues
d467b87ef9 Bump version to 1.2.1
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-22 13:39:31 +00:00
Kevin Klues
2f4af74320 List config.toml as a config file in the RPM SPEC
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-22 13:39:22 +00:00
Kevin Klues
4e6e0ed4f1 Add 'ngx' to list of *all* driver capabilities
Signed-off-by: Kevin Klues <kklues@nvidia.com>
2020-07-22 13:29:39 +00:00
19 changed files with 1001 additions and 316 deletions

View File

@@ -1,9 +1,24 @@
# Build packages for all supported OS / ARCH combinations
stages:
- tests
- build-one
- build-all
.tests-setup: &tests-setup
image: golang:1.14.4
rules:
- when: always
variables:
GITHUB_ROOT: "github.com/NVIDIA"
PROJECT_GOPATH: "${GITHUB_ROOT}/nvidia-container-toolkit"
before_script:
- mkdir -p ${GOPATH}/src/${GITHUB_ROOT}
- ln -s ${CI_PROJECT_DIR} ${GOPATH}/src/${PROJECT_GOPATH}
.build-setup: &build-setup
image: docker:19.03.8
@@ -17,6 +32,48 @@ stages:
- apk add coreutils build-base sed git bash make
- docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -c yes
# Run a series of sanity-check tests over the code
lint:
<<: *tests-setup
stage: tests
script:
- go get -u golang.org/x/lint/golint
- golint -set_exit_status ${PROJECT_GOPATH}/pkg
vet:
<<: *tests-setup
stage: tests
script:
- go vet ${PROJECT_GOPATH}/pkg
unit_test:
<<: *tests-setup
stage: tests
script:
- go test ${PROJECT_GOPATH}/pkg
fmt:
<<: *tests-setup
stage: tests
script:
- res=$(gofmt -l pkg/*.go)
- echo "$res"
- test -z "$res"
ineffassign:
<<: *tests-setup
stage: tests
script:
- go get -u github.com/gordonklaus/ineffassign
- ineffassign pkg/*.go
misspell:
<<: *tests-setup
stage: tests
script:
- go get -u github.com/client9/misspell/cmd/misspell
- misspell pkg/*.go
# build-one jobs build packages for a single OS / ARCH combination.
#
# They are run during the first stage of the pipeline as a smoke test to ensure
@@ -47,7 +104,7 @@ stages:
- if: $CI_COMMIT_TAG
when: always
- if: $CI_MERGE_REQUEST_ID
when: manual
when: always
variables:
ARTIFACTS_NAME: "${CI_PROJECT_NAME}-${CI_COMMIT_REF_SLUG}-${CI_JOB_NAME}-artifacts-${CI_PIPELINE_ID}"

View File

@@ -5,10 +5,11 @@ MKDIR ?= mkdir
DIST_DIR ?= $(CURDIR)/dist
LIB_NAME := nvidia-container-toolkit
LIB_VERSION := 1.2.0
LIB_VERSION := 1.3.0
LIB_TAG ?= rc.2
GOLANG_VERSION := 1.14.2
GOLANG_PKG_PATH := github.com/NVIDIA/container-toolkit/pkg
GOLANG_PKG_PATH := github.com/NVIDIA/nvidia-container-toolkit/pkg
# By default run all native docker-based targets
docker-native:

View File

@@ -1,5 +1,7 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@@ -1,5 +1,7 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@@ -1,5 +1,7 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@@ -1,5 +1,7 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
#no-cgroups = false
user = "root:video"
ldconfig = "@/sbin/ldconfig"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@@ -1,5 +1,7 @@
disable-require = false
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
@@ -11,7 +13,6 @@ load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig.real"
#alpha-merge-visible-devices-envvars = false
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@@ -1,131 +0,0 @@
package main
import (
"github.com/stretchr/testify/require"
"sort"
"strings"
"testing"
)
func TestMergeVisibleDevicesEnvvars(t *testing.T) {
var tests = []struct {
name string
input []string
expected string
enableMerge bool
}{
{
"Simple Merge Enabled",
[]string{
"NVIDIA_VISIBLE_DEVICES_0=0,1",
"NVIDIA_VISIBLE_DEVICES_1=2,3",
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
},
"0,1,2,3,4,5",
true,
},
{
"Simple Merge Disabled",
[]string{
"NVIDIA_VISIBLE_DEVICES_0=0,1",
"NVIDIA_VISIBLE_DEVICES_1=2,3",
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
},
"",
false,
},
{
"Merge No Override (Enabled)",
[]string{
"NVIDIA_VISIBLE_DEVICES=all",
},
"all",
true,
},
{
"Merge No Override (Disabled)",
[]string{
"NVIDIA_VISIBLE_DEVICES=all",
},
"all",
false,
},
{
"Merge Override (Enabled, Before)",
[]string{
"NVIDIA_VISIBLE_DEVICES=all",
"NVIDIA_VISIBLE_DEVICES_0=0,1",
"NVIDIA_VISIBLE_DEVICES_1=2,3",
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
},
"0,1,2,3,4,5",
true,
},
{
"Merge Override (Enabled, After)",
[]string{
"NVIDIA_VISIBLE_DEVICES_0=0,1",
"NVIDIA_VISIBLE_DEVICES_1=2,3",
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
"NVIDIA_VISIBLE_DEVICES=all",
},
"0,1,2,3,4,5",
true,
},
{
"Merge Override (Enabled, In Between)",
[]string{
"NVIDIA_VISIBLE_DEVICES_0=0,1",
"NVIDIA_VISIBLE_DEVICES_1=2,3",
"NVIDIA_VISIBLE_DEVICES=all",
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
},
"0,1,2,3,4,5",
true,
},
{
"Merge Override (Disabled, Before)",
[]string{
"NVIDIA_VISIBLE_DEVICES=all",
"NVIDIA_VISIBLE_DEVICES_0=0,1",
"NVIDIA_VISIBLE_DEVICES_1=2,3",
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
},
"all",
false,
},
{
"Merge Override (Disabled, After)",
[]string{
"NVIDIA_VISIBLE_DEVICES_0=0,1",
"NVIDIA_VISIBLE_DEVICES_1=2,3",
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
"NVIDIA_VISIBLE_DEVICES=all",
},
"all",
false,
},
{
"Merge Override (Disabled, In Between)",
[]string{
"NVIDIA_VISIBLE_DEVICES_0=0,1",
"NVIDIA_VISIBLE_DEVICES_1=2,3",
"NVIDIA_VISIBLE_DEVICES=all",
"NVIDIA_VISIBLE_DEVICES_WHATEVER=4,5",
},
"all",
false,
},
}
for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
config := CLIConfig{
AlphaMergeVisibleDevicesEnvvars: tc.enableMerge,
}
envvars := getEnvMap(tc.input, config)
devices := strings.Split(envvars[envNVVisibleDevices], ",")
sort.Strings(devices)
require.Equal(t, tc.expected, strings.Join(devices, ","))
})
}
}

View File

@@ -75,28 +75,30 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
# private ubuntu target
--ubuntu%: OS := ubuntu
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--ubuntu%: PKG_REV := 1
# private debian target
--debian%: OS := debian
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
--debian%: PKG_REV := 1
# private centos target
--centos%: OS := centos
--centos%: PKG_REV := 2
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),2)
# private amazonlinux target
--amazonlinux%: OS := amazonlinux
--amazonlinux%: PKG_REV = 2.amzn$(VERSION)
--amazonlinux%: PKG_REV = $(if $(LIB_TAG),0.1.$(LIB_TAG).amzn$(VERSION),2.amzn$(VERSION))
# private opensuse-leap target
--opensuse-leap%: OS = opensuse-leap
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
--opensuse-leap%: PKG_REV := 1
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
# private rhel target (actually built on centos)
--rhel%: OS := centos
--rhel%: PKG_REV := 2
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),2)
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)

2
go.mod
View File

@@ -1,4 +1,4 @@
module github.com/NVIDIA/container-toolkit
module github.com/NVIDIA/nvidia-container-toolkit
go 1.14

View File

@@ -1,3 +1,28 @@
nvidia-container-toolkit (1.3.0~rc.2-1) experimental; urgency=medium
* 2c180947 Add more tests for new semantics with device list from volume mounts
* 7c003857 Refactor accepting device lists from volume mounts as a boolean
-- NVIDIA CORPORATION <cudatools@nvidia.com> Mon, 10 Aug 2020 15:05:34 -0700
nvidia-container-toolkit (1.3.0~rc.1-1) experimental; urgency=medium
* b50d86c1 Update build system to accept a TAG variable for things like rc.x
* fe65573b Add common CI tests for things like golint, gofmt, unit tests, etc.
* da6fbb34 Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
* a7fb3330 Flip build-all targets to run automatically on merge requests
* 8b248b66 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
* da36874e Add new config options to pull device list from mounted files instead of ENVVAR
-- NVIDIA CORPORATION <cudatools@nvidia.com> Fri, 24 Jul 2020 22:21:49 -0700
nvidia-container-toolkit (1.2.1-1) UNRELEASED; urgency=medium
* 4e6e0ed4 Add 'ngx' to list of *all* driver capabilities
* 2f4af743 List config.toml as a config file in the RPM SPEC
-- NVIDIA CORPORATION <cudatools@nvidia.com> Wed, 22 Jul 2020 15:36:12 -0700
nvidia-container-toolkit (1.2.0-1) UNRELEASED; urgency=medium
* 8e0aab46 Fix repo listed in changelog for debian distributions

View File

@@ -1,6 +1,6 @@
Format: http://www.debian.org/doc/packaging-manuals/copyright-format/1.0/
Upstream-Name: nvidia-container-toolkit
Source: https://github.com/NVIDIA/container-toolkit
Source: https://github.com/NVIDIA/nvidia-container-toolkit
Files: *
Copyright: 2017-2020 NVIDIA CORPORATION <cudatools@nvidia.com>

View File

@@ -48,11 +48,27 @@ rm -f %{_bindir}/nvidia-container-runtime-hook
%files
%license LICENSE
%{_bindir}/nvidia-container-toolkit
/etc/nvidia-container-runtime/config.toml
%config /etc/nvidia-container-runtime/config.toml
/usr/libexec/oci/hooks.d/oci-nvidia-hook
/usr/share/containers/oci/hooks.d/oci-nvidia-hook.json
%changelog
* Mon Aug 10 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.3.0-0.1.rc.2
- 2c180947 Add more tests for new semantics with device list from volume mounts
- 7c003857 Refactor accepting device lists from volume mounts as a boolean
* Fri Jul 24 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.3.0-0.1.rc.1
- b50d86c1 Update build system to accept a TAG variable for things like rc.x
- fe65573b Add common CI tests for things like golint, gofmt, unit tests, etc.
- da6fbb34 Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
- a7fb3330 Flip build-all targets to run automatically on merge requests
- 8b248b66 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
- da36874e Add new config options to pull device list from mounted files instead of ENVVAR
* Wed Jul 22 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.2.1-1
- 4e6e0ed4 Add 'ngx' to list of *all* driver capabilities
- 2f4af743 List config.toml as a config file in the RPM SPEC
* Wed Jul 08 2020 NVIDIA CORPORATION <cudatools@nvidia.com> 1.2.0-1
- 8e0aab46 Fix repo listed in changelog for debian distributions
- 320bb6e4 Update dependence on libnvidia-container to 1.2.0

12
pkg/Godeps/Godeps.json generated
View File

@@ -1,12 +0,0 @@
{
"ImportPath": "github.com/nvidia/nvidia-container-runtime/toolkit/nvidia-container-toolkit",
"GoVersion": "go1.9",
"GodepVersion": "v80",
"Deps": [
{
"ImportPath": "github.com/BurntSushi/toml",
"Comment": "v0.3.0-7-ga368813",
"Rev": "a368813c5e648fee92e5f6c30e3944ff9d5e8895"
}
]
}

Binary file not shown.

View File

@@ -6,6 +6,7 @@ import (
"log"
"os"
"path"
"path/filepath"
"strconv"
"strings"
@@ -26,7 +27,7 @@ const (
)
const (
allDriverCapabilities = "compute,compat32,graphics,utility,video,display"
allDriverCapabilities = "compute,compat32,graphics,utility,video,display,ngx"
defaultDriverCapabilities = "utility"
)
@@ -34,6 +35,10 @@ const (
capSysAdmin = "CAP_SYS_ADMIN"
)
const (
deviceListAsVolumeMountsRoot = "/var/run/nvidia-container-devices"
)
type nvidiaConfig struct {
Devices string
MigConfigDevices string
@@ -50,17 +55,20 @@ type containerConfig struct {
Nvidia *nvidiaConfig
}
// Root from OCI runtime spec
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L94-L100
type Root struct {
// Path is read from the "path" key; getContainerConfig uses it as the
// container's Rootfs.
Path string `json:"path"`
}
// Process from OCI runtime spec (environment and capabilities only)
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L57
type Process struct {
// Env is the list of environment entries; getEnvMap turns it into a map by
// splitting each entry on the first "=".
Env []string `json:"env,omitempty"`
// Capabilities is held as raw JSON rather than as a fixed Go type.
Capabilities *json.RawMessage `json:"capabilities,omitempty" platform:"linux"`
}
// LinuxCapabilities from OCI runtime spec
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L61
type LinuxCapabilities struct {
Bounding []string `json:"bounding,omitempty" platform:"linux"`
@@ -70,14 +78,26 @@ type LinuxCapabilities struct {
Ambient []string `json:"ambient,omitempty" platform:"linux"`
}
// Mount from OCI runtime spec
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L103
type Mount struct {
// Destination is the mount point inside the container; getDevicesFromMounts
// compares it against deviceListAsVolumeMountsRoot.
Destination string `json:"destination"`
Type string `json:"type,omitempty" platform:"linux,solaris"`
// Source is the host side of the mount; getDevicesFromMounts only accepts
// mounts whose Source is /dev/null.
Source string `json:"source,omitempty"`
Options []string `json:"options,omitempty"`
}
// Spec from OCI runtime spec
// We use pointers to structs, similarly to the latest version of runtime-spec:
// https://github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L5-L28
type Spec struct {
Version *string `json:"ociVersion"`
// Process supplies the environment (Process.Env) that getContainerConfig
// feeds to getEnvMap.
Process *Process `json:"process,omitempty"`
// Root supplies the rootfs path (Root.Path) reported in containerConfig.
Root *Root `json:"root,omitempty"`
// Mounts is handed to getNvidiaConfig so that the device list can be derived
// from volume mounts.
Mounts []Mount `json:"mounts,omitempty"`
}
// HookState holds state information about the hook
type HookState struct {
Pid int `json:"pid,omitempty"`
// After 17.06, runc is using the runtime spec:
@@ -103,7 +123,7 @@ func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
return
}
func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
func getEnvMap(e []string) (m map[string]string) {
m = make(map[string]string)
for _, s := range e {
p := strings.SplitN(s, "=", 2)
@@ -112,17 +132,6 @@ func getEnvMap(e []string, config CLIConfig) (m map[string]string) {
}
m[p[0]] = p[1]
}
if config.AlphaMergeVisibleDevicesEnvvars {
var mergable []string
for k, v := range m {
if strings.HasPrefix(k, envNVVisibleDevices+"_") {
mergable = append(mergable, v)
}
}
if len(mergable) > 0 {
m[envNVVisibleDevices] = strings.Join(mergable, ",")
}
}
return
}
@@ -174,7 +183,7 @@ func isPrivileged(s *Spec) bool {
// We only make sure that the bounding capability set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the priviliged NVIDIA capabilities.
// access the privileged NVIDIA capabilities.
caps = lc.Bounding
}
@@ -187,49 +196,156 @@ func isPrivileged(s *Spec) bool {
return false
}
func getDevices(env map[string]string) *string {
gpuVars := []string{envNVVisibleDevices}
func isLegacyCUDAImage(env map[string]string) bool {
legacyCudaVersion := env[envCUDAVersion]
cudaRequire := env[envNVRequireCUDA]
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
}
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
// Build a list of envvars to consider.
envVars := []string{envNVVisibleDevices}
if envSwarmGPU != nil {
// The Swarm resource has higher precedence.
gpuVars = append([]string{*envSwarmGPU}, gpuVars...)
// The Swarm envvar has higher precedence.
envVars = append([]string{*envSwarmGPU}, envVars...)
}
for _, gpuVar := range gpuVars {
if devices, ok := env[gpuVar]; ok {
return &devices
// Grab a reference to devices from the first envvar
// in the list that actually exists in the environment.
var devices *string
for _, envVar := range envVars {
if devs, ok := env[envVar]; ok {
devices = &devs
}
}
// Environment variable unset with legacy image: default to "all".
if devices == nil && legacyImage {
all := "all"
return &all
}
// Environment variable unset or empty or "void": return nil
if devices == nil || len(*devices) == 0 || *devices == "void" {
return nil
}
// Environment variable set to "none": reset to "".
if *devices == "none" {
empty := ""
return &empty
}
// Any other value.
return devices
}
// getDevicesFromMounts builds a device list from the container's volume mounts.
//
// A mount contributes a device when its source is /dev/null and its
// destination lies strictly below deviceListAsVolumeMountsRoot. The device
// name is the destination path relative to that root. Source, destination and
// root are all normalized with filepath.Clean before being compared.
//
// It returns the device names joined with "," in mount order, or nil when no
// mount qualifies.
func getDevicesFromMounts(mounts []Mount) *string {
	// The root does not depend on the mount, so normalize it once. Matching on
	// the root plus a trailing separator keeps sibling directories that merely
	// share the textual prefix (e.g. "<root>-other/GPU0") from being accepted
	// as devices.
	root := filepath.Clean(deviceListAsVolumeMountsRoot)
	prefix := root + string(filepath.Separator)

	var devices []string
	for _, m := range mounts {
		// Only consider mounts whose host volume is /dev/null
		if filepath.Clean(m.Source) != "/dev/null" {
			continue
		}

		// Only consider container mount points located below 'root'. A
		// destination equal to 'root' itself names no device and is skipped.
		destination := filepath.Clean(m.Destination)
		if !strings.HasPrefix(destination, prefix) {
			continue
		}

		// The path beyond 'root' is the device name.
		device := strings.TrimPrefix(destination, prefix)
		if len(device) == 0 {
			continue
		}
		devices = append(devices, device)
	}

	if len(devices) == 0 {
		return nil
	}

	ret := strings.Join(devices, ",")
	return &ret
}
// getDevices selects the device list for the container.
//
// When hookConfig.AcceptDeviceListAsVolumeMounts is set, a non-nil result from
// getDevicesFromMounts wins. Otherwise the list comes from
// getDevicesFromEnvvar: nil is passed through unchanged, and a non-nil list is
// returned only if the container is privileged or
// hookConfig.AcceptEnvvarUnprivileged is set. In the remaining case the
// function panics via log.Panicln.
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
	// Volume mounts take priority over the envvar when the feature is enabled.
	if hookConfig.AcceptDeviceListAsVolumeMounts {
		if fromMounts := getDevicesFromMounts(mounts); fromMounts != nil {
			return fromMounts
		}
	}

	// Otherwise consult the environment, subject to the privilege check.
	fromEnv := getDevicesFromEnvvar(env, legacyImage)
	switch {
	case fromEnv == nil:
		return nil
	case privileged, hookConfig.AcceptEnvvarUnprivileged:
		return fromEnv
	}

	// A device list was requested through the envvar without permission to do so.
	log.Panicln("insufficient privileges to read device list from NVIDIA_VISIBLE_DEVICES envvar")
	return nil
}
func getMigConfigDevices(env map[string]string) *string {
gpuVars := []string{envNVMigConfigDevices}
for _, gpuVar := range gpuVars {
if devices, ok := env[gpuVar]; ok {
return &devices
}
if devices, ok := env[envNVMigConfigDevices]; ok {
return &devices
}
return nil
}
func getMigMonitorDevices(env map[string]string) *string {
gpuVars := []string{envNVMigMonitorDevices}
for _, gpuVar := range gpuVars {
if devices, ok := env[gpuVar]; ok {
return &devices
}
if devices, ok := env[envNVMigMonitorDevices]; ok {
return &devices
}
return nil
}
func getDriverCapabilities(env map[string]string) *string {
if capabilities, ok := env[envNVDriverCapabilities]; ok {
return &capabilities
func getDriverCapabilities(env map[string]string, legacyImage bool) *string {
// Grab a reference to the capabilities from the envvar
// if it actually exists in the environment.
var capabilities *string
if caps, ok := env[envNVDriverCapabilities]; ok {
capabilities = &caps
}
return nil
// Environment variable unset with legacy image: set all capabilities.
if capabilities == nil && legacyImage {
allCaps := allDriverCapabilities
return &allCaps
}
// Environment variable unset or set but empty: set default capabilities.
if capabilities == nil || len(*capabilities) == 0 {
defaultCaps := defaultDriverCapabilities
return &defaultCaps
}
// Environment variable set to "all": set all capabilities.
if *capabilities == "all" {
allCaps := allDriverCapabilities
return &allCaps
}
// Any other value
return capabilities
}
func getRequirements(env map[string]string) []string {
func getRequirements(env map[string]string, legacyImage bool) []string {
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
for name, value := range env {
@@ -237,24 +353,23 @@ func getRequirements(env map[string]string) []string {
requirements = append(requirements, value)
}
}
if legacyImage {
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
requirements = append(requirements, cudaRequire)
}
return requirements
}
// Mimic the new CUDA images if no capabilities or devices are specified.
func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig {
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
legacyImage := isLegacyCUDAImage(env)
var devices string
if d := getDevices(env); d == nil {
// Environment variable unset: default to "all".
devices = "all"
} else if len(*d) == 0 || *d == "void" {
// Environment variable empty or "void": not a GPU container.
return nil
} else {
// Environment variable non-empty and not "void".
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
devices = *d
}
if devices == "none" {
devices = ""
} else {
// 'nil' devices means this is not a GPU container.
return nil
}
var migConfigDevices string
@@ -274,88 +389,11 @@ func getNvidiaConfigLegacy(env map[string]string, privileged bool) *nvidiaConfig
}
var driverCapabilities string
if c := getDriverCapabilities(env); c == nil {
// Environment variable unset: default to "all".
driverCapabilities = allDriverCapabilities
} else if len(*c) == 0 {
// Environment variable empty: use default capability.
driverCapabilities = defaultDriverCapabilities
} else {
// Environment variable non-empty.
if c := getDriverCapabilities(env, legacyImage); c != nil {
driverCapabilities = *c
}
if driverCapabilities == "all" {
driverCapabilities = allDriverCapabilities
}
requirements := getRequirements(env)
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
requirements = append(requirements, cudaRequire)
// Don't fail on invalid values.
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
return &nvidiaConfig{
Devices: devices,
MigConfigDevices: migConfigDevices,
MigMonitorDevices: migMonitorDevices,
DriverCapabilities: driverCapabilities,
Requirements: requirements,
DisableRequire: disableRequire,
}
}
func getNvidiaConfig(env map[string]string, privileged bool) *nvidiaConfig {
legacyCudaVersion := env[envCUDAVersion]
cudaRequire := env[envNVRequireCUDA]
if len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 {
// Legacy CUDA image detected.
return getNvidiaConfigLegacy(env, privileged)
}
var devices string
if d := getDevices(env); d == nil || len(*d) == 0 || *d == "void" {
// Environment variable unset or empty or "void": not a GPU container.
return nil
} else {
// Environment variable non-empty and not "void".
devices = *d
}
if devices == "none" {
devices = ""
}
var migConfigDevices string
if d := getMigConfigDevices(env); d != nil {
migConfigDevices = *d
}
if !privileged && migConfigDevices != "" {
log.Panicln("cannot set MIG_CONFIG_DEVICES in non privileged container")
}
var migMonitorDevices string
if d := getMigMonitorDevices(env); d != nil {
migMonitorDevices = *d
}
if !privileged && migMonitorDevices != "" {
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
var driverCapabilities string
if c := getDriverCapabilities(env); c == nil || len(*c) == 0 {
// Environment variable unset or set but empty: use default capability.
driverCapabilities = defaultDriverCapabilities
} else {
// Environment variable set and non-empty.
driverCapabilities = *c
}
if driverCapabilities == "all" {
driverCapabilities = allDriverCapabilities
}
requirements := getRequirements(env)
requirements := getRequirements(env, legacyImage)
// Don't fail on invalid values.
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
@@ -384,13 +422,13 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
s := loadSpec(path.Join(b, "config.json"))
env := getEnvMap(s.Process.Env, hook.NvidiaContainerCLI)
env := getEnvMap(s.Process.Env)
privileged := isPrivileged(s)
envSwarmGPU = hook.SwarmResource
return containerConfig{
Pid: h.Pid,
Rootfs: s.Root.Path,
Env: env,
Nvidia: getNvidiaConfig(env, privileged),
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
}
}

681
pkg/container_test.go Normal file
View File

@@ -0,0 +1,681 @@
package main
import (
"path/filepath"
"reflect"
"testing"
)
// TestGetNvidiaConfig is a table-driven test for getNvidiaConfig(). Each case
// supplies a container environment and a privileged flag, and states either
// the nvidiaConfig that must be produced (nil meaning "not a GPU container")
// or that the call must panic.
func TestGetNvidiaConfig(t *testing.T) {
	var tests = []struct {
		description    string
		env            map[string]string
		privileged     bool
		expectedConfig *nvidiaConfig
		expectedPanic  bool
	}{
		{
			description:    "No environment, unprivileged",
			env:            map[string]string{},
			privileged:     false,
			expectedConfig: nil,
		},
		{
			description:    "No environment, privileged",
			env:            map[string]string{},
			privileged:     true,
			expectedConfig: nil,
		},
		{
			description: "Legacy image, no devices, no capabilities, no requirements",
			env: map[string]string{
				envCUDAVersion: "9.0",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "all",
				DriverCapabilities: allDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Legacy image, devices 'all', no capabilities, no requirements",
			env: map[string]string{
				envCUDAVersion:      "9.0",
				envNVVisibleDevices: "all",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "all",
				DriverCapabilities: allDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Legacy image, devices 'empty', no capabilities, no requirements",
			env: map[string]string{
				envCUDAVersion:      "9.0",
				envNVVisibleDevices: "",
			},
			privileged:     false,
			expectedConfig: nil,
		},
		{
			description: "Legacy image, devices 'void', no capabilities, no requirements",
			env: map[string]string{
				envCUDAVersion: "9.0",
				// Must be the literal "void"; an empty value would only
				// repeat the 'empty' case above.
				envNVVisibleDevices: "void",
			},
			privileged:     false,
			expectedConfig: nil,
		},
		{
			description: "Legacy image, devices 'none', no capabilities, no requirements",
			env: map[string]string{
				envCUDAVersion:      "9.0",
				envNVVisibleDevices: "none",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "",
				DriverCapabilities: allDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Legacy image, devices set, no capabilities, no requirements",
			env: map[string]string{
				envCUDAVersion:      "9.0",
				envNVVisibleDevices: "gpu0,gpu1",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: allDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Legacy image, devices set, capabilities 'empty', no requirements",
			env: map[string]string{
				envCUDAVersion:          "9.0",
				envNVVisibleDevices:     "gpu0,gpu1",
				envNVDriverCapabilities: "",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: defaultDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Legacy image, devices set, capabilities 'all', no requirements",
			env: map[string]string{
				envCUDAVersion:          "9.0",
				envNVVisibleDevices:     "gpu0,gpu1",
				envNVDriverCapabilities: "all",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: allDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Legacy image, devices set, capabilities set, no requirements",
			env: map[string]string{
				envCUDAVersion:          "9.0",
				envNVVisibleDevices:     "gpu0,gpu1",
				envNVDriverCapabilities: "cap0,cap1",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: "cap0,cap1",
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Legacy image, devices set, capabilities set, requirements set",
			env: map[string]string{
				envCUDAVersion:              "9.0",
				envNVVisibleDevices:         "gpu0,gpu1",
				envNVDriverCapabilities:     "cap0,cap1",
				envNVRequirePrefix + "REQ0": "req0=true",
				envNVRequirePrefix + "REQ1": "req1=false",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: "cap0,cap1",
				Requirements:       []string{"cuda>=9.0", "req0=true", "req1=false"},
				DisableRequire:     false,
			},
		},
		{
			description: "Legacy image, devices set, capabilities set, requirements set, disable requirements",
			env: map[string]string{
				envCUDAVersion:              "9.0",
				envNVVisibleDevices:         "gpu0,gpu1",
				envNVDriverCapabilities:     "cap0,cap1",
				envNVRequirePrefix + "REQ0": "req0=true",
				envNVRequirePrefix + "REQ1": "req1=false",
				envNVDisableRequire:         "true",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: "cap0,cap1",
				Requirements:       []string{"cuda>=9.0", "req0=true", "req1=false"},
				DisableRequire:     true,
			},
		},
		{
			description: "Modern image, no devices, no capabilities, no requirements, no envCUDAVersion",
			env: map[string]string{
				envNVRequireCUDA: "cuda>=9.0",
			},
			privileged:     false,
			expectedConfig: nil,
		},
		{
			description: "Modern image, no devices, no capabilities, no requirement, envCUDAVersion set",
			env: map[string]string{
				envCUDAVersion:   "9.0",
				envNVRequireCUDA: "cuda>=9.0",
			},
			privileged:     false,
			expectedConfig: nil,
		},
		{
			description: "Modern image, devices 'all', no capabilities, no requirements",
			env: map[string]string{
				envNVRequireCUDA:    "cuda>=9.0",
				envNVVisibleDevices: "all",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "all",
				DriverCapabilities: defaultDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices 'empty', no capabilities, no requirements",
			env: map[string]string{
				envNVRequireCUDA:    "cuda>=9.0",
				envNVVisibleDevices: "",
			},
			privileged:     false,
			expectedConfig: nil,
		},
		{
			description: "Modern image, devices 'void', no capabilities, no requirements",
			env: map[string]string{
				envNVRequireCUDA: "cuda>=9.0",
				// Must be the literal "void"; an empty value would only
				// repeat the 'empty' case above.
				envNVVisibleDevices: "void",
			},
			privileged:     false,
			expectedConfig: nil,
		},
		{
			description: "Modern image, devices 'none', no capabilities, no requirements",
			env: map[string]string{
				envNVRequireCUDA:    "cuda>=9.0",
				envNVVisibleDevices: "none",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "",
				DriverCapabilities: defaultDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices set, no capabilities, no requirements",
			env: map[string]string{
				envNVRequireCUDA:    "cuda>=9.0",
				envNVVisibleDevices: "gpu0,gpu1",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: defaultDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices set, capabilities 'empty', no requirements",
			env: map[string]string{
				envNVRequireCUDA:        "cuda>=9.0",
				envNVVisibleDevices:     "gpu0,gpu1",
				envNVDriverCapabilities: "",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: defaultDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices set, capabilities 'all', no requirements",
			env: map[string]string{
				envNVRequireCUDA:        "cuda>=9.0",
				envNVVisibleDevices:     "gpu0,gpu1",
				envNVDriverCapabilities: "all",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: allDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices set, capabilities set, no requirements",
			env: map[string]string{
				envNVRequireCUDA:        "cuda>=9.0",
				envNVVisibleDevices:     "gpu0,gpu1",
				envNVDriverCapabilities: "cap0,cap1",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: "cap0,cap1",
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices set, capabilities set, requirements set",
			env: map[string]string{
				envNVRequireCUDA:            "cuda>=9.0",
				envNVVisibleDevices:         "gpu0,gpu1",
				envNVDriverCapabilities:     "cap0,cap1",
				envNVRequirePrefix + "REQ0": "req0=true",
				envNVRequirePrefix + "REQ1": "req1=false",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: "cap0,cap1",
				Requirements:       []string{"cuda>=9.0", "req0=true", "req1=false"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices set, capabilities set, requirements set, disable requirements",
			env: map[string]string{
				envNVRequireCUDA:            "cuda>=9.0",
				envNVVisibleDevices:         "gpu0,gpu1",
				envNVDriverCapabilities:     "cap0,cap1",
				envNVRequirePrefix + "REQ0": "req0=true",
				envNVRequirePrefix + "REQ1": "req1=false",
				envNVDisableRequire:         "true",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "gpu0,gpu1",
				DriverCapabilities: "cap0,cap1",
				Requirements:       []string{"cuda>=9.0", "req0=true", "req1=false"},
				DisableRequire:     true,
			},
		},
		{
			description: "No cuda envs, devices 'all'",
			env: map[string]string{
				envNVVisibleDevices: "all",
			},
			privileged: false,
			expectedConfig: &nvidiaConfig{
				Devices:            "all",
				DriverCapabilities: defaultDriverCapabilities,
				Requirements:       []string{},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices 'all', migConfig set, privileged",
			env: map[string]string{
				envNVRequireCUDA:      "cuda>=9.0",
				envNVVisibleDevices:   "all",
				envNVMigConfigDevices: "mig0,mig1",
			},
			privileged: true,
			expectedConfig: &nvidiaConfig{
				Devices:            "all",
				MigConfigDevices:   "mig0,mig1",
				DriverCapabilities: defaultDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices 'all', migConfig set, unprivileged",
			env: map[string]string{
				envNVRequireCUDA:      "cuda>=9.0",
				envNVVisibleDevices:   "all",
				envNVMigConfigDevices: "mig0,mig1",
			},
			privileged:    false,
			expectedPanic: true,
		},
		{
			description: "Modern image, devices 'all', migMonitor set, privileged",
			env: map[string]string{
				envNVRequireCUDA:       "cuda>=9.0",
				envNVVisibleDevices:    "all",
				envNVMigMonitorDevices: "mig0,mig1",
			},
			privileged: true,
			expectedConfig: &nvidiaConfig{
				Devices:            "all",
				MigMonitorDevices:  "mig0,mig1",
				DriverCapabilities: defaultDriverCapabilities,
				Requirements:       []string{"cuda>=9.0"},
				DisableRequire:     false,
			},
		},
		{
			description: "Modern image, devices 'all', migMonitor set, unprivileged",
			env: map[string]string{
				envNVRequireCUDA:       "cuda>=9.0",
				envNVVisibleDevices:    "all",
				envNVMigMonitorDevices: "mig0,mig1",
			},
			privileged:    false,
			expectedPanic: true,
		},
	}
	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			// Wrap the call to getNvidiaConfig() in a closure so that the
			// same invocation can be handed to mustPanic() or run directly.
			var config *nvidiaConfig
			getConfig := func() {
				hookConfig := getDefaultHookConfig()
				config = getNvidiaConfig(&hookConfig, tc.env, nil, tc.privileged)
			}

			// For any tests that are expected to panic, make sure they do.
			if tc.expectedPanic {
				mustPanic(t, getConfig)
				return
			}

			// For all other tests, just grab the config.
			getConfig()

			// Both nil: the container was correctly classified as non-GPU.
			if config == nil && tc.expectedConfig == nil {
				return
			}
			// Exactly one nil: report it without touching any fields.
			if config == nil || tc.expectedConfig == nil {
				t.Errorf("Unexpected nvidiaConfig (got: %v, wanted: %v)", config, tc.expectedConfig)
				return
			}

			// Compare field by field so a failure names the mismatching field.
			want := tc.expectedConfig
			if config.Devices != want.Devices {
				t.Errorf("Unexpected Devices (got: %v, wanted: %v)", config.Devices, want.Devices)
			}
			if config.MigConfigDevices != want.MigConfigDevices {
				t.Errorf("Unexpected MigConfigDevices (got: %v, wanted: %v)", config.MigConfigDevices, want.MigConfigDevices)
			}
			if config.MigMonitorDevices != want.MigMonitorDevices {
				t.Errorf("Unexpected MigMonitorDevices (got: %v, wanted: %v)", config.MigMonitorDevices, want.MigMonitorDevices)
			}
			if config.DriverCapabilities != want.DriverCapabilities {
				t.Errorf("Unexpected DriverCapabilities (got: %v, wanted: %v)", config.DriverCapabilities, want.DriverCapabilities)
			}
			// Requirements are compared as a multiset via elementsMatch(),
			// so their order does not matter.
			if !elementsMatch(config.Requirements, want.Requirements) {
				t.Errorf("Unexpected Requirements (got: %v, wanted: %v)", config.Requirements, want.Requirements)
			}
			if config.DisableRequire != want.DisableRequire {
				t.Errorf("Unexpected DisableRequire (got: %v, wanted: %v)", config.DisableRequire, want.DisableRequire)
			}
		})
	}
}
// TestGetDevicesFromMounts is a table-driven test for getDevicesFromMounts().
// Each case supplies a list of mounts and the device list expected back; a nil
// expectation means no device list should be derived from the mounts.
func TestGetDevicesFromMounts(t *testing.T) {
	var tests = []struct {
		description     string
		mounts          []Mount
		expectedDevices *string
	}{
		{
			description:     "No mounts",
			mounts:          nil,
			expectedDevices: nil,
		},
		{
			description: "Host path is not /dev/null",
			mounts: []Mount{
				{
					Source:      "/not/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
				},
			},
			expectedDevices: nil,
		},
		{
			description: "Container path is not prefixed by 'root'",
			mounts: []Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join("/other/prefix", "GPU0"),
				},
			},
			expectedDevices: nil,
		},
		{
			description: "Container path is only 'root'",
			mounts: []Mount{
				{
					Source:      "/dev/null",
					Destination: deviceListAsVolumeMountsRoot,
				},
			},
			expectedDevices: nil,
		},
		{
			description: "Discover 2 devices",
			mounts: []Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			expectedDevices: &[]string{"GPU0,GPU1"}[0],
		},
		{
			description: "Discover 2 devices with slashes in the name",
			mounts: []Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0-MIG0/0/1"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1-MIG0/0/1"),
				},
			},
			expectedDevices: &[]string{"GPU0-MIG0/0/1,GPU1-MIG0/0/1"}[0],
		},
	}

	// show renders an optional device list for failure messages. It must not
	// dereference nil: a nil-vs-non-nil mismatch is exactly the failure that
	// needs to be reported rather than turned into a panic.
	show := func(s *string) string {
		if s == nil {
			return "<nil>"
		}
		return "\"" + *s + "\""
	}

	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			devices := getDevicesFromMounts(tc.mounts)
			if !reflect.DeepEqual(devices, tc.expectedDevices) {
				t.Errorf("Unexpected devices (got: %v, wanted: %v)", show(devices), show(tc.expectedDevices))
			}
		})
	}
}
// TestDeviceListSourcePriority is a table-driven test for getDevices(). Each
// case combines a device list given as volume mounts, a device list given via
// the envNVVisibleDevices variable, the container's privileged flag and the
// two HookConfig switches (AcceptEnvvarUnprivileged and
// AcceptDeviceListAsVolumeMounts), and states either the device list that must
// be selected or that the call must panic.
func TestDeviceListSourcePriority(t *testing.T) {
	var tests = []struct {
		description        string
		mountDevices       []Mount
		envvarDevices      string
		privileged         bool
		acceptUnprivileged bool
		acceptMounts       bool
		expectedDevices    *string
		expectedPanic      bool
	}{
		{
			description: "Mount devices, unprivileged, no accept unprivileged",
			mountDevices: []Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedDevices:    &[]string{"GPU0,GPU1"}[0],
		},
		{
			description:        "No mount devices, unprivileged, no accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedPanic:      true,
		},
		{
			description:        "No mount devices, privileged, no accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         true,
			acceptUnprivileged: false,
			acceptMounts:       true,
			expectedDevices:    &[]string{"GPU0,GPU1"}[0],
		},
		{
			description:        "No mount devices, unprivileged, accept unprivileged",
			mountDevices:       nil,
			envvarDevices:      "GPU0,GPU1",
			privileged:         false,
			acceptUnprivileged: true,
			acceptMounts:       true,
			expectedDevices:    &[]string{"GPU0,GPU1"}[0],
		},
		{
			description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
			mountDevices: []Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: true,
			acceptMounts:       false,
			expectedDevices:    &[]string{"GPU2,GPU3"}[0],
		},
		{
			description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
			mountDevices: []Mount{
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU0"),
				},
				{
					Source:      "/dev/null",
					Destination: filepath.Join(deviceListAsVolumeMountsRoot, "GPU1"),
				},
			},
			envvarDevices:      "GPU2,GPU3",
			privileged:         false,
			acceptUnprivileged: false,
			acceptMounts:       false,
			expectedPanic:      true,
		},
	}

	// show renders an optional device list for failure messages. It must not
	// dereference nil: a nil-vs-non-nil mismatch is exactly the failure that
	// needs to be reported rather than turned into a panic.
	show := func(s *string) string {
		if s == nil {
			return "<nil>"
		}
		return "\"" + *s + "\""
	}

	for _, tc := range tests {
		t.Run(tc.description, func(t *testing.T) {
			// Wrap the call to getDevices() in a closure. The closure has its
			// own name so it does not shadow the function under test.
			var devices *string
			callGetDevices := func() {
				env := map[string]string{
					envNVVisibleDevices: tc.envvarDevices,
				}
				hookConfig := getDefaultHookConfig()
				hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
				hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
				devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false)
			}

			// For any tests that are expected to panic, make sure they do.
			if tc.expectedPanic {
				mustPanic(t, callGetDevices)
				return
			}

			// For all other tests, just grab the devices and check the results.
			callGetDevices()
			if !reflect.DeepEqual(devices, tc.expectedDevices) {
				t.Errorf("Unexpected devices (got: %v, wanted: %v)", show(devices), show(tc.expectedDevices))
			}
		})
	}
}
// elementsMatch reports whether slice0 and slice1 hold the same strings with
// the same multiplicities, regardless of order. A nil slice and an empty slice
// are treated alike.
func elementsMatch(slice0, slice1 []string) bool {
	// Two multisets of different sizes can never be equal.
	if len(slice0) != len(slice1) {
		return false
	}

	// Count occurrences from slice0 upwards and from slice1 downwards; the
	// multisets are equal exactly when every tally ends at zero.
	balance := make(map[string]int, len(slice0))
	for _, item := range slice0 {
		balance[item]++
	}
	for _, item := range slice1 {
		balance[item]--
	}
	for _, remaining := range balance {
		if remaining != 0 {
			return false
		}
	}
	return true
}

View File

@@ -18,44 +18,47 @@ var defaultPaths = [...]string{
configPath,
}
// CLIConfig: options for nvidia-container-cli.
// CLIConfig : options for nvidia-container-cli.
type CLIConfig struct {
// Each field carries a `toml` struct tag naming the configuration key it
// corresponds to.
//
// NOTE(review): the field list appears twice below. This looks like the removed
// and added sides of a diff hunk shown back to back rather than a single valid
// struct body — confirm against the real file. The first listing includes
// AlphaMergeVisibleDevicesEnvvars; the second omits it.
Root *string `toml:"root"`
Path *string `toml:"path"`
Environment []string `toml:"environment"`
Debug *string `toml:"debug"`
Ldcache *string `toml:"ldcache"`
LoadKmods bool `toml:"load-kmods"`
NoPivot bool `toml:"no-pivot"`
NoCgroups bool `toml:"no-cgroups"`
User *string `toml:"user"`
Ldconfig *string `toml:"ldconfig"`
AlphaMergeVisibleDevicesEnvvars bool `toml:"alpha-merge-visible-devices-envvars"`
// Second listing: the same fields without AlphaMergeVisibleDevicesEnvvars.
Root *string `toml:"root"`
Path *string `toml:"path"`
Environment []string `toml:"environment"`
Debug *string `toml:"debug"`
Ldcache *string `toml:"ldcache"`
LoadKmods bool `toml:"load-kmods"`
NoPivot bool `toml:"no-pivot"`
NoCgroups bool `toml:"no-cgroups"`
User *string `toml:"user"`
Ldconfig *string `toml:"ldconfig"`
}
// HookConfig : options for the nvidia-container-toolkit.
//
// Each field carries a `toml` struct tag naming the configuration key it
// corresponds to; NvidiaContainerCLI nests a CLIConfig under the
// "nvidia-container-cli" key.
//
// NOTE(review): DisableRequire and SwarmResource appear twice below. This looks
// like the removed and added sides of a diff hunk shown back to back rather
// than a single valid struct body — confirm against the real file.
type HookConfig struct {
DisableRequire bool `toml:"disable-require"`
SwarmResource *string `toml:"swarm-resource"`
DisableRequire bool `toml:"disable-require"`
SwarmResource *string `toml:"swarm-resource"`
// Switches for where the visible-device list may come from, per their TOML
// key names: the environment variable when unprivileged, and volume mounts.
AcceptEnvvarUnprivileged bool `toml:"accept-nvidia-visible-devices-envvar-when-unprivileged"`
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
}
// getDefaultHookConfig returns a HookConfig literal populated with fixed
// default values: requirement checking enabled (DisableRequire false), no
// swarm resource, and a nested CLIConfig whose only non-zero setting is
// LoadKmods (Environment is an empty, non-nil slice).
//
// NOTE(review): several keys appear twice in the literal below. This looks like
// the removed and added sides of a diff hunk shown back to back rather than a
// single valid composite literal — confirm against the real file.
func getDefaultHookConfig() (config HookConfig) {
return HookConfig{
DisableRequire: false,
SwarmResource: nil,
DisableRequire: false,
SwarmResource: nil,
// By default the device list is accepted from the environment variable even
// when unprivileged, and is not accepted from volume mounts.
AcceptEnvvarUnprivileged: true,
AcceptDeviceListAsVolumeMounts: false,
NvidiaContainerCLI: CLIConfig{
Root: nil,
Path: nil,
Environment: []string{},
Debug: nil,
Ldcache: nil,
LoadKmods: true,
NoPivot: false,
NoCgroups: false,
User: nil,
Ldconfig: nil,
AlphaMergeVisibleDevicesEnvvars: false,
// Second listing: the same keys without AlphaMergeVisibleDevicesEnvvars.
Root: nil,
Path: nil,
Environment: []string{},
Debug: nil,
Ldcache: nil,
LoadKmods: true,
NoPivot: false,
NoCgroups: false,
User: nil,
Ldconfig: nil,
},
}
}

View File

@@ -1,8 +1,8 @@
package main
import (
"testing"
"encoding/json"
"testing"
)
func TestParseCudaVersionValid(t *testing.T) {