Compare commits

..

27 Commits

Author SHA1 Message Date
Evan Lezar
8551c71a8a Merge branch 'master' into experimental 2022-01-31 14:07:36 +01:00
Evan Lezar
7ee81e91de Merge branch 'fix-dev-null-typo' into 'experimental'
Correct typo in check for null device

See merge request nvidia/container-toolkit/container-toolkit!101
2022-01-25 11:03:00 +00:00
Evan Lezar
5a6860adbf Correct typo in check for null device
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-25 11:18:28 +01:00
Evan Lezar
ce2b0dfdf0 Merge branch 'fix-log-level' into 'experimental'
Correctly set log-level from config.toml

See merge request nvidia/container-toolkit/container-toolkit!94
2022-01-14 12:08:45 +00:00
Evan Lezar
6f0129bdf2 Remove amazonlinux1 package builds from CI
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-14 12:03:43 +01:00
Evan Lezar
ffd98424d8 Correctly set log-level from config.toml
This change contains a bugfix where the log-level (if specified in the config.toml file)
would be assigned to config.Root instead of config.LogLevel.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2022-01-14 10:29:12 +01:00
Evan Lezar
cf192169a8 Add basic README for experimental runtime
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-12-08 21:43:06 +01:00
Evan Lezar
eeabd2b94d Merge branch 'master' into experimental 2021-12-08 20:44:10 +01:00
Evan Lezar
d81329da1f Merge branch 'master' into experimental 2021-11-16 13:52:58 +01:00
Evan Lezar
3b84f527cc Merge branch 'master' into experimental
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-11-12 11:17:02 +01:00
Evan Lezar
8da6e42d88 Merge branch 'master' into experimental 2021-10-27 15:00:41 +02:00
Evan Lezar
1a755d471a Use functions in oci package to parse arguments
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-10-18 14:36:10 +02:00
Evan Lezar
c05f9dffc5 Update imports
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-10-18 14:36:10 +02:00
Evan Lezar
7c5b913e09 Move pkg/* to internal/*
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-10-18 14:36:10 +02:00
Evan Lezar
c0d2d41f23 Remove unused internal/oci package
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-10-18 14:36:09 +02:00
Evan Lezar
08cf87eb21 Use oci package from experimental branch for nvidia-container-runtime
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-10-18 14:36:09 +02:00
Evan Lezar
d3a9f512c4 Merge branch 'merge-into-experimental' into 'experimental'
Merge master into experimental

See merge request nvidia/container-toolkit/container-toolkit!56
2021-10-18 12:33:55 +00:00
Evan Lezar
73baa74ea8 Merge master into experimental 2021-10-18 12:33:55 +00:00
Evan Lezar
39aa24690c Merge branch 'master' into experimental 2021-09-28 16:32:40 +02:00
Evan Lezar
dc6b7c703b Merge branch 'master' into experimental 2021-09-09 15:42:20 +02:00
Evan Lezar
b044b56c6e Merge branch 'migrate-experimental-runtime' into 'experimental'
Merge code from experimental runtime

See merge request nvidia/container-toolkit/container-toolkit!37
2021-07-05 11:49:44 +00:00
Evan Lezar
46cdf8c41a Address ineffectual assignment error
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-07-01 16:13:22 +02:00
Evan Lezar
22ee5eb64f Run CI checks in development image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-07-01 15:59:20 +02:00
Evan Lezar
a7f3398a68 Update makefile with Development docker image
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-06-30 14:08:06 +02:00
Evan Lezar
72fe259745 Run go mod vendor and tidy
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-06-30 14:07:36 +02:00
Evan Lezar
3b27d91026 Update package references
This change replaces the reference to gitlab.com/nvidia/cloud-native/container-toolkit
with github.com/NVIDIA/nvidia-container-toolkit matching this project.

Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-06-30 14:07:36 +02:00
Evan Lezar
d3997eceb2 Copy files from nvidia-container-toolkit
Signed-off-by: Evan Lezar <elezar@nvidia.com>
2021-06-30 14:07:34 +02:00
589 changed files with 43191 additions and 60457 deletions

View File

@@ -20,7 +20,6 @@ default:
variables:
GIT_SUBMODULE_STRATEGY: recursive
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
BUILD_MULTI_ARCH_IMAGES: "true"
stages:
- image
@@ -33,6 +32,7 @@ stages:
- test
- scan
- release
- build-all
# Define the distribution targets
.dist-amazonlinux2:
@@ -42,12 +42,11 @@ stages:
.dist-centos7:
variables:
DIST: centos7
CVE_UPDATES: "cyrus-sasl-lib"
CVE_UPDATES: "nss"
.dist-centos8:
variables:
DIST: centos8
CVE_UPDATES: "cyrus-sasl-lib"
.dist-debian10:
variables:
@@ -64,7 +63,6 @@ stages:
.dist-ubi8:
variables:
DIST: ubi8
CVE_UPDATES: "cyrus-sasl-lib"
.dist-ubuntu16.04:
variables:
@@ -73,12 +71,6 @@ stages:
.dist-ubuntu18.04:
variables:
DIST: ubuntu18.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
.dist-ubuntu20.04:
variables:
DIST: ubuntu20.04
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
.dist-packaging:
variables:
@@ -105,15 +97,6 @@ stages:
variables:
ARCH: x86_64
# Define the platform targets
.platform-amd64:
variables:
PLATFORM: linux/amd64
.platform-arm64:
variables:
PLATFORM: linux/arm64
# Define test helpers
.integration:
stage: test
@@ -135,30 +118,20 @@ test-packaging:
needs:
- image-packaging
# Download the regctl binary for use in the release steps
.regctl-setup:
before_script:
- export REGCTL_VERSION=v0.3.10
- apk add --no-cache curl
- mkdir -p bin
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
- chmod a+x bin/regctl
- export PATH=$(pwd)/bin:${PATH}
# .release forms the base of the deployment jobs which push images to the CI registry.
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
# target os.
.release:
stage: release
stage:
release
variables:
# Define the source image for the release
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
VERSION: "${CI_COMMIT_SHORT_SHA}"
# OUT_IMAGE_VERSION is overridden for external releases
OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}"
stage: release
before_script:
- !reference [.regctl-setup, before_script]
# We ensure that the OUT_IMAGE_VERSION is set
- 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1'
@@ -166,16 +139,16 @@ test-packaging:
# need to tag the image.
# Note: a leading 'v' is stripped from the version if present
- apk add --no-cache make bash
script:
# Log in to the "output" registry, tag the image and push the image
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- regctl registry login "${CI_REGISTRY}" -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}"
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || echo "Logging in to output registry ${OUT_REGISTRY}"'
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"'
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
# Target
- make -f build/container/Makefile push-${DIST}
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
script:
- docker tag "${IMAGE_NAME}:${VERSION}-${DIST}" "${OUT_IMAGE_NAME}:${OUT_IMAGE_VERSION}-${DIST}"
# Log in to the "output" registry, tag the image and push the image
- 'echo "Logging in to output registry ${OUT_REGISTRY}"'
- docker logout
- docker login -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" "${OUT_REGISTRY}"
- make IMAGE_NAME=${OUT_IMAGE_NAME} VERSION=${OUT_IMAGE_VERSION} -f build/container/Makefile push-${DIST}
# Define a staging release step that pushes an image to an internal "staging" repository
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
@@ -190,7 +163,7 @@ test-packaging:
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/container-toolkit"
# Define an external release step that pushes an image to an external repository.
# This includes a devlopment image off main.
# This includes a devlopment image off master.
.release:external:
extends:
- .release
@@ -234,16 +207,6 @@ release:staging-ubuntu18.04:
- test-crio-ubuntu18.04
- test-docker-ubuntu18.04
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu20.04
needs:
- test-toolkit-ubuntu20.04
- test-containerd-ubuntu20.04
- test-crio-ubuntu20.04
- test-docker-ubuntu20.04
release:staging-packaging:
extends:
- .release:staging

3
.gitignore vendored
View File

@@ -1,9 +1,8 @@
dist
*.swp
*.swo
/coverage.out*
/coverage.out
/test/output/
/nvidia-container-runtime
/nvidia-container-toolkit
/nvidia-ctk
/shared-*

View File

@@ -94,9 +94,8 @@ unit-tests:
- .multi-arch-build
- .package-artifacts
stage: package-build
timeout: 2h 30m
script:
- ./scripts/build-packages.sh ${DIST}-${ARCH}
- ./scripts/release.sh ${DIST}-${ARCH}
artifacts:
name: ${ARTIFACTS_NAME}
@@ -194,33 +193,19 @@ package-ubuntu18.04-ppc64le:
- .dist-ubuntu18.04
- .arch-ppc64le
.buildx-setup:
before_script:
- export BUILDX_VERSION=v0.6.3
- apk add --no-cache curl
- mkdir -p ~/.docker/cli-plugins
- curl -sSLo ~/.docker/cli-plugins/docker-buildx "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64"
- chmod a+x ~/.docker/cli-plugins/docker-buildx
- docker buildx create --use --platform=linux/amd64,linux/arm64
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes'
# Define the image build targets
.image-build:
stage: image-build
variables:
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
VERSION: "${CI_COMMIT_SHORT_SHA}"
PUSH_ON_BUILD: "true"
before_script:
- !reference [.buildx-setup, before_script]
- apk add --no-cache bash make
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
script:
- make -f build/container/Makefile build-${DIST}
- make -f build/container/Makefile push-${DIST}
image-centos7:
extends:
@@ -247,10 +232,9 @@ image-ubi8:
- .package-artifacts
- .dist-ubi8
needs:
# Note: The ubi8 image uses the centos8 packages
- package-centos8-aarch64
- package-centos8-x86_64
- package-centos8-ppc64le
# Note: The ubi8 image currently uses the centos7 packages
- package-centos7-ppc64le
- package-centos7-x86_64
image-ubuntu18.04:
extends:
@@ -262,16 +246,6 @@ image-ubuntu18.04:
- package-ubuntu18.04-arm64
- package-ubuntu18.04-ppc64le
image-ubuntu20.04:
extends:
- .image-build
- .package-artifacts
- .dist-ubuntu20.04
needs:
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- package-ubuntu18.04-ppc64le
# The DIST=packaging target creates an image containing all built packages
image-packaging:
extends:
@@ -353,31 +327,46 @@ test-docker-ubuntu18.04:
needs:
- image-ubuntu18.04
test-toolkit-ubuntu20.04:
# build-all jobs build packages for every OS / ARCH combination we support.
#
# They are run under two conditions:
# 1) Automatically whenever a new tag is pushed to the repo (e.g. v1.1.0)
# 2) Manually by a reviewer just before merging a MR.
.build-all-for-arch:
variables:
# Setting DIST=docker invokes the docker- release targets
DIST: docker
extends:
- .test:toolkit
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
- .package-build
stage: build-all
timeout: 2h 30m
rules:
- if: $CI_COMMIT_TAG
when: always
test-containerd-ubuntu20.04:
# The full set of build-all jobs organized to
# have builds for each ARCH run in parallel.
build-all-amd64:
extends:
- .test:containerd
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
- .build-all-for-arch
- .arch-amd64
test-crio-ubuntu20.04:
build-all-x86_64:
extends:
- .test:crio
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
- .build-all-for-arch
- .arch-x86_64
test-docker-ubuntu20.04:
build-all-ppc64le:
extends:
- .test:docker
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
- .build-all-for-arch
- .arch-ppc64le
build-all-arm64:
extends:
- .build-all-for-arch
- .arch-arm64
build-all-aarch64:
extends:
- .build-all-for-arch
- .arch-aarch64

1
.gitmodules vendored
View File

@@ -1,7 +1,6 @@
[submodule "third_party/libnvidia-container"]
path = third_party/libnvidia-container
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
branch = main
[submodule "third_party/nvidia-container-runtime"]
path = third_party/nvidia-container-runtime
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git

View File

@@ -27,8 +27,8 @@ default:
variables:
DOCKER_DRIVER: overlay2
DOCKER_TLS_CERTDIR: "/certs"
# Release "devel"-tagged images off the main branch
RELEASE_DEVEL_BRANCH: "main"
# Release "devel"-tagged images off the master branch
RELEASE_DEVEL_BRANCH: "master"
DEVEL_RELEASE_IMAGE_VERSION: "devel"
# On the multi-arch builder we don't need the qemu setup.
SKIP_QEMU_SETUP: "1"
@@ -46,7 +46,6 @@ variables:
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
PUSH_MULTIPLE_TAGS: "false"
# We delay the job start to allow the public pipeline to generate the required images.
when: delayed
start_in: 30 minutes
@@ -57,13 +56,13 @@ variables:
- job_execution_timeout
- stuck_or_timeout_failure
before_script:
- !reference [.regctl-setup, before_script]
- apk add --no-cache make bash
- >
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
docker pull ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
script:
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
- docker pull ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}
- docker tag ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} ${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}
- docker login -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" "${OUT_REGISTRY}"
- docker push ${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}
image-centos7:
extends:
@@ -85,11 +84,6 @@ image-ubuntu18.04:
- .image-pull
- .dist-ubuntu18.04
image-ubuntu20.04:
extends:
- .image-pull
- .dist-ubuntu20.04
# The DIST=packaging target creates an image containing all built packages
image-packaging:
extends:
@@ -118,7 +112,7 @@ image-packaging:
before_script:
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
# TODO: We should specify the architecture here and scan all architectures
- docker pull --platform="${PLATFORM}" "${IMAGE}"
- docker pull "${IMAGE}"
- docker save "${IMAGE}" -o "${IMAGE_ARCHIVE}"
- AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
- >
@@ -137,91 +131,34 @@ image-packaging:
- policy_evaluation.json
# Define the scan targets
scan-centos7-amd64:
scan-centos7:
extends:
- .scan
- .dist-centos7
- .platform-amd64
needs:
- image-centos7
scan-centos7-arm64:
extends:
- .scan
- .dist-centos7
- .platform-arm64
needs:
- image-centos7
- scan-centos7-amd64
scan-centos8-amd64:
scan-centos8:
extends:
- .scan
- .dist-centos8
- .platform-amd64
needs:
- image-centos8
scan-centos8-arm64:
extends:
- .scan
- .dist-centos8
- .platform-arm64
needs:
- image-centos8
- scan-centos8-amd64
scan-ubuntu18.04-amd64:
scan-ubuntu18.04:
extends:
- .scan
- .dist-ubuntu18.04
- .platform-amd64
needs:
- image-ubuntu18.04
scan-ubuntu18.04-arm64:
extends:
- .scan
- .dist-ubuntu18.04
- .platform-arm64
needs:
- image-ubuntu18.04
- scan-ubuntu18.04-amd64
scan-ubuntu20.04-amd64:
extends:
- .scan
- .dist-ubuntu20.04
- .platform-amd64
needs:
- image-ubuntu20.04
scan-ubuntu20.04-arm64:
extends:
- .scan
- .dist-ubuntu20.04
- .platform-arm64
needs:
- image-ubuntu20.04
- scan-ubuntu20.04-amd64
scan-ubi8-amd64:
scan-ubi8:
extends:
- .scan
- .dist-ubi8
- .platform-amd64
needs:
- image-ubi8
scan-ubi8-arm64:
extends:
- .scan
- .dist-ubi8
- .platform-arm64
needs:
- image-ubi8
- scan-ubi8-amd64
# Define external release helpers
.release:ngc:
extends:
@@ -232,6 +169,15 @@ scan-ubi8-arm64:
OUT_REGISTRY: "${NGC_REGISTRY}"
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
.release:dockerhub:
extends:
- .release:external
variables:
OUT_REGISTRY_USER: "${REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${REGISTRY_TOKEN}"
OUT_REGISTRY: "${DOCKERHUB_REGISTRY}"
OUT_IMAGE_NAME: "${REGISTRY_IMAGE}"
release:staging-ubuntu18.04:
extends:
- .release:staging
@@ -239,13 +185,6 @@ release:staging-ubuntu18.04:
needs:
- image-ubuntu18.04
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
# Define the external release targets
# Release to NGC
release:ngc-centos7:
@@ -258,17 +197,33 @@ release:ngc-centos8:
- .release:ngc
- .dist-centos8
release:ngc-ubuntu18.04:
release:ngc-ubuntu18:
extends:
- .release:ngc
- .dist-ubuntu18.04
release:ngc-ubuntu20.04:
extends:
- .release:ngc
- .dist-ubuntu20.04
release:ngc-ubi8:
extends:
- .release:ngc
- .dist-ubi8
# Release to Dockerhub
release:dockerhub-centos7:
extends:
- .release:dockerhub
- .dist-centos7
release:dockerhub-centos8:
extends:
- .release:dockerhub
- .dist-centos8
release:dockerhub-ubuntu18:
extends:
- .release:dockerhub
- .dist-ubuntu18.04
release:dockerhub-ubi8:
extends:
- .release:dockerhub
- .dist-ubi8

View File

@@ -13,7 +13,7 @@ The `nvidia-container-toolkit` resides in this repo directly.
In oder to build the packages, the following command is executed
```sh
./scripts/build-packages.sh TARGET
./scripts/build-all-components.sh TARGET
```
where `TARGET` is a make target that is valid for each of the sub-components.
@@ -21,8 +21,6 @@ These include:
* `ubuntu18.04-amd64`
* `centos8-x86_64`
If no `TARGET` is specified, all valid release targets are built.
The packages are generated in the `dist` folder.
## Testing local changes
@@ -39,23 +37,9 @@ The [test/release](./test/release/) folder contains documentation on how the ins
## Releasing
In order to release packages required for a release, a utility script
[`scripts/release-packages.sh`](./scripts/release-packages.sh) is provided.
This script can be executed as follows:
```bash
GPG_LOCAL_USER="GPG_USER" \
MASTER_KEY_PATH=/path/to/gpg-master.key \
SUB_KEY_PATH=/path/to/gpg-subkey.key \
./scripts/release-packages.sh REPO PACKAGE_REPO_ROOT [REFERENCE]
A utility script [`scripts/release.sh`](./scripts/release.sh) is provided to build
packages required for release. If run without arguments, all supported distribution-architecture combinations are built. A specific distribution-architecture pair can also be provided
```sh
./scripts/release.sh ubuntu18.04-amd64
```
Where `REPO` is one of `stable` or `experimental`, `PACKAGE_REPO_ROOT` is the local path to the `libnvidia-container` repository checked out to the `gh-pages` branch, and `REFERENCE` is the git SHA that is to be released. If reference is not specified `HEAD` is assumed.
This scripts performs the following basic functions:
* Pulls the package image defined by the `REFERENCE` git SHA from the staging registry,
* Copies the required packages to the package repository at `PACKAGE_REPO_ROOT/REPO`,
* Signs the packages using the specified GPG keys
While the last two are performed, commits are added to the package repository. These can be pushed to the relevant repository.
where the `amd64` builds for `ubuntu18.04` are provided as an example.

View File

@@ -38,6 +38,8 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
$(info CMD_TARGETS=$(CMD_TARGETS))
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate $(CHECK_TARGETS)
@@ -46,12 +48,6 @@ TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
DOCKER_TARGETS := $(patsubst %,docker-%, $(TARGETS))
.PHONY: $(TARGETS) $(DOCKER_TARGETS)
ifeq ($(VERSION),)
CLI_VERSION = $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
else
CLI_VERSION = $(VERSION)
endif
GOOS ?= linux
binaries: cmds
@@ -60,7 +56,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
endif
cmds: $(CMD_TARGETS)
$(CMD_TARGETS): cmd-%:
GOOS=$(GOOS) go build -ldflags "-s -w -X github.com/NVIDIA/nvidia-container-toolkit/internal/info.gitCommit=$(GIT_COMMIT) -X github.com/NVIDIA/nvidia-container-toolkit/internal/info.version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
GOOS=$(GOOS) go build -ldflags "-s -w" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
build:
GOOS=$(GOOS) go build ./...
@@ -94,7 +90,11 @@ ineffassign:
lint:
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
go list -f '{{.Dir}}' $(MODULE)/... | grep -v /internal/ | xargs golint -set_exit_status
lint-internal:
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
go list -f '{{.Dir}}' $(MODULE)/internal/... | xargs golint -set_exit_status
misspell:
misspell $(MODULE)/...
@@ -142,15 +142,3 @@ $(DOCKER_TARGETS): docker-%: .build-image
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE) \
make $(*)
# Start an interactive shell using the development image.
PHONY: .shell
.shell:
$(DOCKER) run \
--rm \
-ti \
-e GOCACHE=/tmp/.cache \
-v $(PWD):$(PWD) \
-w $(PWD) \
--user $$(id -u):$$(id -g) \
$(BUILDIMAGE)

View File

@@ -1,6 +1,6 @@
# NVIDIA Container Toolkit
[![GitHub license](https://img.shields.io/github/license/NVIDIA/nvidia-container-toolkit?style=flat-square)](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/main/LICENSE)
[![GitHub license](https://img.shields.io/github/license/NVIDIA/nvidia-container-toolkit?style=flat-square)](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/master/LICENSE)
[![Documentation](https://img.shields.io/badge/documentation-wiki-blue.svg?style=flat-square)](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/overview.html)
[![Package repository](https://img.shields.io/badge/packages-repository-b956e8.svg?style=flat-square)](https://nvidia.github.io/libnvidia-container)

View File

@@ -42,16 +42,6 @@ RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" .
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG BASE_DIST
# See https://www.centos.org/centos-linux-eol/
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
( \
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
)
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=all
ENV NVIDIA_DRIVER_CAPABILITIES=utility
@@ -63,10 +53,8 @@ COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
WORKDIR /artifacts/packages
ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH ${TARGETARCH}
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
yum localinstall -y \
ARG PACKAGE_ARCH
RUN yum localinstall -y \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-${PACKAGE_VERSION}*.rpm \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-${PACKAGE_VERSION}*.rpm \
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm

View File

@@ -40,15 +40,11 @@ COPY . .
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
RUN rm -f /etc/apt/sources.list.d/cuda.list
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
libcap2 \
curl \
&& \
rm -rf /var/lib/apt/lists/*
@@ -63,17 +59,7 @@ COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
WORKDIR /artifacts/packages
ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH ${TARGETARCH}
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container"
ARG LIBNVIDIA_CONTAINER0_VERSION
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
fi
ARG PACKAGE_ARCH
RUN dpkg -i \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_${PACKAGE_VERSION}*.deb \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_${PACKAGE_VERSION}*.deb \
@@ -95,11 +81,4 @@ LABEL description="See summary"
COPY ./LICENSE /licenses/LICENSE
# Install / upgrade packages here that are required to resolve CVEs
ARG CVE_UPDATES
RUN if [ -n "${CVE_UPDATES}" ]; then \
apt-get update && apt-get upgrade -y ${CVE_UPDATES} && \
rm -rf /var/lib/apt/lists/*; \
fi
ENTRYPOINT ["/work/nvidia-toolkit"]

View File

@@ -12,14 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
BUILD_MULTI_ARCH_IMAGES ?= false
DOCKER ?= docker
BUILDX =
ifeq ($(BUILD_MULTI_ARCH_IMAGES),true)
BUILDX = buildx
endif
DOCKER ?= docker
MKDIR ?= mkdir
DIST_DIR ?= $(CURDIR)/dist
@@ -32,47 +25,36 @@ IMAGE_NAME := $(REGISTRY)/container-toolkit
endif
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
IMAGE_VERSION := $(VERSION)
IMAGE_TAG ?= $(VERSION)-$(DIST)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
OUT_IMAGE_NAME ?= $(IMAGE_NAME)
OUT_IMAGE_VERSION ?= $(IMAGE_VERSION)
OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules #####
DEFAULT_PUSH_TARGET := ubuntu18.04
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
TARGETS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
META_TARGETS := packaging
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
TEST_TARGETS := $(patsubst %,test-%, $(DISTRIBUTIONS))
BUILD_TARGETS := $(patsubst %,build-%,$(TARGETS) $(META_TARGETS))
PUSH_TARGETS := $(patsubst %,push-%,$(TARGETS) $(META_TARGETS))
TEST_TARGETS := $(patsubst %,test-%, $(TARGETS))
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
.PHONY: $(TARGETS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
include $(CURDIR)/build/container/native-only.mk
else
include $(CURDIR)/build/container/multi-arch.mk
endif
push-%: DIST = $(*)
$(PUSH_TARGETS): push-%:
$(DOCKER) push "$(IMAGE_NAME):$(IMAGE_TAG)"
# For the default push target we also push a short tag equal to the version.
# We skip this for the development release
DEVEL_RELEASE_IMAGE_VERSION ?= devel
PUSH_MULTIPLE_TAGS ?= true
ifeq ($(strip $(OUT_IMAGE_VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
PUSH_MULTIPLE_TAGS = false
endif
ifeq ($(PUSH_MULTIPLE_TAGS),true)
ifneq ($(strip $(VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
push-$(DEFAULT_PUSH_TARGET): push-short
endif
push-short:
$(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(IMAGE_NAME):$(VERSION)"
$(DOCKER) push "$(IMAGE_NAME):$(VERSION)"
push-%: DIST = $(*)
push-short: DIST = $(DEFAULT_PUSH_TARGET)
build-%: DIST = $(*)
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
@@ -82,17 +64,16 @@ ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
# Use a generic build target to build the relevant images
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
DOCKER_BUILDKIT=1 \
$(DOCKER) $(BUILDX) build --pull \
$(DOCKER_BUILD_OPTIONS) \
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
$(DOCKER) build --pull \
--platform=linux/amd64 \
--tag $(IMAGE) \
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
--build-arg BASE_DIST="$(BASE_DIST)" \
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg PACKAGE_ARCH="$(PACKAGE_ARCH)" \
--build-arg VERSION="$(VERSION)" \
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
-f $(DOCKERFILE) \
@@ -101,17 +82,20 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
build-ubuntu%: BASE_DIST = $(*)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
build-ubuntu%: PACKAGE_ARCH := amd64
build-ubuntu%: PACKAGE_DIST = $(BASE_DIST)
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
# TODO: Update this to use the centos8 packages
build-ubi8: BASE_DIST := ubi8
build-ubi8: DOCKERFILE_SUFFIX := centos
build-ubi8: PACKAGE_DIST = centos8
build-ubi8: PACKAGE_ARCH := x86_64
build-ubi8: PACKAGE_DIST = centos7
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
build-centos%: BASE_DIST = $(*)
build-centos%: DOCKERFILE_SUFFIX := centos
build-centos%: PACKAGE_ARCH := x86_64
build-centos%: PACKAGE_DIST = $(BASE_DIST)
build-centos%: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)

View File

@@ -1,34 +0,0 @@
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
PUSH_ON_BUILD ?= false
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
REGCTL ?= regctl
$(PUSH_TARGETS): push-%:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE)
push-short:
$(REGCTL) \
image copy \
$(IMAGE) $(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)
# We only have x86_64 packages for centos7
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
# We only generate a single image for packaging targets
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64

View File

@@ -0,0 +1,170 @@
# The Experimental NVIDIA Container Runtime
## Introduction
The experimental NVIDIA Container Runtime is a proof-of-concept runtime that
approaches the problem of making GPUs (or other NVIDIA devices) available in
containerized environments in a different manner to the existing
[NVIDIA Container Runtime](../nvidia-container-runtime). Wherease the current
runtime relies on the [NVIDIA Container Library](https://github.com/NVIDIA/libnvidia-container)
to perform the modifications to a container, the experiemental runtime aims to
express the required modifications in terms of changes to a container's [OCI
runtime specification](https://github.com/opencontainers/runtime-spec). This
also aligns with open initiatives such as the [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface).
## Known Limitations
* The path of NVIDIA CUDA libraries / binaries injected into the container currently match that of the host system. This means that on an Ubuntu-based host systems these would be at `/usr/lib/x86_64-linux-gnu`
even if the container distribution would normally expect these at another location (e.g. `/usr/lib64`)
* Tools such as `nvidia-smi` may create additional device nodes in the container when run. This is
prevented in the "classic" runtime (and the NVIDIA Container Library) by modifying
the `/proc/driver/nvidia/params` file in the container.
* Other `NVIDIA_*` environment variables (e.g. `NVIDIA_DRIVER_CAPABILITIES`) are
not considered to filter mounted libraries or binaries. This is equivalent to
always using `NVIDIA_DRIVER_CAPABILITIES=all`.
## Building / Installing
The experimental NVIDIA Container Runtime is a self-contained golang binary and
can thus be built from source, or installed directly using `go install`.
### From source
After cloning the `nvidia-container-toolkit` repository from [GitLab](https://gitlab.com/nvidia/container-toolkit/container-toolkit)
or from the read-only mirror on [GitHub](https://github.com/NVIDIA/nvidia-container-toolkit)
running the following make command in the repository root:
```bash
make cmd-nvidia-container-runtime.experimental
```
will create an executable file `nvidia-container-runtime.experimental` in the
root.
A dockerized target:
```bash
make docker-cmd-nvidia-container-runtime.experimental
```
will also create the executable file `nvidia-container-runtime.experimental`
without requiring the setup of a development environment (with the exception)
of having `make` and `docker` installed.
### Go install
The experimental NVIDIA Container Runtime can also be `go installed` by running
the following command:
```bash
go install github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental@experimental
```
which will build and install the `nvidia-container-runtime.experimental`
executable in the `${GOPATH}/bin` folder.
## Using the Runtime
The experimental NVIDIA Container Runtime is intended as a drop-in replacement
for the "classic" NVIDIA Container Runtime. As such it is used in the same
way (with the exception of the known limitiations noted above).
In general terms, to use the experimental NVIDIA Container Runtime to launch a
container with GPU support, it should be inserted as a shim for the desired
low-level OCI-compliant runtime (e.g. `runc` or `crun`). How this is achieved
depends on how containers are being launched.
### Docker
In the case of `docker` for example, the runtime must be registered with the
Docker daemon. This can be done by modifying the `/etc/docker/daemon.json` file
to contain the following:
```json
{
"runtimes": {
"nvidia-experimental": {
"path": "nvidia-container-runtime.experimental",
"runtimeArgs": []
}
},
}
```
This can then be invoked from docker by including the `--runtime=nvidia-experimental`
option when executing a `docker run` command.
### Runc
If `runc` is being used to run a container directly substituting the `runc`
command for `nvidia-container-runtime.experimental` should be sufficient as
the latter will `exec` to `runc` once the required (in-place) modifications have
been made to the container's OCI spec (`config.json` file).
## Configuration
### Runtime Path
The experimental NVIDIA Container Runtime allows for the path to the low-level
runtime to be specified. This is done by setting the following option in the
`/etc/nvidia-container-runtime/config.toml` file or setting the
`NVIDIA_CONTAINER_RUNTIME_PATH` environment variable.
```toml
[nvidia-container-runtime.experimental]
runtime-path = "/path/to/low-level-runtime"
```
This path can be set to the path for `runc` or `crun` on a system and if it is
a relative path, the `PATH` is searched for a matching executable.
### Device Selection
In order to select a specific device, the experimental NVIDIA Container Runtime
mimics the behaviour of the "classic" runtime. That is to say that the values of
certain [environment variables](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#environment-variables-oci-spec)
**in the container's OCI specification** control the behaviour of the runtime.
#### `NVIDIA_VISIBLE_DEVICES`
This variable controls which GPUs will be made accessible inside the container.
##### Possible values
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
* `all`: all GPUs will be accessible, this is the default value in our container images.
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
**Note**: When running on a MIG capable device, the following values will also be available:
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
```
$ nvidia-smi -L
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
```
#### `NVIDIA_MIG_CONFIG_DEVICES`
This variable controls which of the visible GPUs can have their MIG
configuration managed from within the container. This includes enabling and
disabling MIG mode, creating and destroying GPU Instances and Compute
Instances, etc.
##### Possible values
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
MIG configurations managed.
**Note**:
* This feature is only available on MIG capable devices (e.g. the A100).
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
* When not running as `root`, the container user must have read access to the
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
#### `NVIDIA_MIG_MONITOR_DEVICES`
This variable controls which of the visible GPUs can have aggregate information
about all of their MIG devices monitored from within the container. This
includes inspecting the aggregate memory usage, listing the aggregate running
processes, etc.
##### Possible values
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
MIG devices monitored.
**Note**:
* This feature is only available on MIG capable devices (e.g. the A100).
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
* When not running as `root`, the container user must have read access to the
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.

View File

@@ -0,0 +1,52 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"fmt"
log "github.com/sirupsen/logrus"
)
// list represents a list of config updaters that are applied in order, with later
// configs having preference.
type list struct {
logger *log.Logger
configs []configUpdater
}
var _ configUpdater = (*list)(nil)
func newListWithLogger(logger *log.Logger, ci ...configUpdater) configUpdater {
c := list{
logger: logger,
configs: ci,
}
return &c
}
func (c list) Update(cfg *Config) error {
for i, u := range c.configs {
c.logger.Debugf("Applying config %v: %v", i, u)
err := u.Update(cfg)
if err != nil {
return fmt.Errorf("error applying config %v: %v", i, err)
}
}
return nil
}

View File

@@ -0,0 +1,100 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"fmt"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestComposite(t *testing.T) {
logger, _ := testlog.NewNullLogger()
// Empty list
c := newListWithLogger(logger)
cfg := &Config{}
err := c.Update(cfg)
require.NoError(t, err)
require.EqualValues(t, &Config{}, cfg)
// Add a single mock config
mockConfigUpdater := &configUpdaterMock{
UpdateFunc: func(cfg *Config) error {
cfg.DebugFilePath = "updated"
return nil
},
}
c = newListWithLogger(logger, mockConfigUpdater)
cfg = &Config{}
err = c.Update(cfg)
require.NoError(t, err)
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
require.Len(t, mockConfigUpdater.UpdateCalls(), 1)
// Reset the calls
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
// define an error config
errorConfigUpdater := &configUpdaterMock{
UpdateFunc: func(cfg *Config) error {
return fmt.Errorf("mock error")
},
}
c = newListWithLogger(logger, errorConfigUpdater, mockConfigUpdater)
cfg = &Config{}
err = c.Update(cfg)
require.Error(t, err)
require.EqualValues(t, &Config{}, cfg)
require.Len(t, errorConfigUpdater.UpdateCalls(), 1)
require.Len(t, mockConfigUpdater.UpdateCalls(), 0)
// Reset the calls
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
errorConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
// Change the order of the config and error
c = newListWithLogger(logger, mockConfigUpdater, errorConfigUpdater)
cfg = &Config{}
err = c.Update(cfg)
require.Error(t, err)
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
require.Len(t, errorConfigUpdater.UpdateCalls(), 1)
require.Len(t, mockConfigUpdater.UpdateCalls(), 1)
// Reset the calls
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
errorConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
// Call the mock twice
c = newListWithLogger(logger, mockConfigUpdater, mockConfigUpdater)
cfg = &Config{}
err = c.Update(cfg)
require.NoError(t, err)
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
require.Len(t, mockConfigUpdater.UpdateCalls(), 2)
}

View File

@@ -0,0 +1,63 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"fmt"
log "github.com/sirupsen/logrus"
)
// Config defines the configuration options for the NVIDIA Container Runtime
type Config struct {
// Root defines the root of the file system to be used for locating mounts
Root string
// DebugFilePath defines a log file to print debug output to
DebugFilePath string
// RuntimePath defines the path to an OCI compliant runtime
RuntimePath string
// LogLevel defines the logging level for the application
LogLevel string
}
//go:generate moq -stub -out config_mock.go . configUpdater
// configUpdate represents an interface for applying updates to a config.
type configUpdater interface {
Update(*Config) error
}
// GetConfig returns a config struct with the values resolved. The values are defined in order of
// priority:
// 1. From the associated environment variables
// 2. From the loaded config file
// 3. From the default values defined in the `defaultConfig` function
func GetConfig(logger *log.Logger) (*Config, error) {
cfg := &Config{}
configs := newListWithLogger(logger,
newDefaultConfig(),
newDefaultConfigFileWithLogger(logger),
newConfigFromEnvironment(),
)
err := configs.Update(cfg)
if err != nil {
return nil, fmt.Errorf("error getting config: %v", err)
}
return cfg, nil
}

View File

@@ -0,0 +1,76 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package config
import (
"sync"
)
// Ensure, that configUpdaterMock does implement configUpdater.
// If this is not the case, regenerate this file with moq.
var _ configUpdater = &configUpdaterMock{}
// configUpdaterMock is a mock implementation of configUpdater.
//
// func TestSomethingThatUsesconfigUpdater(t *testing.T) {
//
// // make and configure a mocked configUpdater
// mockedconfigUpdater := &configUpdaterMock{
// UpdateFunc: func(config *Config) error {
// panic("mock out the Update method")
// },
// }
//
// // use mockedconfigUpdater in code that requires configUpdater
// // and then make assertions.
//
// }
type configUpdaterMock struct {
// UpdateFunc mocks the Update method.
UpdateFunc func(config *Config) error
// calls tracks calls to the methods.
calls struct {
// Update holds details about calls to the Update method.
Update []struct {
// Config is the config argument value.
Config *Config
}
}
lockUpdate sync.RWMutex
}
// Update calls UpdateFunc.
func (mock *configUpdaterMock) Update(config *Config) error {
callInfo := struct {
Config *Config
}{
Config: config,
}
mock.lockUpdate.Lock()
mock.calls.Update = append(mock.calls.Update, callInfo)
mock.lockUpdate.Unlock()
if mock.UpdateFunc == nil {
var (
errOut error
)
return errOut
}
return mock.UpdateFunc(config)
}
// UpdateCalls gets all the calls that were made to Update.
// Check the length with:
// len(mockedconfigUpdater.UpdateCalls())
func (mock *configUpdaterMock) UpdateCalls() []struct {
Config *Config
} {
var calls []struct {
Config *Config
}
mock.lockUpdate.RLock()
calls = mock.calls.Update
mock.lockUpdate.RUnlock()
return calls
}

View File

@@ -0,0 +1,58 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"io/ioutil"
"os"
"path"
"path/filepath"
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestGetConfigWithCustomConfig(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
// By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
testDir := path.Join(wd, "test")
filename := path.Join(testDir, configFileRelativePath)
previousConfig, present := os.LookupEnv(configOverride)
os.Setenv(configOverride, testDir)
defer func() {
if present {
os.Setenv(configOverride, previousConfig)
} else {
os.Unsetenv(configOverride)
}
}()
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
logger, _ := testlog.NewNullLogger()
cfg, err := GetConfig(logger)
require.NoError(t, err)
require.Equal(t, "/nvidia-container-toolkit.log", cfg.DebugFilePath)
}

View File

@@ -0,0 +1,47 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
log "github.com/sirupsen/logrus"
)
type defaultConfig struct{}
var _ configUpdater = (*defaultConfig)(nil)
func newDefaultConfig() configUpdater {
c := defaultConfig{}
return &c
}
// Update defines the the default values for the config options
func (c defaultConfig) Update(cfg *Config) error {
*cfg = Config{
DebugFilePath: "/dev/null",
LogLevel: log.InfoLevel.String(),
}
return nil
}
func getDefaultConfig() *Config {
cfg := &Config{}
defaultConfig{}.Update(cfg)
return cfg
}

View File

@@ -0,0 +1,37 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestDefaultConfigUpdate(t *testing.T) {
cfg := &Config{}
require.Empty(t, cfg.DebugFilePath)
c := defaultConfig{}
err := c.Update(cfg)
require.NoError(t, err)
require.Equal(t, "/dev/null", cfg.DebugFilePath)
require.Equal(t, "", cfg.Root)
}

View File

@@ -0,0 +1,61 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"os"
"strings"
)
const (
debugFilePathEnvvarName = "NVIDIA_CONTAINER_RUNTIME_DEBUG"
runtimePathEnvvarName = "NVIDIA_CONTAINER_RUNTIME_PATH"
rootEnvvarName = "NVIDIA_CONTAINER_RUNTIME_ROOT"
logLevelEnvvarName = "NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"
)
type envConfig struct{}
func newConfigFromEnvironment() configUpdater {
c := envConfig{}
return &c
}
func (c envConfig) Update(cfg *Config) error {
debugFilePathEnvvar, exists := os.LookupEnv(debugFilePathEnvvarName)
if exists && strings.TrimSpace(debugFilePathEnvvar) != "" {
cfg.DebugFilePath = debugFilePathEnvvar
}
runtimePathEnvvar, exists := os.LookupEnv(runtimePathEnvvarName)
if exists && strings.TrimSpace(runtimePathEnvvar) != "" {
cfg.RuntimePath = runtimePathEnvvar
}
rootEnvvar, exists := os.LookupEnv(rootEnvvarName)
if exists && strings.TrimSpace(rootEnvvar) != "" {
cfg.Root = rootEnvvar
}
logLevelEnvvar, exists := os.LookupEnv(logLevelEnvvarName)
if exists && strings.TrimSpace(logLevelEnvvar) != "" {
cfg.LogLevel = logLevelEnvvar
}
return nil
}

View File

@@ -1,5 +1,5 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -12,13 +12,18 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
*/
package constraints
package config
//go:generate moq -stub -out constraint_mock.go . Constraint
// Constraint represents a constraint that is to be evaluated
type Constraint interface {
String() string
Assert() error
// noop implements a configUpdater that does not update a config.
type noop struct{}
func newNoopConfigUpdater() configUpdater {
c := noop{}
return &c
}
func (c noop) Update(cfg *Config) error {
return nil
}

View File

@@ -0,0 +1,36 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestNoopDoesNotModifyConfig(t *testing.T) {
cfg := &Config{
DebugFilePath: "test-path",
}
c := newNoopConfigUpdater()
err := c.Update(cfg)
require.NoError(t, err)
require.Equal(t, "test-path", cfg.DebugFilePath)
}

View File

@@ -0,0 +1,158 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"fmt"
"io"
"os"
"path/filepath"
"github.com/pelletier/go-toml"
log "github.com/sirupsen/logrus"
)
const (
configFileRelativePath = "nvidia-container-runtime/config.toml"
configOverride = "XDG_CONFIG_HOME"
defaultConfigRoot = "/etc"
nvidiaContainerCliSection = "nvidia-container-cli"
nvidiaContainerRuntimeConfigSection = "nvidia-container-runtime"
nvidiaContainerRuntimeExperimentalConfigSection = "nvidia-container-runtime.experimental"
)
type tomlConfig struct {
logger *log.Logger
path string
sections []tomlSection
}
type tomlSection struct {
section string
keys map[string]struct{}
}
func newDefaultConfigFileWithLogger(logger *log.Logger) configUpdater {
configDir := defaultConfigRoot
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
configDir = XDGConfigDir
}
configFilePath := filepath.Join(configDir, configFileRelativePath)
return newConfigFromFileWithLogger(logger, configFilePath)
}
func newConfigFromFileWithLogger(logger *log.Logger, filepath string) configUpdater {
if _, err := os.Stat(filepath); os.IsNotExist(err) {
logger.Warnf("The config file '%v' does not exist", filepath)
return newNoopConfigUpdater()
}
sections := []tomlSection{
{
section: nvidiaContainerRuntimeConfigSection,
},
{
section: nvidiaContainerRuntimeExperimentalConfigSection,
},
{
section: nvidiaContainerCliSection,
keys: map[string]struct{}{
"root": {},
},
},
}
c := tomlConfig{
logger: logger,
path: filepath,
sections: sections,
}
return &c
}
func (c tomlConfig) Update(cfg *Config) error {
configFile, err := os.Open(c.path)
if err != nil {
return fmt.Errorf("error opening config file %v: %v", c.path, err)
}
defer configFile.Close()
return c.updateFromReader(cfg, configFile)
}
func (c tomlConfig) updateFromReader(cfg *Config, reader io.Reader) error {
toml, err := toml.LoadReader(reader)
if err != nil {
return fmt.Errorf("error reading TOML contents: %v", err)
}
for _, section := range c.sections {
if v, ok := section.GetStringFrom(toml, "debug"); ok {
cfg.DebugFilePath = v
}
if v, ok := section.GetStringFrom(toml, "runtime-path"); ok {
cfg.RuntimePath = v
}
if v, ok := section.GetStringFrom(toml, "root"); ok {
cfg.Root = v
}
if v, ok := section.GetStringFrom(toml, "log-level"); ok {
cfg.LogLevel = v
}
}
return nil
}
func (c tomlSection) GetStringFrom(toml *toml.Tree, key string) (string, bool) {
value := c.GetFrom(toml, key)
if value != nil {
if v, ok := value.(string); ok {
return v, ok
}
}
return "", false
}
func (c tomlSection) GetFrom(toml *toml.Tree, key string) interface{} {
if !c.validKey(key) {
return nil
}
return toml.Get(c.configKey(key))
}
func (c tomlSection) validKey(key string) bool {
if c.keys == nil {
return true
}
_, exists := c.keys[key]
return exists
}
func (c tomlSection) configKey(key string) string {
if c.section == "" {
return key
}
return c.section + "." + key
}

View File

@@ -0,0 +1,259 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package config
import (
"fmt"
"io"
"io/ioutil"
"os"
"path"
"path/filepath"
"strings"
"testing"
"testing/iotest"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestUpdateFromReader(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
readerError bool
lines []string
expected *Config
expectedError bool
}{
{
description: "reader error returns error",
readerError: true,
expectedError: true,
expected: getDefaultConfig(),
},
{
description: "empty config returns defaults",
lines: []string{},
expected: &Config{
DebugFilePath: "/dev/null",
LogLevel: "info",
},
},
{
description: "debug output is set",
lines: []string{"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\""},
expected: &Config{
DebugFilePath: "nvidia-container-toolkit.log",
LogLevel: "info",
},
},
{
description: "debug output is set in section",
lines: []string{"[nvidia-container-runtime]", "debug=\"nvidia-container-toolkit.log\""},
expected: &Config{
DebugFilePath: "nvidia-container-toolkit.log",
LogLevel: "info",
},
},
{
description: "blank debug is set",
lines: []string{"nvidia-container-runtime.debug=\"\""},
expected: &Config{
DebugFilePath: "",
LogLevel: "info",
},
},
{
description: "non-string debug is ignored",
lines: []string{"nvidia-container-runtime.debug=2"},
expected: &Config{
DebugFilePath: "/dev/null",
LogLevel: "info",
},
},
{
description: "log-level is set",
lines: []string{"nvidia-container-runtime.log-level=\"trace\""},
expected: &Config{
DebugFilePath: "/dev/null",
LogLevel: "trace",
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
cfg := getDefaultConfig()
c := tomlConfig{
logger: logger,
sections: []tomlSection{
{section: nvidiaContainerRuntimeConfigSection},
},
}
var reader io.Reader
if tc.readerError {
reader = iotest.ErrReader(fmt.Errorf("error"))
} else {
reader = strings.NewReader(strings.Join(tc.lines, "\n"))
}
err := c.updateFromReader(cfg, reader)
if tc.expectedError {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.EqualValues(t, tc.expected, cfg)
})
}
}
func TestUpdateFromReaderExperimental(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
readerError bool
lines []string
expected *Config
expectedError bool
}{
{
lines: []string{"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\""},
expected: getDefaultConfig(),
},
{
lines: []string{"[nvidia-container-runtime]", "debug=\"nvidia-container-toolkit.log\""},
expected: getDefaultConfig(),
},
{
lines: []string{"nvidia-container-runtime.experimental.debug=\"\""},
expected: &Config{
DebugFilePath: "",
LogLevel: "info",
},
},
{
lines: []string{"nvidia-container-runtime.experimental.debug=2"},
expected: getDefaultConfig(),
},
{
lines: []string{"nvidia-container-runtime.experimental.debug=\"nvidia-container-toolkit.log\""},
expected: &Config{
DebugFilePath: "nvidia-container-toolkit.log",
LogLevel: "info",
},
},
{
lines: []string{"[nvidia-container-runtime.experimental]", "debug=\"nvidia-container-toolkit.log\""},
expected: &Config{
DebugFilePath: "nvidia-container-toolkit.log",
LogLevel: "info",
},
},
{
lines: []string{
"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\"",
"nvidia-container-runtime.experimental.debug=\"nvidia-container-exp-toolkit.log\"",
},
expected: &Config{
DebugFilePath: "nvidia-container-exp-toolkit.log",
LogLevel: "info",
},
},
}
for i, tc := range testCases {
cfg := getDefaultConfig()
c := tomlConfig{
logger: logger,
sections: []tomlSection{
{section: nvidiaContainerRuntimeExperimentalConfigSection},
},
}
var reader io.Reader
if tc.readerError {
reader = iotest.ErrReader(fmt.Errorf("error"))
} else {
reader = strings.NewReader(strings.Join(tc.lines, "\n"))
}
err := c.updateFromReader(cfg, reader)
if tc.expectedError {
require.Error(t, err, "%d: %v", i, tc)
} else {
require.NoError(t, err, "%d: %v", i, tc)
}
require.EqualValues(t, tc.expected, cfg, "%d: %v", i, tc)
}
}
func TestConfigFromFile(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
// By default debug is disabled
lines := []string{
"[nvidia-container-cli]",
"root = \"/run/nvidia/driver\"",
"[nvidia-container-runtime]",
"#debug = \"/nvidia-container-toolkit.log\"",
"",
"[nvidia-container-runtime.experimental]",
"debug = \"/nvidia-container-toolkit.experimental.log\"",
}
contents := []byte(strings.Join(lines, "\n"))
testDir := path.Join(wd, "test")
filename := path.Join(testDir, configFileRelativePath)
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
logger, _ := testlog.NewNullLogger()
c := newConfigFromFileWithLogger(logger, filename)
cfg := getDefaultConfig()
err = c.Update(cfg)
require.NoError(t, err)
require.Equal(t, "/nvidia-container-toolkit.experimental.log", cfg.DebugFilePath)
require.Equal(t, "/run/nvidia/driver", cfg.Root)
}
func TestConfigFromNonexistentFileReturnsNoop(t *testing.T) {
logger, _ := testlog.NewNullLogger()
c := newConfigFromFileWithLogger(logger, "/does/not/exist")
n, ok := c.(*noop)
require.True(t, ok)
require.NotNil(t, n)
}
func TestGetConfigKey(t *testing.T) {
require.Equal(t, "key", tomlSection{}.configKey("key"))
require.Equal(t, "section.key", tomlSection{section: "section"}.configKey("key"))
}

View File

@@ -0,0 +1,144 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package main
import (
"fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ensure"
"github.com/NVIDIA/nvidia-container-toolkit/internal/filter"
"github.com/NVIDIA/nvidia-container-toolkit/internal/modify"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
log "github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental/config"
)
const (
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
visibleDevicesVoid = "void"
)
var logger = log.New()
func main() {
cfg, err := config.GetConfig(logger)
if err != nil {
logger.Errorf("Error loading config: %v", err)
os.Exit(1)
}
if cfg.DebugFilePath != "" && cfg.DebugFilePath != "/dev/null" {
logFile, err := os.OpenFile(cfg.DebugFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
logger.Errorf("Error opening debug log file: %v", err)
os.Exit(1)
}
defer logFile.Close()
logger.SetOutput(logFile)
}
logLevel, err := log.ParseLevel(cfg.LogLevel)
if err == nil {
logger.SetLevel(logLevel)
} else {
logger.Warnf("Invalid log-level '%v'; using '%v'", cfg.LogLevel, logger.Level.String())
}
logger.Infof("Starting nvidia-container-runtime: %v", os.Args)
logger.Debugf("Using config=%+v", cfg)
if err := run(cfg, os.Args); err != nil {
logger.Errorf("Error running runtime: %v", err)
os.Exit(1)
}
}
func run(cfg *config.Config, args []string) error {
logger.Debugf("running with args=%v", args)
// We create a low-level runtime
lowLevelRuntime, err := createLowLevelRuntime(logger, cfg)
if err != nil {
return fmt.Errorf("error constructing low-level runtime: %v", err)
}
if !oci.HasCreateSubcommand(args) {
logger.Infof("No modification of OCI specification required")
logger.Infof("Forwarding command to runtime")
return lowLevelRuntime.Exec(args)
}
// We create the OCI spec that is to be modified
ociSpec, bundleDir, err := oci.NewSpecFromArgs(args)
if err != nil {
return fmt.Errorf("error constructing OCI spec: %v", err)
}
err = ociSpec.Load()
if err != nil {
return fmt.Errorf("error loading OCI specification: %v", err)
}
visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar)
if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid {
logger.Infof("Using low-level runtime: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists)
return lowLevelRuntime.Exec(os.Args)
}
// We create the modifier that will be applied by the Modifying Runtime Wrapper
modifier, err := createModifier(cfg.Root, bundleDir, visibleDevices, ociSpec)
if err != nil {
return fmt.Errorf("error constructing modifer: %v", err)
}
// We construct the Modifying runtime
r := runtime.NewModifyingRuntimeWrapperWithLogger(logger, lowLevelRuntime, ociSpec, modifier)
return r.Exec(os.Args)
}
func createLowLevelRuntime(logger *log.Logger, cfg *config.Config) (oci.Runtime, error) {
if cfg.RuntimePath == "" {
return oci.NewLowLevelRuntimeWithLogger(logger, "docker-runc", "runc")
}
logger.Infof("Creating runtime with path %v", cfg.RuntimePath)
return oci.NewRuntimeForPathWithLogger(logger, cfg.RuntimePath)
}
func createModifier(root string, bundleDir string, visibleDevices string, env filter.EnvLookup) (modify.Modifier, error) {
// We set up the modifier using discovery
discovered, err := discover.NewNVMLServerWithLogger(logger, root)
if err != nil {
return nil, fmt.Errorf("error discovering devices: %v", err)
}
// We apply a filter to the discovered devices
selected := filter.NewSelectDevicesFromWithLogger(logger, discovered, visibleDevices, env)
// We ensure that the selected devices are available
available := ensure.NewEnsureDevicesWithLogger(logger, selected, root)
// We construct the modifer for the OCI spec
modifier := modify.NewModifierWithLoggerFor(logger, available, root, bundleDir)
return modifier, nil
}

View File

@@ -1,25 +0,0 @@
# The NVIDIA Container Runtime
The NVIDIA Container Runtime is a shim for OCI-compliant low-level runtimes such as [runc](https://github.com/opencontainers/runc). When a `create` command is detected, the incoming [OCI runtime specification](https://github.com/opencontainers/runtime-spec) is modified in place and the command is forwarded to the low-level runtime.
## Standard Mode
In the standard mode configuration, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from project [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
## Experimental Mode
The NVIDIA Container Runtime can be configured in an experimental mode by setting the following options in the runtime's `config.toml` file:
```toml
[nvidia-container-runtime]
experimental = true
```
When this setting is enabled, the modifications made to the OCI specification are controlled by the `nvidia-container-runtime.discover-mode` option, with the following mode supported:
* `"legacy"`: This mode mirrors the behaviour of the standard mode, inserting the NVIDIA Container Runtime Hook as a `prestart` hook into the container's OCI specification.
* `"csv"`: This mode uses CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` to define the devices and mounts that are to be injected into a container when it is created.
### Notes on using the docker CLI
The `docker` CLI supports the `--gpus` flag to select GPUs for inclusion in a container. Since specifying this flag inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification. When experimental mode is activated, the NVIDIA Container Runtime detects the presence of the hook and raises an error. This requirement will be relaxed in the near future.

View File

@@ -20,220 +20,60 @@ import (
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"strconv"
"strings"
"github.com/sirupsen/logrus"
"github.com/tsaikd/KDGoLib/logrusutil"
)
// Logger adds a way to manage output to a log file to a logrus.Logger
type Logger struct {
*logrus.Logger
previousLogger *logrus.Logger
logFiles []*os.File
previousOutput io.Writer
logFile *os.File
}
// NewLogger creates an empty logger
// NewLogger constructs a Logger with a preddefined formatter
func NewLogger() *Logger {
return &Logger{
Logger: logrus.New(),
logrusLogger := logrus.New()
formatter := &logrusutil.ConsoleLogFormatter{
TimestampFormat: "2006/01/02 15:04:07",
Flag: logrusutil.Ltime,
}
logger := &Logger{
Logger: logrusLogger,
}
logger.SetFormatter(formatter)
return logger
}
// UpdateLogger constructs a Logger with a preddefined formatter
func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, error) {
configFromArgs := parseArgs(argv)
level, logLevelError := configFromArgs.getLevel(logLevel)
var logFiles []*os.File
var argLogFileError error
// We don't create log files if the version argument is supplied
if !configFromArgs.version {
configLogFile, err := createLogFile(filename)
if err != nil {
return logger, fmt.Errorf("error opening debug log file: %v", err)
}
if configLogFile != nil {
logFiles = append(logFiles, configLogFile)
}
argLogFile, err := createLogFile(configFromArgs.file)
if argLogFile != nil {
logFiles = append(logFiles, argLogFile)
}
argLogFileError = err
// LogToFile opens the specified file for appending and sets the logger to
// output to the opened file. A reference to the file pointer is stored to
// allow this to be closed.
func (l *Logger) LogToFile(filename string) error {
logFile, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
return fmt.Errorf("error opening debug log file: %v", err)
}
l := &Logger{
Logger: logrus.New(),
previousLogger: logger.Logger,
logFiles: logFiles,
}
l.logFile = logFile
l.previousOutput = l.Out
l.SetOutput(logFile)
l.SetLevel(level)
if level == logrus.DebugLevel {
logrus.SetReportCaller(true)
// Shorten function and file names reported by the logger, by
// trimming common "github.com/opencontainers/runc" prefix.
// This is only done for text formatter.
_, file, _, _ := runtime.Caller(0)
prefix := filepath.Dir(file) + "/"
logrus.SetFormatter(&logrus.TextFormatter{
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
function := strings.TrimPrefix(f.Function, prefix) + "()"
fileLine := strings.TrimPrefix(f.File, prefix) + ":" + strconv.Itoa(f.Line)
return function, fileLine
},
})
}
if configFromArgs.format == "json" {
l.SetFormatter(new(logrus.JSONFormatter))
}
if len(logFiles) == 0 {
l.SetOutput(io.Discard)
} else if len(logFiles) == 1 {
l.SetOutput(logFiles[0])
} else if len(logFiles) > 1 {
var writers []io.Writer
for _, f := range logFiles {
writers = append(writers, f)
}
l.SetOutput(io.MultiWriter(writers...))
}
if logLevelError != nil {
l.Warn(logLevelError)
}
if argLogFileError != nil {
l.Warnf("Failed to open log file: %v", argLogFileError)
}
return l, nil
return nil
}
// Reset closes the log file (if any) and resets the logger output to what it
// was before UpdateLogger was called.
func (l *Logger) Reset() error {
defer func() {
previous := l.previousLogger
if previous == nil {
previous = logrus.New()
}
logger = &Logger{Logger: previous}
}()
var errs []error
for _, f := range l.logFiles {
err := f.Close()
if err != nil {
errs = append(errs, err)
}
// CloseFile closes the log file (if any) and resets the logger output to what it
// was before LogToFile was called.
func (l *Logger) CloseFile() error {
if l.logFile == nil {
return nil
}
logFile := l.logFile
l.SetOutput(l.previousOutput)
l.logFile = nil
var err error
for _, e := range errs {
if err == nil {
err = e
continue
}
return fmt.Errorf("%v; %w", e, err)
}
return err
}
func createLogFile(filename string) (*os.File, error) {
if filename != "" && filename != os.DevNull {
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
}
return nil, nil
}
type loggerConfig struct {
file string
format string
debug bool
version bool
}
func (c loggerConfig) getLevel(logLevel string) (logrus.Level, error) {
if c.debug {
return logrus.DebugLevel, nil
}
if logLevel, err := logrus.ParseLevel(logLevel); err == nil {
return logLevel, nil
}
return logrus.InfoLevel, fmt.Errorf("invalid log-level '%v'", logLevel)
}
// Informed by Taken from https://github.com/opencontainers/runc/blob/7fd8b57001f5bfa102e89cb434d96bf71f7c1d35/main.go#L182
func parseArgs(args []string) loggerConfig {
c := loggerConfig{}
expected := map[string]*string{
"log-format": &c.format,
"log": &c.file,
}
found := make(map[string]bool)
for i := 0; i < len(args); i++ {
if len(found) == 4 {
break
}
param := args[i]
parts := strings.SplitN(param, "=", 2)
trimmed := strings.TrimLeft(parts[0], "-")
// If this is not a flag we continue
if parts[0] == trimmed {
continue
}
// Check the version flag
if trimmed == "version" {
c.version = true
found["version"] = true
// For the version flag we don't process any other flags
continue
}
// Check the debug flag
if trimmed == "debug" {
c.debug = true
found["debug"] = true
continue
}
destination, exists := expected[trimmed]
if !exists {
continue
}
var value string
if len(parts) == 2 {
value = parts[2]
} else if i+1 < len(args) {
value = args[i+1]
i++
} else {
continue
}
*destination = value
found[trimmed] = true
}
return c
return logFile.Close()
}

View File

@@ -3,82 +3,87 @@ package main
import (
"fmt"
"os"
"strings"
"path"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/pelletier/go-toml"
)
// version must be set by go build's -X main.version= option in the Makefile.
var version = "unknown"
const (
configOverride = "XDG_CONFIG_HOME"
configFilePath = "nvidia-container-runtime/config.toml"
// gitCommit will be the hash that the binary was built from
// and will be populated by the Makefile
var gitCommit = ""
hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook"
)
var (
configDir = "/etc/"
)
var logger = NewLogger()
func main() {
err := run(os.Args)
if err != nil {
logger.Errorf("%v", err)
logger.Errorf("Error running %v: %v", os.Args, err)
os.Exit(1)
}
}
// run is an entry point that allows for idiomatic handling of errors
// when calling from the main function.
func run(argv []string) (rerr error) {
printVersion := hasVersionFlag(argv)
if printVersion {
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
}
cfg, err := config.GetConfig()
func run(argv []string) (err error) {
cfg, err := getConfig()
if err != nil {
return fmt.Errorf("error loading config: %v", err)
}
logger, err = UpdateLogger(
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
argv,
)
err = logger.LogToFile(cfg.debugFilePath)
if err != nil {
return fmt.Errorf("failed to set up logger: %v", err)
return fmt.Errorf("error opening debug log file: %v", err)
}
defer logger.Reset()
defer func() {
// We capture and log a returning error before closing the log file.
if err != nil {
logger.Errorf("Error running %v: %v", argv, err)
}
logger.CloseFile()
}()
logger.Debugf("Command line arguments: %v", argv)
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
r, err := newRuntime(argv)
if err != nil {
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
return fmt.Errorf("error creating runtime: %v", err)
}
if printVersion {
fmt.Print("\n")
}
return runtime.Exec(argv)
logger.Printf("Running %s\n", argv[0])
return r.Exec(argv)
}
// TODO: This should be refactored / combined with parseArgs in logger.
func hasVersionFlag(args []string) bool {
for i := 0; i < len(args); i++ {
param := args[i]
type config struct {
debugFilePath string
}
parts := strings.SplitN(param, "=", 2)
trimmed := strings.TrimLeft(parts[0], "-")
// If this is not a flag we continue
if parts[0] == trimmed {
continue
}
// getConfig sets up the config struct. Values are read from a toml file
// or set via the environment.
func getConfig() (*config, error) {
cfg := &config{}
// Check the version flag
if trimmed == "version" {
return true
}
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
configDir = XDGConfigDir
}
return false
configFilePath := path.Join(configDir, configFilePath)
tomlContent, err := os.ReadFile(configFilePath)
if err != nil {
return nil, err
}
toml, err := toml.Load(string(tomlContent))
if err != nil {
return nil, err
}
cfg.debugFilePath = toml.GetDefault("nvidia-container-runtime.debug", "/dev/null").(string)
return cfg, nil
}

View File

@@ -3,15 +3,15 @@ package main
import (
"bytes"
"encoding/json"
"fmt"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"runtime"
"strings"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/require"
)
@@ -24,10 +24,6 @@ const (
unmodifiedSpecFileSuffix = "test/input/test_spec.json"
)
const (
runcExecutableName = "runc"
)
type testConfig struct {
root string
binPath string
@@ -39,7 +35,7 @@ func TestMain(m *testing.M) {
// TEST SETUP
// Determine the module root and the test binary path
var err error
moduleRoot, err := test.GetModuleRoot()
moduleRoot, err := getModuleRoot()
if err != nil {
logger.Fatalf("error in test setup: could not get module root: %v", err)
}
@@ -47,7 +43,7 @@ func TestMain(m *testing.M) {
testInputPath := filepath.Join(moduleRoot, "test", "input")
// Set the environment variables for the test
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
os.Setenv("PATH", prependToPath(testBinPath, moduleRoot))
os.Setenv("XDG_CONFIG_HOME", testInputPath)
// Confirm that the environment is configured correctly
@@ -75,6 +71,31 @@ func TestMain(m *testing.M) {
os.Exit(exitCode)
}
func getModuleRoot() (string, error) {
_, filename, _, _ := runtime.Caller(0)
return hasGoMod(filename)
}
func hasGoMod(dir string) (string, error) {
if dir == "" || dir == "/" {
return "", fmt.Errorf("module root not found")
}
_, err := os.Stat(filepath.Join(dir, "go.mod"))
if err != nil {
return hasGoMod(filepath.Dir(dir))
}
return dir, nil
}
func prependToPath(additionalPaths ...string) string {
paths := strings.Split(os.Getenv("PATH"), ":")
paths = append(additionalPaths, paths...)
return strings.Join(paths, ":")
}
// case 1) nvidia-container-runtime run --bundle
// case 2) nvidia-container-runtime create --bundle
// - Confirm the runtime handles bad input correctly
@@ -84,6 +105,11 @@ func TestBadInput(t *testing.T) {
t.Fatal(err)
}
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle")
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
output, err := cmdRun.CombinedOutput()
require.Errorf(t, err, "runtime should return an error", "output=%v", string(output))
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
err = cmdCreate.Run()
@@ -167,11 +193,11 @@ func TestDuplicateHook(t *testing.T) {
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json")
}
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
// addNVIDIAHook is a basic wrapper for nvidiaContainerRunime.addNVIDIAHook that is used for
// testing.
func addNVIDIAHook(spec *specs.Spec) error {
m := modifier.NewStableRuntimeModifier(logger.Logger)
return m.Modify(spec)
r := nvidiaContainerRuntime{logger: logger.Logger}
return r.addNVIDIAHook(spec)
}
func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
@@ -244,3 +270,24 @@ func nvidiaHookCount(hooks *specs.Hooks) int {
}
return count
}
func TestGetConfigWithCustomConfig(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
// By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
testDir := filepath.Join(wd, "test")
filename := filepath.Join(testDir, configFilePath)
os.Setenv(configOverride, testDir)
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
cfg, err := getConfig()
require.NoError(t, err)
require.Equal(t, cfg.debugFilePath, "/nvidia-container-toolkit.log")
}

View File

@@ -1,166 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/cuda"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/requirements"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// csvMode represents the modifications as performed by the csv runtime mode
type csvMode struct {
logger *logrus.Logger
discoverer discover.Discover
}
const (
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
visibleDevicesVoid = "void"
nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK"
)
// NewCSVModifier creates a modifier that applies modications to an OCI spec if required by the runtime wrapper.
// The modifications are defined by CSV MountSpecs.
func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
}
// In experimental mode, we check whether a modification is required at all and return the lowlevelRuntime directly
// if no modification is required.
visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar)
if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid {
logger.Infof("No modification required: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists)
return nil, nil
}
logger.Infof("Constructing modifier from config: %+v", *cfg)
config := &discover.Config{
Root: cfg.NVIDIAContainerCLIConfig.Root,
NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path,
}
// TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the
// visible devices are checked.
image, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
if err := checkRequirements(logger, &image); err != nil {
return nil, fmt.Errorf("requirements not met: %v", err)
}
csvFiles, err := csv.GetFileList(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath)
if err != nil {
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
}
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
if nvidiaRequireJetpack != "csv-mounts=all" {
csvFiles = csv.BaseFilesOnly(csvFiles)
}
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
if err != nil {
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
}
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
if err != nil {
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
}
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config)
if err != nil {
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
}
d := discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
return newModifierFromDiscoverer(logger, d)
}
// newModifierFromDiscoverer created a modifier that aplies the discovered
// modifications to an OCI spec if require by the runtime wrapper.
func newModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
m := csvMode{
logger: logger,
discoverer: d,
}
return &m, nil
}
// Modify applies the required modifications to the incomming OCI spec. These modifications
// are applied in-place.
func (m csvMode) Modify(spec *specs.Spec) error {
err := nvidiaContainerRuntimeHookRemover{m.logger}.Modify(spec)
if err != nil {
return fmt.Errorf("failed to remove existing hooks: %v", err)
}
specEdits, err := edits.NewSpecEdits(m.logger, m.discoverer)
if err != nil {
return fmt.Errorf("failed to get required container edits: %v", err)
}
return specEdits.Modify(spec)
}
func checkRequirements(logger *logrus.Logger, image *image.CUDA) error {
if image.HasDisableRequire() {
// TODO: We could print the real value here instead
logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", true)
return nil
}
imageRequirements, err := image.GetRequirements()
if err != nil {
// TODO: Should we treat this as a failure, or just issue a warning?
return fmt.Errorf("failed to get image requirements: %v", err)
}
r := requirements.New(logger, imageRequirements)
cudaVersion, err := cuda.Version()
if err != nil {
logger.Warnf("Failed to get CUDA version: %v", err)
} else {
r.AddVersionProperty(requirements.CUDA, cudaVersion)
}
compteCapability, err := cuda.ComputeCapability(0)
if err != nil {
logger.Warnf("Failed to get CUDA Compute Capability: %v", err)
} else {
r.AddVersionProperty(requirements.ARCH, compteCapability)
}
return r.Assert()
}

View File

@@ -1,284 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestNewCSVModifier(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
cfg *config.Config
spec oci.Spec
visibleDevices string
expectedError error
expectedNil bool
}{
{
description: "spec load error returns error",
spec: &oci.SpecMock{
LoadFunc: func() (*specs.Spec, error) {
return nil, fmt.Errorf("load failed")
},
},
expectedError: fmt.Errorf("load failed"),
},
{
description: "visible devices not set returns nil",
visibleDevices: "NOT_SET",
expectedNil: true,
},
{
description: "visible devices empty returns nil",
visibleDevices: "",
expectedNil: true,
},
{
description: "visible devices 'void' returns nil",
visibleDevices: "void",
expectedNil: true,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
spec := tc.spec
if spec == nil {
spec = &oci.SpecMock{
LookupEnvFunc: func(s string) (string, bool) {
if tc.visibleDevices != "NOT_SET" && s == visibleDevicesEnvvar {
return tc.visibleDevices, true
}
return "", false
},
}
}
m, err := NewCSVModifier(logger, tc.cfg, spec)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
if tc.expectedNil || tc.expectedError != nil {
require.Nil(t, m)
} else {
require.NotNil(t, m)
}
})
}
}
func TestExperimentalModifier(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
discover *discover.DiscoverMock
spec *specs.Spec
expectedError error
expectedSpec *specs.Spec
}{
{
description: "empty discoverer does not modify spec",
discover: &discover.DiscoverMock{},
},
{
description: "failed hooks discoverer returns error",
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
return nil, fmt.Errorf("discover.Hooks error")
},
},
expectedError: fmt.Errorf("discover.Hooks error"),
},
{
description: "discovered hooks are injected into spec",
spec: &specs.Spec{},
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
hooks := []discover.Hook{
{
Lifecycle: "prestart",
Path: "/hook/a",
Args: []string{"/hook/a", "arga"},
},
{
Lifecycle: "createContainer",
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
}
return hooks, nil
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/hook/a",
Args: []string{"/hook/a", "arga"},
},
},
CreateContainer: []specs.Hook{
{
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
},
},
},
},
{
description: "existing hooks are maintained",
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/hook/a",
Args: []string{"/hook/a", "arga"},
},
},
},
},
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
hooks := []discover.Hook{
{
Lifecycle: "prestart",
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
}
return hooks, nil
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/hook/a",
Args: []string{"/hook/a", "arga"},
},
{
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
},
},
},
},
{
description: "modification removes existing nvidia-container-runtime-hook",
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-runtime-hook",
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
},
},
},
},
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
hooks := []discover.Hook{
{
Lifecycle: "prestart",
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
}
return hooks, nil
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
},
},
},
},
{
description: "modification removes existing nvidia-container-toolkit",
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/path/to/nvidia-container-toolkit",
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
},
},
},
},
discover: &discover.DiscoverMock{
HooksFunc: func() ([]discover.Hook, error) {
hooks := []discover.Hook{
{
Lifecycle: "prestart",
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
}
return hooks, nil
},
},
expectedSpec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "/hook/b",
Args: []string{"/hook/b", "argb"},
},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
m, err := newModifierFromDiscoverer(logger, tc.discover)
require.NoError(t, err)
err = m.Modify(tc.spec)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.EqualValues(t, tc.expectedSpec, tc.spec)
})
}
}

View File

@@ -1,79 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package modifier
import (
"path/filepath"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// nvidiaContainerRuntimeHookRemover is a spec modifer that detects and removes inserted nvidia-container-runtime hooks
type nvidiaContainerRuntimeHookRemover struct {
logger *logrus.Logger
}
var _ oci.SpecModifier = (*nvidiaContainerRuntimeHookRemover)(nil)
// Modify removes any NVIDIA Container Runtime hooks from the provided spec
func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error {
if spec == nil {
return nil
}
if spec.Hooks == nil {
return nil
}
if len(spec.Hooks.Prestart) == 0 {
return nil
}
var newPrestart []specs.Hook
for _, hook := range spec.Hooks.Prestart {
if isNVIDIAContainerRuntimeHook(&hook) {
m.logger.Debugf("Removing hook %v", hook)
continue
}
newPrestart = append(newPrestart, hook)
}
if len(newPrestart) != len(spec.Hooks.Prestart) {
m.logger.Debugf("Updating 'prestart' hooks to %v", newPrestart)
spec.Hooks.Prestart = newPrestart
}
return nil
}
// isNVIDIAContainerRuntimeHook checks if the provided hook is an nvidia-container-runtime-hook
// or nvidia-container-toolkit hook. These are included, for example, by the non-experimental
// nvidia-container-runtime or docker when specifying the --gpus flag.
func isNVIDIAContainerRuntimeHook(hook *specs.Hook) bool {
bins := map[string]struct{}{
config.NVIDIAContainerRuntimeHookExecutable: {},
config.NVIDIAContainerToolkitExecutable: {},
}
_, exists := bins[filepath.Base(hook.Path)]
return exists
}

View File

@@ -1,77 +0,0 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package modifier
import (
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
)
// NewStableRuntimeModifier creates an OCI spec modifier that inserts the NVIDIA Container Runtime Hook into an OCI
// spec. The specified logger is used to capture log output.
func NewStableRuntimeModifier(logger *logrus.Logger) oci.SpecModifier {
m := stableRuntimeModifier{logger: logger}
return &m
}
// stableRuntimeModifier modifies an OCI spec inplace, inserting the nvidia-container-runtime-hook as a
// prestart hook. If the hook is already present, no modification is made.
type stableRuntimeModifier struct {
logger *logrus.Logger
}
// Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook
// as a prestart hook.
func (m stableRuntimeModifier) Modify(spec *specs.Spec) error {
path, err := exec.LookPath(config.NVIDIAContainerRuntimeHookExecutable)
if err != nil {
path = filepath.Join(config.DefaultExecutableDir, config.NVIDIAContainerRuntimeHookExecutable)
_, err = os.Stat(path)
if err != nil {
return err
}
}
m.logger.Infof("Using prestart hook path: %s", path)
args := []string{path}
if spec.Hooks == nil {
spec.Hooks = &specs.Hooks{}
} else if len(spec.Hooks.Prestart) != 0 {
for _, hook := range spec.Hooks.Prestart {
if strings.Contains(hook.Path, config.NVIDIAContainerRuntimeHookExecutable) {
m.logger.Infof("existing nvidia prestart hook found in OCI spec")
return nil
}
}
}
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
Path: path,
Args: append(args, "prestart"),
})
return nil
}

View File

@@ -1,170 +0,0 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package modifier
import (
"os"
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
type testConfig struct {
root string
binPath string
}
var cfg *testConfig
func TestMain(m *testing.M) {
// TEST SETUP
// Determine the module root and the test binary path
var err error
moduleRoot, err := test.GetModuleRoot()
if err != nil {
logrus.Fatalf("error in test setup: could not get module root: %v", err)
}
testBinPath := filepath.Join(moduleRoot, "test", "bin")
// Set the environment variables for the test
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
// Store the root and binary paths in the test Config
cfg = &testConfig{
root: moduleRoot,
binPath: testBinPath,
}
// RUN TESTS
exitCode := m.Run()
os.Exit(exitCode)
}
func TestAddHookModifier(t *testing.T) {
logger, logHook := testlog.NewNullLogger()
testHookPath := filepath.Join(cfg.binPath, "nvidia-container-runtime-hook")
testCases := []struct {
description string
spec specs.Spec
expectedError error
expectedSpec specs.Spec
}{
{
description: "empty spec adds hook",
spec: specs.Spec{},
expectedSpec: specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: testHookPath,
Args: []string{testHookPath, "prestart"},
},
},
},
},
},
{
description: "spec with empty hooks adds hook",
spec: specs.Spec{
Hooks: &specs.Hooks{},
},
expectedSpec: specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: testHookPath,
Args: []string{testHookPath, "prestart"},
},
},
},
},
},
{
description: "hook is not replaced",
spec: specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "nvidia-container-runtime-hook",
},
},
},
},
expectedSpec: specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "nvidia-container-runtime-hook",
},
},
},
},
},
{
description: "other hooks are not replaced",
spec: specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "some-hook",
},
},
},
},
expectedSpec: specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{
{
Path: "some-hook",
},
{
Path: testHookPath,
Args: []string{testHookPath, "prestart"},
},
},
},
},
},
}
for _, tc := range testCases {
logHook.Reset()
t.Run(tc.description, func(t *testing.T) {
m := NewStableRuntimeModifier(logger)
err := m.Modify(&tc.spec)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.EqualValues(t, tc.expectedSpec, tc.spec)
})
}
}

View File

@@ -0,0 +1,132 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package main
import (
"fmt"
"os"
"os/exec"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
log "github.com/sirupsen/logrus"
)
// nvidiaContainerRuntime encapsulates the NVIDIA Container Runtime. It wraps the specified runtime, conditionally
// modifying the specified OCI specification before invoking the runtime.
type nvidiaContainerRuntime struct {
logger *log.Logger
runtime oci.Runtime
ociSpec oci.Spec
}
var _ oci.Runtime = (*nvidiaContainerRuntime)(nil)
// newNvidiaContainerRuntime is a constructor for a standard runtime shim.
func newNvidiaContainerRuntimeWithLogger(logger *log.Logger, runtime oci.Runtime, ociSpec oci.Spec) (oci.Runtime, error) {
r := nvidiaContainerRuntime{
logger: logger,
runtime: runtime,
ociSpec: ociSpec,
}
return &r, nil
}
// Exec defines the entrypoint for the NVIDIA Container Runtime. A check is performed to see whether modifications
// to the OCI spec are required -- and applicable modifcations applied. The supplied arguments are then
// forwarded to the underlying runtime's Exec method.
func (r nvidiaContainerRuntime) Exec(args []string) error {
if r.modificationRequired(args) {
err := r.modifyOCISpec()
if err != nil {
return fmt.Errorf("error modifying OCI spec: %v", err)
}
}
r.logger.Println("Forwarding command to runtime")
return r.runtime.Exec(args)
}
// modificationRequired checks the intput arguments to determine whether a modification
// to the OCI spec is required.
func (r nvidiaContainerRuntime) modificationRequired(args []string) bool {
if oci.HasCreateSubcommand(args) {
r.logger.Infof("'create' command detected; modification required")
return true
}
r.logger.Infof("No modification required")
return false
}
// modifyOCISpec loads and modifies the OCI spec specified in the nvidiaContainerRuntime
// struct. The spec is modified in-place and written to the same file as the input after
// modifcationas are applied.
func (r nvidiaContainerRuntime) modifyOCISpec() error {
err := r.ociSpec.Load()
if err != nil {
return fmt.Errorf("error loading OCI specification for modification: %v", err)
}
err = r.ociSpec.Modify(r.addNVIDIAHook)
if err != nil {
return fmt.Errorf("error injecting NVIDIA Container Runtime hook: %v", err)
}
err = r.ociSpec.Flush()
if err != nil {
return fmt.Errorf("error writing modified OCI specification: %v", err)
}
return nil
}
// addNVIDIAHook modifies the specified OCI specification in-place, inserting a
// prestart hook.
func (r nvidiaContainerRuntime) addNVIDIAHook(spec *specs.Spec) error {
path, err := exec.LookPath("nvidia-container-runtime-hook")
if err != nil {
path = hookDefaultFilePath
_, err = os.Stat(path)
if err != nil {
return err
}
}
r.logger.Printf("prestart hook path: %s\n", path)
args := []string{path}
if spec.Hooks == nil {
spec.Hooks = &specs.Hooks{}
} else if len(spec.Hooks.Prestart) != 0 {
for _, hook := range spec.Hooks.Prestart {
if !strings.Contains(hook.Path, "nvidia-container-runtime-hook") {
continue
}
r.logger.Println("existing nvidia prestart hook in OCI spec file")
return nil
}
}
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
Path: path,
Args: append(args, "prestart"),
})
return nil
}

View File

@@ -0,0 +1,203 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package main
import (
"fmt"
"strings"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestAddNvidiaHook(t *testing.T) {
logger, logHook := testlog.NewNullLogger()
shim := nvidiaContainerRuntime{
logger: logger,
}
testCases := []struct {
spec *specs.Spec
errorPrefix string
shouldNotAdd bool
}{
{
spec: &specs.Spec{},
},
{
spec: &specs.Spec{
Hooks: &specs.Hooks{},
},
},
{
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{{
Path: "some-hook",
}},
},
},
},
{
spec: &specs.Spec{
Hooks: &specs.Hooks{
Prestart: []specs.Hook{{
Path: "nvidia-container-runtime-hook",
}},
},
},
shouldNotAdd: true,
},
}
for i, tc := range testCases {
logHook.Reset()
var numPrestartHooks int
if tc.spec.Hooks != nil {
numPrestartHooks = len(tc.spec.Hooks.Prestart)
}
err := shim.addNVIDIAHook(tc.spec)
if tc.errorPrefix == "" {
require.NoErrorf(t, err, "%d: %v", i, tc)
} else {
require.Truef(t, strings.HasPrefix(err.Error(), tc.errorPrefix), "%d: %v", i, tc)
require.NotNilf(t, tc.spec.Hooks, "%d: %v", i, tc)
require.Equalf(t, 1, nvidiaHookCount(tc.spec.Hooks), "%d: %v", i, tc)
if tc.shouldNotAdd {
require.Equal(t, numPrestartHooks+1, len(tc.spec.Hooks.Poststart), "%d: %v", i, tc)
} else {
require.Equal(t, numPrestartHooks+1, len(tc.spec.Hooks.Poststart), "%d: %v", i, tc)
nvidiaHook := tc.spec.Hooks.Poststart[len(tc.spec.Hooks.Poststart)-1]
// TODO: This assumes that the hook has been set up in the makefile
expectedPath := "/usr/bin/nvidia-container-runtime-hook"
require.Equalf(t, expectedPath, nvidiaHook.Path, "%d: %v", i, tc)
require.Equalf(t, []string{expectedPath, "prestart"}, nvidiaHook.Args, "%d: %v", i, tc)
require.Emptyf(t, nvidiaHook.Env, "%d: %v", i, tc)
require.Nilf(t, nvidiaHook.Timeout, "%d: %v", i, tc)
}
}
}
}
func TestNvidiaContainerRuntime(t *testing.T) {
logger, hook := testlog.NewNullLogger()
testCases := []struct {
shim nvidiaContainerRuntime
shouldModify bool
args []string
modifyError error
writeError error
}{
{
shim: nvidiaContainerRuntime{},
shouldModify: false,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"create"},
shouldModify: true,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"--bundle=create"},
shouldModify: false,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"--bundle", "create"},
shouldModify: false,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"create"},
shouldModify: true,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"create"},
modifyError: fmt.Errorf("error modifying"),
shouldModify: true,
},
{
shim: nvidiaContainerRuntime{},
args: []string{"create"},
writeError: fmt.Errorf("error writing"),
shouldModify: true,
},
}
for i, tc := range testCases {
tc.shim.logger = logger
hook.Reset()
ociMock := &oci.SpecMock{
ModifyFunc: func(specModifier oci.SpecModifier) error {
return tc.modifyError
},
FlushFunc: func() error {
return tc.writeError
},
}
require.Equal(t, tc.shouldModify, tc.shim.modificationRequired(tc.args), "%d: %v", i, tc)
tc.shim.ociSpec = ociMock
tc.shim.runtime = &MockShim{}
err := tc.shim.Exec(tc.args)
if tc.modifyError != nil || tc.writeError != nil {
require.Error(t, err, "%d: %v", i, tc)
} else {
require.NoError(t, err, "%d: %v", i, tc)
}
if tc.shouldModify {
require.Equal(t, 1, len(ociMock.ModifyCalls()), "%d: %v", i, tc)
} else {
require.Equal(t, 0, len(ociMock.ModifyCalls()), "%d: %v", i, tc)
}
writeExpected := tc.shouldModify && tc.modifyError == nil
if writeExpected {
require.Equal(t, 1, len(ociMock.FlushCalls()), "%d: %v", i, tc)
} else {
require.Equal(t, 0, len(ociMock.FlushCalls()), "%d: %v", i, tc)
}
}
}
type MockShim struct {
called bool
args []string
returnError error
}
func (m *MockShim) Exec(args []string) error {
m.called = true
m.args = args
return m.returnError
}

View File

@@ -1,5 +1,5 @@
/*
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -19,55 +19,56 @@ package main
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
"github.com/sirupsen/logrus"
)
// newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger
func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv []string) (oci.Runtime, error) {
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, cfg.NVIDIAContainerRuntimeConfig.Runtimes)
if err != nil {
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
}
const (
ociSpecFileName = "config.json"
dockerRuncExecutableName = "docker-runc"
runcExecutableName = "runc"
)
if !oci.HasCreateSubcommand(argv) {
logger.Debugf("Skipping modifier for non-create subcommand")
return lowLevelRuntime, nil
}
ociSpec, err := oci.NewSpec(logger, argv)
// newRuntime is a factory method that constructs a runtime based on the selected configuration.
func newRuntime(argv []string) (oci.Runtime, error) {
ociSpec, err := newOCISpec(argv)
if err != nil {
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
}
specModifier, err := newSpecModifier(logger, cfg, ociSpec, argv)
runc, err := newRuncRuntime()
if err != nil {
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
return nil, fmt.Errorf("error constructing runc runtime: %v", err)
}
// Create the wrapping runtime with the specified modifier
r := runtime.NewModifyingRuntimeWrapper(
logger,
lowLevelRuntime,
ociSpec,
specModifier,
)
r, err := newNvidiaContainerRuntimeWithLogger(logger.Logger, runc, ociSpec)
if err != nil {
return nil, fmt.Errorf("error constructing NVIDIA Container Runtime: %v", err)
}
return r, nil
}
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
case "legacy":
return modifier.NewStableRuntimeModifier(logger), nil
case "csv":
return modifier.NewCSVModifier(logger, cfg, ociSpec)
// newOCISpec constructs an OCI spec for the provided arguments
func newOCISpec(argv []string) (oci.Spec, error) {
bundleDir, err := oci.GetBundleDir(argv)
if err != nil {
return nil, fmt.Errorf("error parsing command line arguments: %v", err)
}
logger.Infof("Using bundle directory: %v", bundleDir)
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
ociSpecPath := oci.GetSpecFilePath(bundleDir)
logger.Infof("Using OCI specification file path: %v", ociSpecPath)
ociSpec := oci.NewSpecFromFile(ociSpecPath)
return ociSpec, nil
}
// newRuncRuntime locates the runc binary and wraps it in a SyscallExecRuntime
func newRuncRuntime() (oci.Runtime, error) {
return oci.NewLowLevelRuntimeWithLogger(
logger.Logger,
dockerRuncExecutableName,
runcExecutableName,
)
}

View File

@@ -17,113 +17,14 @@
package main
import (
"encoding/json"
"os"
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestFactoryMethod(t *testing.T) {
logger, _ := testlog.NewNullLogger()
func TestConstructor(t *testing.T) {
shim, err := newRuntime([]string{})
testCases := []struct {
description string
cfg *config.Config
spec *specs.Spec
expectedError bool
}{
{
description: "empty config raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
},
expectedError: true,
},
{
description: "config with runtime raises no error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "legacy",
},
},
},
{
description: "csv mode is supported",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "csv",
},
},
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{
"NVIDIA_VISIBLE_DEVICES=all",
},
},
},
},
{
description: "non-legacy discover mode raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "non-legacy",
},
},
expectedError: true,
},
{
description: "legacy discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "legacy",
},
},
},
{
description: "csv discover mode returns modifier",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
Mode: "csv",
},
},
},
{
description: "empty mode raises error",
cfg: &config.Config{
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
Runtimes: []string{"runc"},
},
},
expectedError: true,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
bundleDir := t.TempDir()
specFile, err := os.Create(filepath.Join(bundleDir, "config.json"))
require.NoError(t, err)
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
argv := []string{"--bundle", bundleDir, "create"}
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
if tc.expectedError {
require.Error(t, err)
} else {
require.NoError(t, err)
}
})
}
require.NoError(t, err)
require.NotNil(t, shim)
}

View File

@@ -7,9 +7,9 @@ import (
"os"
"path"
"path/filepath"
"strconv"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"golang.org/x/mod/semver"
)
@@ -104,6 +104,32 @@ type HookState struct {
BundlePath string `json:"bundlePath"`
}
func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
if _, err := fmt.Sscanf(cudaVersion, "%d.%d.%d\n", &vmaj, &vmin, &vpatch); err != nil {
vpatch = 0
if _, err := fmt.Sscanf(cudaVersion, "%d.%d\n", &vmaj, &vmin); err != nil {
vmin = 0
if _, err := fmt.Sscanf(cudaVersion, "%d\n", &vmaj); err != nil {
log.Panicln("invalid CUDA version:", cudaVersion)
}
}
}
return
}
func getEnvMap(e []string) (m map[string]string) {
m = make(map[string]string)
for _, s := range e {
p := strings.SplitN(s, "=", 2)
if len(p) != 2 {
log.Panicln("environment error")
}
m[p[0]] = p[1]
}
return
}
func loadSpec(path string) (spec *Spec) {
f, err := os.Open(path)
if err != nil {
@@ -165,6 +191,12 @@ func isPrivileged(s *Spec) bool {
return false
}
func isLegacyCUDAImage(env map[string]string) bool {
legacyCudaVersion := env[envCUDAVersion]
cudaRequire := env[envNVRequireCUDA]
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
}
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
// Build a list of envvars to consider.
envVars := []string{envNVVisibleDevices}
@@ -303,11 +335,27 @@ func getDriverCapabilities(env map[string]string, supportedDriverCapabilities Dr
return capabilities
}
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
legacyImage := image.IsLegacy()
func getRequirements(env map[string]string, legacyImage bool) []string {
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
for name, value := range env {
if strings.HasPrefix(name, envNVRequirePrefix) {
requirements = append(requirements, value)
}
}
if legacyImage {
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
requirements = append(requirements, cudaRequire)
}
return requirements
}
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
legacyImage := isLegacyCUDAImage(env)
var devices string
if d := getDevices(hookConfig, image, mounts, privileged, legacyImage); d != nil {
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
devices = *d
} else {
// 'nil' devices means this is not a GPU container.
@@ -315,7 +363,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
}
var migConfigDevices string
if d := getMigConfigDevices(image); d != nil {
if d := getMigConfigDevices(env); d != nil {
migConfigDevices = *d
}
if !privileged && migConfigDevices != "" {
@@ -323,21 +371,19 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
}
var migMonitorDevices string
if d := getMigMonitorDevices(image); d != nil {
if d := getMigMonitorDevices(env); d != nil {
migMonitorDevices = *d
}
if !privileged && migMonitorDevices != "" {
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
}
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
driverCapabilities := getDriverCapabilities(env, hookConfig.SupportedDriverCapabilities, legacyImage).String()
requirements, err := image.GetRequirements()
if err != nil {
log.Panicln("failed to get requirements", err)
}
requirements := getRequirements(env, legacyImage)
disableRequire := image.HasDisableRequire()
// Don't fail on invalid values.
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
return &nvidiaConfig{
Devices: devices,
@@ -363,17 +409,13 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
s := loadSpec(path.Join(b, "config.json"))
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
if err != nil {
log.Panicln(err)
}
env := getEnvMap(s.Process.Env)
privileged := isPrivileged(s)
envSwarmGPU = hook.SwarmResource
return containerConfig{
Pid: h.Pid,
Rootfs: s.Root.Path,
Env: image,
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
Env: env,
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
}
}

View File

@@ -7,7 +7,6 @@ import (
"reflect"
"github.com/BurntSushi/toml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
)
const (
@@ -42,11 +41,10 @@ type HookConfig struct {
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"`
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"`
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
}
func getDefaultHookConfig() HookConfig {
func getDefaultHookConfig() (config HookConfig) {
return HookConfig{
DisableRequire: false,
SwarmResource: nil,
@@ -65,7 +63,6 @@ func getDefaultHookConfig() HookConfig {
User: nil,
Ldconfig: nil,
},
NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(),
}
}

View File

@@ -7,6 +7,51 @@ import (
"github.com/stretchr/testify/require"
)
func TestParseCudaVersionValid(t *testing.T) {
var tests = []struct {
version string
expected [3]uint32
}{
{"0", [3]uint32{0, 0, 0}},
{"8", [3]uint32{8, 0, 0}},
{"7.5", [3]uint32{7, 5, 0}},
{"9.0.116", [3]uint32{9, 0, 116}},
{"4294967295.4294967295.4294967295", [3]uint32{4294967295, 4294967295, 4294967295}},
}
for i, c := range tests {
vmaj, vmin, vpatch := parseCudaVersion(c.version)
version := [3]uint32{vmaj, vmin, vpatch}
require.Equal(t, c.expected, version, "%d: %v", i, c)
}
}
func TestParseCudaVersionInvalid(t *testing.T) {
var tests = []string{
"foo",
"foo.5.10",
"9.0.116.50",
"9.0.116foo",
"7.foo",
"9.0.bar",
"9.4294967296",
"9.0.116.",
"9..0",
"9.",
".5.10",
"-9",
"+9",
"-9.1.116",
"-9.-1.-116",
}
for _, c := range tests {
require.Panics(t, func() {
parseCudaVersion(c)
}, "parseCudaVersion(%v)", c)
}
}
func TestIsPrivileged(t *testing.T) {
var tests = []struct {
spec string

View File

@@ -6,21 +6,20 @@ import (
"log"
"os"
"os/exec"
"path"
"path/filepath"
"runtime"
"runtime/debug"
"strconv"
"strings"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
)
var (
debugflag = flag.Bool("debug", false, "enable debug output")
versionflag = flag.Bool("version", false, "enable version output")
configflag = flag.String("config", "", "configuration file")
debugflag = flag.Bool("debug", false, "enable debug output")
configflag = flag.String("config", "", "configuration file")
defaultPATH = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
)
func exit() {
@@ -36,16 +35,28 @@ func exit() {
os.Exit(0)
}
func getPATH(config CLIConfig) string {
dirs := filepath.SplitList(os.Getenv("PATH"))
// directories from the hook environment have higher precedence
dirs = append(dirs, defaultPATH...)
if config.Root != nil {
rootDirs := []string{}
for _, dir := range dirs {
rootDirs = append(rootDirs, path.Join(*config.Root, dir))
}
// directories with the root prefix have higher precedence
dirs = append(rootDirs, dirs...)
}
return strings.Join(dirs, ":")
}
func getCLIPath(config CLIConfig) string {
if config.Path != nil {
return *config.Path
}
var root string
if config.Root != nil {
root = *config.Root
}
if err := os.Setenv("PATH", lookup.GetPath(root)); err != nil {
if err := os.Setenv("PATH", getPATH(config)); err != nil {
log.Panicln("couldn't set PATH variable:", err)
}
@@ -74,10 +85,6 @@ func doPrestart() {
hook := getHookConfig()
cli := hook.NvidiaContainerCLI
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.")
}
container := getContainerConfig(hook)
nvidia := container.Nvidia
if nvidia == nil {
@@ -160,11 +167,6 @@ func main() {
flag.Usage = usage
flag.Parse()
if *versionflag {
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime Hook", info.GetVersionString())
return
}
args := flag.Args()
if len(args) == 0 {
flag.Usage()
@@ -184,12 +186,3 @@ func main() {
os.Exit(2)
}
}
// logInterceptor implements the info.Logger interface to allow for logging from this function.
type logInterceptor struct{}
func (l *logInterceptor) Infof(format string, args ...interface{}) {
log.Printf(format, args...)
}
func (l *logInterceptor) Debugf(format string, args ...interface{}) {}

View File

@@ -1,3 +0,0 @@
# NVIDIA Container Toolkit CLI
The NVIDIA Container Toolkit CLI `nvidia-ctk` provides a number of utilities that are useful for working with the NVIDIA Container Toolkit.

View File

@@ -1,229 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package symlinks
import (
"fmt"
"os"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
}
type config struct {
hostRoot string
filenames cli.StringSlice
links cli.StringSlice
containerSpec string
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build
func (m command) build() *cli.Command {
cfg := config{}
// Create the '' command
c := cli.Command{
Name: "create-symlinks",
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "host-root",
Usage: "The root on the host filesystem to use to resolve symlinks",
Destination: &cfg.hostRoot,
},
&cli.StringSliceFlag{
Name: "csv-filename",
Usage: "Specify a (CSV) filename to process",
Destination: &cfg.filenames,
},
&cli.StringSliceFlag{
Name: "link",
Usage: "Specify a specific link to create. The link is specified as source:target",
Destination: &cfg.links,
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) run(c *cli.Context, cfg *config) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRoot, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %v", err)
}
csvFiles := cfg.filenames.Value()
chainLocator := lookup.NewSymlinkChainLocator(m.logger, cfg.hostRoot)
var candidates []string
for _, file := range csvFiles {
mountSpecs, err := csv.NewCSVFileParser(m.logger, file).Parse()
if err != nil {
m.logger.Debugf("Skipping CSV file %v: %v", file, err)
continue
}
for _, ms := range mountSpecs {
if ms.Type != csv.MountSpecSym {
continue
}
targets, err := chainLocator.Locate(ms.Path)
if err != nil {
m.logger.Warnf("Failed to locate symlink %v", ms.Path)
}
candidates = append(candidates, targets...)
}
}
created := make(map[string]bool)
// candidates is a list of absolute paths to symlinks in a chain, or the final target of the chain.
for _, candidate := range candidates {
targets, err := m.Locate(candidate)
if err != nil {
m.logger.Debugf("Skipping invalid link: %v", err)
continue
} else if len(targets) != 1 {
m.logger.Debugf("Unexepected number of targets: %v", targets)
continue
} else if targets[0] == candidate {
m.logger.Debugf("%v is not a symlink", candidate)
continue
}
err = m.createLink(created, cfg.hostRoot, containerRoot, targets[0], candidate)
if err != nil {
m.logger.Warnf("Failed to create link %v: %v", []string{targets[0], candidate}, err)
}
}
links := cfg.links.Value()
for _, l := range links {
parts := strings.Split(l, ":")
if len(parts) != 2 {
m.logger.Warnf("Invalid link specification %v", l)
continue
}
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
if err != nil {
m.logger.Warnf("Failed to create link %v: %v", parts, err)
}
}
return nil
}
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
linkPath, err := changeRoot(hostRoot, containerRoot, link)
if err != nil {
m.logger.Warnf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
}
if created[linkPath] {
m.logger.Debugf("Link %v already created", linkPath)
return nil
}
targetPath, err := changeRoot(hostRoot, "/", target)
if err != nil {
m.logger.Warnf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
}
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)
err = os.MkdirAll(filepath.Dir(linkPath), 0755)
if err != nil {
return fmt.Errorf("failed to create directory: %v", err)
}
err = os.Symlink(target, linkPath)
if err != nil {
return fmt.Errorf("failed to create symlink: %v", err)
}
return nil
}
func changeRoot(current string, new string, path string) (string, error) {
if !filepath.IsAbs(path) {
return path, nil
}
relative := path
if current != "" {
r, err := filepath.Rel(current, path)
if err != nil {
return "", err
}
relative = r
}
return filepath.Join(new, relative), nil
}
// Locate returns the link target of the specified filename or an empty slice if the
// specified filename is not a symlink.
func (m command) Locate(filename string) ([]string, error) {
info, err := os.Lstat(filename)
if err != nil {
return nil, fmt.Errorf("failed to get file info: %v", info)
}
if info.Mode()&os.ModeSymlink == 0 {
m.logger.Debugf("%v is not a symlink", filename)
return nil, nil
}
target, err := os.Readlink(filename)
if err != nil {
return nil, fmt.Errorf("error checking symlink: %v", err)
}
m.logger.Debugf("Resolved link: '%v' => '%v'", filename, target)
return []string{target}, nil
}

View File

@@ -1,52 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package hook
import (
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type hookCommand struct {
logger *logrus.Logger
}
// NewCommand constructs a hook command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := hookCommand{
logger: logger,
}
return c.build()
}
// build
func (m hookCommand) build() *cli.Command {
// Create the 'hook' command
hook := cli.Command{
Name: "hook",
Usage: "A collection of hooks that may be injected into an OCI spec",
}
hook.Subcommands = []*cli.Command{
ldcache.NewCommand(m.logger),
symlinks.NewCommand(m.logger),
}
return &hook
}

View File

@@ -1,129 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"fmt"
"os"
"path/filepath"
"syscall"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/sirupsen/logrus"
"github.com/urfave/cli/v2"
)
type command struct {
logger *logrus.Logger
}
type config struct {
folders cli.StringSlice
containerSpec string
}
// NewCommand constructs an update-ldcache command with the specified logger
func NewCommand(logger *logrus.Logger) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build the update-ldcache command
func (m command) build() *cli.Command {
cfg := config{}
// Create the 'update-ldcache' command
c := cli.Command{
Name: "update-ldcache",
Usage: "Update ldcache in a container by running ldconfig",
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "folder",
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
Destination: &cfg.folders,
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) run(c *cli.Context, cfg *config) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRoot, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %v", err)
}
err = m.createConfig(containerRoot, cfg.folders.Value())
if err != nil {
return fmt.Errorf("failed to update ld.so.conf: %v", err)
}
args := []string{"/sbin/ldconfig"}
if containerRoot != "" {
args = append(args, "-r", containerRoot)
}
return syscall.Exec(args[0], args, nil)
}
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
// to include the required paths.
func (m command) createConfig(root string, folders []string) error {
if len(folders) == 0 {
m.logger.Debugf("No folders to add to /etc/ld.so.conf")
return nil
}
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
if err != nil {
return fmt.Errorf("failed to create config file: %v", err)
}
defer configFile.Close()
m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())
configured := make(map[string]bool)
for _, folder := range folders {
if configured[folder] {
continue
}
_, err = configFile.WriteString(fmt.Sprintf("%s\n", folder))
if err != nil {
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
}
configured[folder] = true
}
return nil
}

View File

@@ -1,81 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package main
import (
"os"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
log "github.com/sirupsen/logrus"
cli "github.com/urfave/cli/v2"
)
var logger = log.New()
// config defines the options that can be set for the CLI through config files,
// environment variables, or command line flags
type config struct {
// Debug indicates whether the CLI is started in "debug" mode
Debug bool
}
func main() {
// Create a config struct to hold the parsed environment variables or command line flags
config := config{}
// Create the top-level CLI
c := cli.NewApp()
c.Name = "NVIDIA Container Toolkit CLI"
c.UseShortOptionHandling = true
c.EnableBashCompletion = true
c.Usage = "Tools to configure the NVIDIA Container Toolkit"
c.Version = info.GetVersionString()
// Setup the flags for this command
c.Flags = []cli.Flag{
&cli.BoolFlag{
Name: "debug",
Aliases: []string{"d"},
Usage: "Enable debug-level logging",
Destination: &config.Debug,
EnvVars: []string{"NVIDIA_CTK_DEBUG"},
},
}
// Set log-level for all subcommands
c.Before = func(c *cli.Context) error {
logLevel := log.InfoLevel
if config.Debug {
logLevel = log.DebugLevel
}
logger.SetLevel(logLevel)
return nil
}
// Define the subcommands
c.Commands = []*cli.Command{
hook.NewCommand(logger),
}
// Run the CLI
err := c.Run(os.Args)
if err != nil {
log.Errorf("%v", err)
log.Exit(1)
}
}

View File

@@ -16,4 +16,3 @@ ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
#experimental = false

View File

@@ -16,4 +16,3 @@ ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
#experimental = false

View File

@@ -16,4 +16,3 @@ ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
#experimental = false

View File

@@ -16,4 +16,3 @@ ldconfig = "@/sbin/ldconfig"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
#experimental = false

View File

@@ -16,11 +16,3 @@ ldconfig = "@/sbin/ldconfig.real"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"
#experimental = false
# Specify the runtimes to consider. This list is processed in order and the PATH
# searched for matching executables unless the entry is an absolute path.
runtimes = [
"docker-runc",
"runc",
]

View File

@@ -0,0 +1,19 @@
disable-require = false
supported-driver-capabilities = "compute,compat32,graphics,utility,video,display"
#swarm-resource = "DOCKER_RESOURCE_GPU"
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
#accept-nvidia-visible-devices-as-volume-mounts = false
[nvidia-container-cli]
#root = "/run/nvidia/driver"
#path = "/usr/bin/nvidia-container-cli"
environment = []
#debug = "/var/log/nvidia-container-toolkit.log"
#ldcache = "/etc/ld.so.cache"
load-kmods = true
#no-cgroups = false
#user = "root:video"
ldconfig = "@/sbin/ldconfig.real"
[nvidia-container-runtime]
#debug = "/var/log/nvidia-container-runtime.log"

View File

@@ -4,10 +4,11 @@ FROM ${BASEIMAGE}
RUN yum install -y \
ca-certificates \
gcc \
wget \
git \
make \
rpm-build \
make && \
wget \
&& \
rm -rf /var/cache/yum/*
ARG GOLANG_VERSION=0.0.0
@@ -41,8 +42,6 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX

View File

@@ -4,10 +4,11 @@ FROM ${BASEIMAGE}
RUN yum install -y \
ca-certificates \
gcc \
wget \
git \
make \
rpm-build && \
rpm-build \
wget \
&& \
rm -rf /var/cache/yum/*
ARG GOLANG_VERSION=0.0.0
@@ -41,8 +42,6 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX

View File

@@ -48,8 +48,6 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX

View File

@@ -14,7 +14,7 @@
ARG GOLANG_VERSION=x.x.x
FROM golang:${GOLANG_VERSION}
RUN go install golang.org/x/lint/golint@latest
RUN go install github.com/matryer/moq@latest
RUN go install github.com/gordonklaus/ineffassign@latest
RUN go install github.com/client9/misspell/cmd/misspell@latest
RUN go get -u golang.org/x/lint/golint
RUN go get -u github.com/matryer/moq
RUN go get -u github.com/gordonklaus/ineffassign
RUN go get -u github.com/client9/misspell/cmd/misspell

View File

@@ -39,8 +39,6 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch

View File

@@ -46,8 +46,6 @@ RUN mkdir -p $DIST_DIR /dist
WORKDIR $GOPATH/src/nvidia-container-toolkit
COPY . .
ARG GIT_COMMIT
ENV GIT_COMMIT ${GIT_COMMIT}
RUN make PREFIX=${DIST_DIR} cmds
ARG CONFIG_TOML_SUFFIX

View File

@@ -98,7 +98,6 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
# private centos target
--centos%: OS := centos
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
# private amazonlinux target
--amazonlinux%: OS := amazonlinux
@@ -114,7 +113,6 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
# We allow the CONFIG_TOML_SUFFIX to be overridden.
CONFIG_TOML_SUFFIX ?= $(OS)
@@ -124,18 +122,15 @@ docker-build-%:
docker pull --platform=linux/$(ARCH) $(BASEIMAGE)
DOCKER_BUILDKIT=1 \
$(DOCKER) build \
--platform=linux/$(ARCH) \
--progress=plain \
--build-arg BASEIMAGE="$(BASEIMAGE)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg PKG_VERS="$(LIB_VERSION)" \
--build-arg PKG_REV="$(PKG_REV)" \
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--tag $(BUILDIMAGE) \
--file $(DOCKERFILE) .
$(DOCKER) run \
--platform=linux/$(ARCH) \
-e DISTRIB \
-e SECTION \
-v $(ARTIFACTS_DIR):/dist \

View File

@@ -14,37 +14,43 @@
# limitations under the License.
*/
package lookup
package main
import (
"fmt"
"encoding/json"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
log "github.com/sirupsen/logrus"
)
// NewDirectoryLocator creates a Locator that can be used to find directories at the specified root. A logger
// is also specified.
func NewDirectoryLocator(logger *log.Logger, root string) Locator {
l := file{
logger: logger,
prefixes: []string{root},
filter: assertDirectory,
}
func main() {
log.Infof("Starting device discovery with NVML")
return &l
}
// assertDirectory checks wither the specified path is a directory.
func assertDirectory(filename string) error {
info, err := os.Stat(filename)
d, err := discover.NewNVMLServer("")
if err != nil {
return fmt.Errorf("error getting info for %v: %v", filename, err)
log.Errorf("Error creating NVML Server: %v", err)
return
}
if !info.IsDir() {
return fmt.Errorf("specified path '%v' is not a directory", filename)
devices, err := d.Devices()
if err != nil {
log.Errorf("Error discovering devices: %v", err)
return
}
return nil
mounts, err := d.Mounts()
if err != nil {
log.Errorf("Error discovering mounts: %v", err)
return
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
log.Infof("Discovered devices:")
enc.Encode(devices)
log.Infof("Discovered libraries:")
enc.Encode(mounts)
}

65
examples/filter/main.go Normal file
View File

@@ -0,0 +1,65 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package main
import (
"encoding/json"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/internal/filter"
log "github.com/sirupsen/logrus"
)
func main() {
d, err := discover.NewNVMLServer("")
if err != nil {
log.Errorf("Error discovering devices: %v", err)
}
selected := filter.NewSelectDevicesFrom(d, "all", nil)
devices, err := selected.Devices()
if err != nil {
log.Errorf("Error discovering devices: %v", err)
return
}
mounts, err := selected.Mounts()
if err != nil {
log.Errorf("Error discovering mounts: %v", err)
return
}
hooks, err := selected.Hooks()
if err != nil {
log.Errorf("Error discovering hooks: %v", err)
return
}
enc := json.NewEncoder(os.Stdout)
enc.SetIndent("", " ")
log.Infof("Discovered devices:")
enc.Encode(devices)
log.Infof("Discovered libraries:")
enc.Encode(mounts)
log.Infof("Discovered hook:")
enc.Encode(hooks)
}

View File

@@ -0,0 +1,26 @@
package main
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
log "github.com/sirupsen/logrus"
)
var logger = log.StandardLogger()
func main() {
logger.SetLevel(log.DebugLevel)
logger.Infof("Starting device discovery with NVML")
cache, err := ldcache.NewLDCacheWithLogger(logger, "/run/nvidia/driver")
if err != nil {
logger.Errorf("Error loading ldcache: %v", err)
return
}
defer cache.Close()
libs32, libs64 := cache.Lookup("lib")
logger.Infof("32-bit: %v", libs32)
logger.Infof("64-bit: %v", libs64)
}

15
go.mod
View File

@@ -3,16 +3,15 @@ module github.com/NVIDIA/nvidia-container-toolkit
go 1.14
require (
github.com/BurntSushi/toml v1.0.0
github.com/NVIDIA/go-nvml v0.11.6-0
github.com/container-orchestrated-devices/container-device-interface v0.3.1-0.20220224133719-e5457123010b
github.com/containers/podman/v4 v4.0.3
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab
github.com/pelletier/go-toml v1.9.4
github.com/BurntSushi/toml v0.3.1
github.com/NVIDIA/go-nvml v0.11.1-0
github.com/containers/podman/v2 v2.2.1
github.com/opencontainers/runtime-spec v1.0.3-0.20211101234015-a3c33d663ebc
github.com/pelletier/go-toml v1.9.3
github.com/sirupsen/logrus v1.8.1
github.com/stretchr/testify v1.7.0
github.com/tsaikd/KDGoLib v0.0.0-20191001134900-7f3cf518e07d
github.com/urfave/cli/v2 v2.3.0
golang.org/x/mod v0.5.0
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
golang.org/x/mod v0.3.0
golang.org/x/sys v0.0.0-20210426230700-d19ff857e887
)

1506
go.sum

File diff suppressed because it is too large Load Diff

View File

@@ -1,48 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"github.com/pelletier/go-toml"
)
// ContainerCLIConfig stores the options for the nvidia-container-cli
type ContainerCLIConfig struct {
Root string
}
// getContainerCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getContainerCLIConfigFrom(toml *toml.Tree) *ContainerCLIConfig {
cfg := getDefaultContainerCLIConfig()
if toml == nil {
return cfg
}
cfg.Root = toml.GetDefault("nvidia-container-cli.root", cfg.Root).(string)
return cfg
}
// getDefaultContainerCLIConfig defines the default values for the config
func getDefaultContainerCLIConfig() *ContainerCLIConfig {
c := ContainerCLIConfig{
Root: "",
}
return &c
}

View File

@@ -1,114 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"fmt"
"io"
"os"
"path"
"github.com/pelletier/go-toml"
)
const (
configOverride = "XDG_CONFIG_HOME"
configFilePath = "nvidia-container-runtime/config.toml"
)
var (
// DefaultExecutableDir specifies the default path to use for executables if they cannot be located in the path.
DefaultExecutableDir = "/usr/bin"
// NVIDIAContainerRuntimeHookExecutable is the executable name for the NVIDIA Container Runtime Hook
NVIDIAContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
// NVIDIAContainerToolkitExecutable is the executable name for the NVIDIA Container Toolkit (an alias for the NVIDIA Container Runtime Hook)
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
configDir = "/etc/"
)
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
type Config struct {
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
}
// GetConfig sets up the config struct. Values are read from a toml file
// or set via the environment.
func GetConfig() (*Config, error) {
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
configDir = XDGConfigDir
}
configFilePath := path.Join(configDir, configFilePath)
tomlFile, err := os.Open(configFilePath)
if err != nil {
return nil, fmt.Errorf("failed to open config file %v: %v", configFilePath, err)
}
defer tomlFile.Close()
cfg, err := loadConfigFrom(tomlFile)
if err != nil {
return nil, fmt.Errorf("failed to read config values: %v", err)
}
return cfg, nil
}
// loadRuntimeConfigFrom reads the config from the specified Reader
func loadConfigFrom(reader io.Reader) (*Config, error) {
toml, err := toml.LoadReader(reader)
if err != nil {
return nil, err
}
return getConfigFrom(toml)
}
// getConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getConfigFrom(toml *toml.Tree) (*Config, error) {
cfg := getDefaultConfig()
if toml == nil {
return cfg, nil
}
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
runtimeConfig, err := getRuntimeConfigFrom(toml)
if err != nil {
return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err)
}
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
return cfg, nil
}
// getDefaultConfig defines the default values for the config
func getDefaultConfig() *Config {
c := Config{
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
NVIDIACTKConfig: *getDefaultCTKConfig(),
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
}
return &c
}

View File

@@ -1,165 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"io/ioutil"
"os"
"path/filepath"
"strings"
"testing"
"github.com/stretchr/testify/require"
)
func TestGetConfigWithCustomConfig(t *testing.T) {
wd, err := os.Getwd()
require.NoError(t, err)
// By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
testDir := filepath.Join(wd, "test")
filename := filepath.Join(testDir, configFilePath)
os.Setenv(configOverride, testDir)
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
cfg, err := GetConfig()
require.NoError(t, err)
require.Equal(t, cfg.NVIDIAContainerRuntimeConfig.DebugFilePath, "/nvidia-container-toolkit.log")
}
func TestGetConfig(t *testing.T) {
testCases := []struct {
description string
contents []string
expectedError error
expectedConfig *Config
}{
{
description: "empty config is default",
expectedConfig: &Config{
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: "info",
Runtimes: []string{"docker-runc", "runc"},
Mode: "auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
},
NVIDIACTKConfig: CTKConfig{
Path: "nvidia-ctk",
},
},
},
{
description: "config options set inline",
contents: []string{
"nvidia-container-cli.root = \"/bar/baz\"",
"nvidia-container-runtime.debug = \"/foo/bar\"",
"nvidia-container-runtime.experimental = true",
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
"nvidia-container-runtime.log-level = \"debug\"",
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
"nvidia-container-runtime.mode = \"not-auto\"",
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
LogLevel: "debug",
Runtimes: []string{"/some/runtime"},
Mode: "not-auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
},
},
{
description: "config options set in section",
contents: []string{
"[nvidia-container-cli]",
"root = \"/bar/baz\"",
"[nvidia-container-runtime]",
"debug = \"/foo/bar\"",
"experimental = true",
"discover-mode = \"not-legacy\"",
"log-level = \"debug\"",
"runtimes = [\"/some/runtime\",]",
"mode = \"not-auto\"",
"[nvidia-container-runtime.modes.csv]",
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
"[nvidia-ctk]",
"path = \"/foo/bar/nvidia-ctk\"",
},
expectedConfig: &Config{
NVIDIAContainerCLIConfig: ContainerCLIConfig{
Root: "/bar/baz",
},
NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/foo/bar",
LogLevel: "debug",
Runtimes: []string{"/some/runtime"},
Mode: "not-auto",
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
},
NVIDIACTKConfig: CTKConfig{
Path: "/foo/bar/nvidia-ctk",
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
cfg, err := loadConfigFrom(reader)
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.EqualValues(t, tc.expectedConfig, cfg)
})
}
}

View File

@@ -1,143 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"fmt"
"strconv"
"strings"
"github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver"
)
const (
envCUDAVersion = "CUDA_VERSION"
envNVRequirePrefix = "NVIDIA_REQUIRE_"
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
)
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
// a map of environment variable to values that can be used to perform lookups
// such as requirements.
type CUDA map[string]string
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construc the CUDA Image.
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
if spec == nil || spec.Process == nil {
return NewCUDAImageFromEnv(nil)
}
return NewCUDAImageFromEnv(spec.Process.Env)
}
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
// is a list of strings of the form ENVAR=VALUE.
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
c := make(CUDA)
for _, e := range env {
parts := strings.SplitN(e, "=", 2)
if len(parts) != 2 {
return nil, fmt.Errorf("invalid environment variable: %v", e)
}
c[parts[0]] = parts[1]
}
return c, nil
}
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
// image is considered legacy if it has a CUDA_VERSION environment variable defined
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
func (i CUDA) IsLegacy() bool {
legacyCudaVersion := i[envCUDAVersion]
cudaRequire := i[envNVRequireCUDA]
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
}
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
// variables.
func (i CUDA) GetRequirements() ([]string, error) {
// TODO: We need not process this if disable require is set, but this will be done
// in a single follow-up to ensure that the behavioural change is accurately captured.
// if i.HasDisableRequire() {
// return nil, nil
// }
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
var requirements []string
for name, value := range i {
if strings.HasPrefix(name, envNVRequirePrefix) {
requirements = append(requirements, value)
}
}
if i.IsLegacy() {
v, err := i.legacyVersion()
if err != nil {
return nil, fmt.Errorf("failed to get version: %v", err)
}
cudaRequire := fmt.Sprintf("cuda>=%s", v)
requirements = append(requirements, cudaRequire)
}
return requirements, nil
}
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
// to a valid (true) boolean value this can be used to disable the requirement checks
func (i CUDA) HasDisableRequire() bool {
if disable, exists := i[envNVDisableRequire]; exists {
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
d, _ := strconv.ParseBool(disable)
return d
}
return false
}
func (i CUDA) legacyVersion() (string, error) {
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
if err != nil {
return "", fmt.Errorf("invalid CUDA version: %v", err)
}
return majorMinor, nil
}
func parseMajorMinorVersion(version string) (string, error) {
vVersion := "v" + strings.TrimPrefix(version, "v")
if !semver.IsValid(vVersion) {
return "", fmt.Errorf("invalid version string")
}
majorMinor := strings.TrimPrefix(semver.MajorMinor(vVersion), "v")
parts := strings.Split(majorMinor, ".")
var err error
_, err = strconv.ParseUint(parts[0], 10, 32)
if err != nil {
return "", fmt.Errorf("invalid major version")
}
_, err = strconv.ParseUint(parts[1], 10, 32)
if err != nil {
return "", fmt.Errorf("invalid minor version")
}
return majorMinor, nil
}

View File

@@ -1,71 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"testing"
"github.com/stretchr/testify/require"
)
func TestParseMajorMinorVersionValid(t *testing.T) {
var tests = []struct {
version string
expected string
}{
{"0", "0.0"},
{"8", "8.0"},
{"7.5", "7.5"},
{"9.0.116", "9.0"},
{"4294967295.4294967295.4294967295", "4294967295.4294967295"},
{"v11.6", "11.6"},
}
for _, c := range tests {
t.Run(c.version, func(t *testing.T) {
version, err := parseMajorMinorVersion(c.version)
require.NoError(t, err)
require.Equal(t, c.expected, version)
})
}
}
func TestParseMajorMinorVersionInvalid(t *testing.T) {
var tests = []string{
"foo",
"foo.5.10",
"9.0.116.50",
"9.0.116foo",
"7.foo",
"9.0.bar",
"9.4294967296",
"9.0.116.",
"9..0",
"9.",
".5.10",
"-9",
"+9",
"-9.1.116",
"-9.-1.-116",
}
for _, c := range tests {
t.Run(c, func(t *testing.T) {
_, err := parseMajorMinorVersion(c)
require.Error(t, err)
})
}
}

View File

@@ -1,95 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import (
"fmt"
"github.com/pelletier/go-toml"
"github.com/sirupsen/logrus"
)
const (
dockerRuncExecutableName = "docker-runc"
runcExecutableName = "runc"
auto = "auto"
)
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
type RuntimeConfig struct {
DebugFilePath string `toml:"debug"`
// LogLevel defines the logging level for the application
LogLevel string `toml:"log-level"`
// Runtimes defines the candidates for the low-level runtime
Runtimes []string `toml:"runtimes"`
Mode string `toml:"mode"`
Modes modesConfig `toml:"modes"`
}
// modesConfig defines (optional) per-mode configs
type modesConfig struct {
CSV csvModeConfig `toml:"csv"`
}
type csvModeConfig struct {
MountSpecPath string `toml:"mount-spec-path"`
}
// dummy allows us to unmarshal only a RuntimeConfig from a *toml.Tree
type dummy struct {
Runtime RuntimeConfig `toml:"nvidia-container-runtime"`
}
// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
cfg := GetDefaultRuntimeConfig()
if toml == nil {
return cfg, nil
}
d := dummy{
Runtime: *cfg,
}
if err := toml.Unmarshal(&d); err != nil {
return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err)
}
return &d.Runtime, nil
}
// GetDefaultRuntimeConfig defines the default values for the config
func GetDefaultRuntimeConfig() *RuntimeConfig {
c := RuntimeConfig{
DebugFilePath: "/dev/null",
LogLevel: logrus.InfoLevel.String(),
Runtimes: []string{
dockerRuncExecutableName,
runcExecutableName,
},
Mode: auto,
Modes: modesConfig{
CSV: csvModeConfig{
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
},
},
}
return &c
}

View File

@@ -1,46 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package config
import "github.com/pelletier/go-toml"
// CTKConfig stores the config options for the NVIDIA Container Toolkit CLI (nvidia-ctk)
type CTKConfig struct {
Path string `toml:"path"`
}
// getCTKConfigFrom reads the nvidia container runtime config from the specified toml Tree.
func getCTKConfigFrom(toml *toml.Tree) *CTKConfig {
cfg := getDefaultCTKConfig()
if toml == nil {
return cfg
}
cfg.Path = toml.GetDefault("nvidia-ctk.path", cfg.Path).(string)
return cfg
}
// getDefaultCTKConfig defines the default values for the config
func getDefaultCTKConfig() *CTKConfig {
c := CTKConfig{
Path: "nvidia-ctk",
}
return &c
}

View File

@@ -1,137 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package cuda
import (
"fmt"
"github.com/NVIDIA/go-nvml/pkg/dl"
)
/*
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
#ifdef _WIN32
#define CUDAAPI __stdcall
#else
#define CUDAAPI
#endif
typedef int CUdevice;
typedef enum CUdevice_attribute_enum {
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
} CUdevice_attribute;
typedef enum cudaError_enum {
CUDA_SUCCESS = 0
} CUresult;
CUresult CUDAAPI cuInit(unsigned int Flags);
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
*/
import "C"
const (
libraryName = "libcuda.so.1"
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
)
// cuda stores a reference the cuda dynamic library
var lib *dl.DynamicLibrary
// Version returns the CUDA version of the driver as a string or an error if this
// cannot be determined.
func Version() (string, error) {
lib, err := load()
if err != nil {
return "", err
}
defer lib.Close()
if err := lib.Lookup("cuDriverGetVersion"); err != nil {
return "", fmt.Errorf("failed to lookup symbol: %v", err)
}
var version C.int
if result := C.cuDriverGetVersion(&version); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to get CUDA version: result=%v", result)
}
major := version / 1000
minor := version % 100 / 10
return fmt.Sprintf("%d.%d", major, minor), nil
}
// ComputeCapability returns the CUDA compute capability of a device with the specified index as a string
// or an error if this cannot be determined.
func ComputeCapability(index int) (string, error) {
lib, err := load()
if err != nil {
return "", err
}
defer lib.Close()
if err := lib.Lookup("cuInit"); err != nil {
return "", fmt.Errorf("failed to lookup symbol: %v", err)
}
if err := lib.Lookup("cuDeviceGet"); err != nil {
return "", fmt.Errorf("failed to lookup symbol: %v", err)
}
if err := lib.Lookup("cuDeviceGetAttribute"); err != nil {
return "", fmt.Errorf("failed to lookup symbol: %v", err)
}
if result := C.cuInit(C.uint(0)); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to initialize CUDA: result=%v", result)
}
var device C.CUdevice
// NOTE: We only query the first device
if result := C.cuDeviceGet(&device, C.int(index)); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to get CUDA device %v: result=%v", 0, result)
}
var major C.int
if result := C.cuDeviceGetAttribute(&major, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to get CUDA compute capability major for device %v : result=%v", 0, result)
}
var minor C.int
if result := C.cuDeviceGetAttribute(&minor, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device); result != C.CUDA_SUCCESS {
return "", fmt.Errorf("failed to get CUDA compute capability minor for device %v: result=%v", 0, result)
}
return fmt.Sprintf("%d.%d", major, minor), nil
}
func load() (*dl.DynamicLibrary, error) {
lib := dl.New(libraryName, libraryLoadFlags)
if lib == nil {
return nil, fmt.Errorf("error instantiating DynamicLibrary for CUDA")
}
err := lib.Open()
if err != nil {
return nil, fmt.Errorf("error opening DynamicLibrary for CUDA: %v", err)
}
return lib, nil
}

View File

@@ -0,0 +1,51 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
log "github.com/sirupsen/logrus"
)
// NewBinaryMounts creates a discoverer for binaries using the specified root
func NewBinaryMounts(root string) Discover {
return NewBinaryMountsWithLogger(log.StandardLogger(), root)
}
// NewBinaryMountsWithLogger creates a Mounts discoverer as with NewBinaryMounts
// with the specified logger
func NewBinaryMountsWithLogger(logger *log.Logger, root string) Discover {
d := mounts{
logger: logger,
lookup: lookup.NewPathLocatorWithLogger(logger, root),
required: requiredBinaries,
}
return &d
}
// requiredBinaries defines a set of binaries and their labels
var requiredBinaries = map[string][]string{
"utility": {
"nvidia-smi", /* System management interface */
"nvidia-debugdump", /* GPU coredump utility */
"nvidia-persistenced", /* Persistence mode utility */
},
"compute": {
"nvidia-cuda-mps-control", /* Multi process service CLI */
"nvidia-cuda-mps-server", /* Multi process service server */
},
}

View File

@@ -0,0 +1,73 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestBinaries(t *testing.T) {
binaryLookup := map[string]string{
"nvidia-smi": "/usr/bin/nvidia-smi",
"nvidia-persistenced": "/usr/bin/nvidia-persistenced",
"nvidia-debugdump": "test-duplicates",
"nvidia-cuda-mps-control": "test-duplicates",
}
logger, _ := testlog.NewNullLogger()
d := NewBinaryMountsWithLogger(logger, "")
// Override lookup for testing
mockLocator := NewLocatorMockFromMap(binaryLookup)
d.(*mounts).lookup = mockLocator
mounts, err := d.Mounts()
require.NoError(t, err)
expected := []Mount{
{
Path: "/usr/bin/nvidia-smi",
},
{
Path: "/usr/bin/nvidia-persistenced",
},
{
Path: "test-duplicates",
},
}
require.Equal(t, len(expected), len(mounts))
devices, err := d.Devices()
require.NoError(t, err)
require.Empty(t, devices)
hooks, err := d.Hooks()
require.NoError(t, err)
require.Empty(t, hooks)
}
func TestNewBinariesConstructor(t *testing.T) {
b := NewBinaryMounts("").(*mounts)
require.NotNil(t, b.logger)
require.NotNil(t, b.lookup)
}

View File

@@ -18,26 +18,16 @@ package discover
import "fmt"
// list is a discoverer that contains a list of Discoverers. The output of the
// Mounts functions is the concatenation of the output for each of the
// composite is a discoverer that contains a list of Discoverers. The output of the
// Devices, Mounts, and Hooks functions is the concatenation of the output for each of the
// elements in the list.
type list struct {
type composite struct {
discoverers []Discover
}
var _ Discover = (*list)(nil)
var _ Discover = (*composite)(nil)
// NewList creates a discoverer that is the composite of a list of discoveres.
func NewList(d ...Discover) Discover {
l := list{
discoverers: d,
}
return &l
}
// Devices returns all devices from the included discoverers
func (d list) Devices() ([]Device, error) {
func (d composite) Devices() ([]Device, error) {
var allDevices []Device
for i, di := range d.discoverers {
@@ -51,8 +41,7 @@ func (d list) Devices() ([]Device, error) {
return allDevices, nil
}
// Mounts returns all mounts from the included discoverers
func (d list) Mounts() ([]Mount, error) {
func (d composite) Mounts() ([]Mount, error) {
var allMounts []Mount
for i, di := range d.discoverers {
@@ -66,8 +55,7 @@ func (d list) Mounts() ([]Mount, error) {
return allMounts, nil
}
// Hooks returns all Hooks from the included discoverers
func (d list) Hooks() ([]Hook, error) {
func (d composite) Hooks() ([]Hook, error) {
var allHooks []Hook
for i, di := range d.discoverers {
@@ -80,3 +68,7 @@ func (d list) Hooks() ([]Hook, error) {
return allHooks, nil
}
func (d *composite) add(di ...Discover) {
d.discoverers = append(d.discoverers, di...)
}

View File

@@ -1,139 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
)
// charDevices is a discover for a list of character devices
type charDevices mounts
var _ Discover = (*charDevices)(nil)
// NewFromCSVFiles creates a discoverer for the specified CSV files. A logger is also supplied.
// The constructed discoverer is comprised of a list, with each element in the list being associated with a
// single CSV files.
func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discover, error) {
if len(files) == 0 {
logger.Warnf("No CSV files specified")
return None{}, nil
}
symlinkLocator := lookup.NewSymlinkLocator(logger, root)
locators := map[csv.MountSpecType]lookup.Locator{
csv.MountSpecDev: lookup.NewCharDeviceLocator(logger, root),
csv.MountSpecDir: lookup.NewDirectoryLocator(logger, root),
// Libraries and symlinks are handled in the same way
csv.MountSpecLib: symlinkLocator,
csv.MountSpecSym: symlinkLocator,
}
var discoverers []Discover
for _, filename := range files {
d, err := NewFromCSVFile(logger, locators, filename)
if err != nil {
logger.Warnf("Skipping CSV file %v: %v", filename, err)
continue
}
discoverers = append(discoverers, d)
}
return &list{discoverers: discoverers}, nil
}
// NewFromCSVFile creates a discoverer for the specified CSV file. A logger is also supplied.
// The constructed discoverer is comprised of a list, with each element in the list being associated with a particular
// MountSpecType.
func NewFromCSVFile(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, filename string) (Discover, error) {
// Create a discoverer for each file-kind combination
targets, err := csv.NewCSVFileParser(logger, filename).Parse()
if err != nil {
return nil, fmt.Errorf("failed to parse CSV file: %v", err)
}
if len(targets) == 0 {
return nil, fmt.Errorf("CSV file is empty")
}
return newFromMountSpecs(logger, locators, targets)
}
// newFromMountSpecs creates a discoverer for the CSV file. A logger is also supplied.
// A list of csvDiscoverers is returned, with each being associated with a single MountSpecType.
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, targets []*csv.MountSpec) (Discover, error) {
if len(targets) == 0 {
return &None{}, nil
}
var discoverers []Discover
var mountSpecTypes []csv.MountSpecType
candidatesByType := make(map[csv.MountSpecType][]string)
for _, t := range targets {
if _, exists := candidatesByType[t.Type]; !exists {
mountSpecTypes = append(mountSpecTypes, t.Type)
}
candidatesByType[t.Type] = append(candidatesByType[t.Type], t.Path)
}
for _, t := range mountSpecTypes {
locator, exists := locators[t]
if !exists {
return nil, fmt.Errorf("no locator defined for '%v'", t)
}
m := &mounts{
logger: logger,
lookup: locator,
required: candidatesByType[t],
}
switch t {
case csv.MountSpecDev:
// For device mount specs, we insert a charDevices into the list of discoverers.
discoverers = append(discoverers, (*charDevices)(m))
default:
discoverers = append(discoverers, m)
}
}
return &list{discoverers: discoverers}, nil
}
// Mounts returns the discovered mounts for the charDevices. Since this explicitly specifies a
// device list, the mounts are nil.
func (d *charDevices) Mounts() ([]Mount, error) {
return nil, nil
}
// Devices returns the discovered devices for the charDevices. Here the device nodes are first
// discovered as mounts and these are converted to devices.
func (d *charDevices) Devices() ([]Device, error) {
devicesAsMounts, err := (*mounts)(d).Mounts()
if err != nil {
return nil, err
}
var devices []Device
for _, mount := range devicesAsMounts {
devices = append(devices, Device(mount))
}
return devices, nil
}

View File

@@ -1,131 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package csv
import (
"bufio"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
"github.com/sirupsen/logrus"
)
const (
// DefaultMountSpecPath is default location of CSV files that define the modifications required to the OCI spec
DefaultMountSpecPath = "/etc/nvidia-container-runtime/host-files-for-container.d"
)
// GetFileList returns the (non-recursive) list of CSV files in the specified
// folder
func GetFileList(root string) ([]string, error) {
contents, err := os.ReadDir(root)
if err != nil && errors.Is(err, os.ErrNotExist) {
return nil, nil
} else if err != nil {
return nil, fmt.Errorf("failed to read the contents of %v: %v", root, err)
}
var csvFilePaths []string
for _, c := range contents {
if c.IsDir() {
continue
}
if c.Name() == ".csv" {
continue
}
ext := strings.ToLower(filepath.Ext(c.Name()))
if ext != ".csv" {
continue
}
csvFilePaths = append(csvFilePaths, filepath.Join(root, c.Name()))
}
return csvFilePaths, nil
}
// BaseFilesOnly filters out non-base CSV files from the list of CSV files.
func BaseFilesOnly(filenames []string) []string {
filter := map[string]bool{
"l4t.csv": true,
"drivers.csv": true,
"devices.csv": true,
}
var selected []string
for _, file := range filenames {
base := filepath.Base(file)
if filter[base] {
selected = append(selected, file)
}
}
return selected
}
// Parser specifies an interface for parsing MountSpecs
type Parser interface {
Parse() ([]*MountSpec, error)
}
type csv struct {
logger *logrus.Logger
filename string
}
// NewCSVFileParser creates a new parser for reading MountSpecs from the specified CSV file
func NewCSVFileParser(logger *logrus.Logger, filename string) Parser {
p := csv{
logger: logger,
filename: filename,
}
return &p
}
// Parse parses the csv file and returns a list of MountSpecs in the file
func (p csv) Parse() ([]*MountSpec, error) {
reader, err := os.Open(p.filename)
if err != nil {
return nil, fmt.Errorf("failed to open %v for reading: %v", p.filename, err)
}
defer reader.Close()
return p.parseFromReader(reader), nil
}
// parseFromReader parses the specified file and returns a list of required jetson mounts
func (p csv) parseFromReader(reader io.Reader) []*MountSpec {
var targets []*MountSpec
scanner := bufio.NewScanner(reader)
for scanner.Scan() {
line := scanner.Text()
target, err := NewMountSpecFromLine(line)
if err != nil {
p.logger.Debugf("Skipping invalid mount spec '%v': %v", line, err)
continue
}
targets = append(targets, target)
}
return targets
}

View File

@@ -1,83 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package csv
import (
"path/filepath"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
"github.com/stretchr/testify/require"
)
func TestGetFileList(t *testing.T) {
moduleRoot, _ := test.GetModuleRoot()
testCases := []struct {
description string
root string
files []string
expectedError error
}{
{
description: "returns list of CSV files",
root: "test/input/csv_samples/",
files: []string{
"jetson.csv",
"simple_wrong.csv",
"simple.csv",
"spaced.csv",
},
},
{
description: "handles empty folder",
root: "test/input/csv_samples/empty",
},
{
description: "handles non-existent folder",
root: "test/input/csv_samples/NONEXISTENT",
},
{
description: "handles non-existent folder root",
root: "/NONEXISTENT/test/input/csv_samples/",
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
root := filepath.Join(moduleRoot, tc.root)
files, err := GetFileList(root)
if tc.expectedError != nil {
require.Error(t, err)
require.Empty(t, files)
return
}
require.NoError(t, err)
var foundFiles []string
for _, f := range files {
require.Equal(t, root, filepath.Dir(f))
require.Equal(t, ".csv", filepath.Ext(f))
foundFiles = append(foundFiles, filepath.Base(f))
}
require.ElementsMatch(t, tc.files, foundFiles)
})
}
}

View File

@@ -1,74 +0,0 @@
/**
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package csv
import (
"fmt"
"strings"
)
// MountSpecType defines the mount types allowed in a CSV file
type MountSpecType string
const (
// MountSpecDev is used for character devices
MountSpecDev = MountSpecType("dev")
// MountSpecDir is used for directories
MountSpecDir = MountSpecType("dir")
// MountSpecLib is used for libraries or regular files
MountSpecLib = MountSpecType("lib")
// MountSpecSym is used for symlinks.
MountSpecSym = MountSpecType("sym")
)
// MountSpec represents a Jetson mount consisting of a type and a path.
type MountSpec struct {
Type MountSpecType
Path string
}
// NewMountSpecFromLine parses the specified line and returns the MountSpec or an error if the line is malformed
func NewMountSpecFromLine(line string) (*MountSpec, error) {
parts := strings.SplitN(strings.TrimSpace(line), ",", 2)
if len(parts) < 2 {
return nil, fmt.Errorf("failed to parse line: %v", line)
}
mountType := strings.TrimSpace(parts[0])
path := strings.TrimSpace(parts[1])
return NewMountSpec(mountType, path)
}
// NewMountSpec creates a MountSpec with the specified type and path. An error is returned if the type is invalid.
func NewMountSpec(mountType string, path string) (*MountSpec, error) {
mt := MountSpecType(mountType)
switch mt {
case MountSpecDev, MountSpecLib, MountSpecSym, MountSpecDir:
default:
return nil, fmt.Errorf("unexpected mount type: %v", mt)
}
if path == "" {
return nil, fmt.Errorf("invalid path: %v", path)
}
mount := MountSpec{
Type: mt,
Path: path,
}
return &mount, nil
}

View File

@@ -1,82 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package csv
import (
"fmt"
"testing"
"github.com/stretchr/testify/require"
)
func TestNewMountSpecFromLine(t *testing.T) {
parseError := fmt.Errorf("failed to parse line")
unexpectedError := fmt.Errorf("unexpected mount type")
testCases := []struct {
line string
expectedError error
expectedValue MountSpec
}{
{
line: "",
expectedError: parseError,
},
{
line: "\t",
expectedError: parseError,
},
{
line: ",",
expectedError: parseError,
},
{
line: "dev,",
expectedError: parseError,
},
{
line: "dev ,/a/path",
expectedValue: MountSpec{
Path: "/a/path",
Type: "dev",
},
},
{
line: "dev ,/a/path,with,commas",
expectedValue: MountSpec{
Path: "/a/path,with,commas",
Type: "dev",
},
},
{
line: "not-dev ,/a/path",
expectedError: unexpectedError,
},
}
for i, tc := range testCases {
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
target, err := NewMountSpecFromLine(tc.line)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, &tc.expectedValue, target)
})
}
}

View File

@@ -1,160 +0,0 @@
/**
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestCharDevices(t *testing.T) {
logger, logHook := testlog.NewNullLogger()
testCases := []struct {
description string
input *charDevices
expectedMounts []Mount
expectedMountsError error
expectedDevicesError error
expectedDevices []Device
}{
{
description: "dev mounts are empty",
input: (*charDevices)(
&mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(string) ([]string, error) {
return []string{"located"}, nil
},
},
required: []string{"required"},
},
),
expectedDevices: []Device{{Path: "located"}},
},
{
description: "dev devices returns error for nil lookup",
input: &charDevices{},
expectedDevicesError: fmt.Errorf("no lookup defined"),
},
}
for _, tc := range testCases {
logHook.Reset()
t.Run(tc.description, func(t *testing.T) {
tc.input.logger = logger
mounts, err := tc.input.Mounts()
if tc.expectedMountsError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.ElementsMatch(t, tc.expectedMounts, mounts)
devices, err := tc.input.Devices()
if tc.expectedDevicesError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
}
require.ElementsMatch(t, tc.expectedDevices, devices)
})
}
}
func TestNewFromMountSpec(t *testing.T) {
logger, _ := testlog.NewNullLogger()
locators := map[csv.MountSpecType]lookup.Locator{
"dev": &lookup.LocatorMock{},
"lib": &lookup.LocatorMock{},
}
testCases := []struct {
description string
targets []*csv.MountSpec
expectedError error
expectedDiscoverer Discover
}{
{
description: "empty targets returns None discoverer list",
expectedDiscoverer: &None{},
},
{
description: "unexpected locator returns error",
targets: []*csv.MountSpec{
{
Type: "foo",
Path: "bar",
},
},
expectedError: fmt.Errorf("no locator defined for foo"),
},
{
description: "creates discoverers based on type",
targets: []*csv.MountSpec{
{
Type: "dev",
Path: "dev0",
},
{
Type: "lib",
Path: "lib0",
},
{
Type: "dev",
Path: "dev1",
},
},
expectedDiscoverer: &list{
discoverers: []Discover{
(*charDevices)(
&mounts{
logger: logger,
lookup: locators["dev"],
required: []string{"dev0", "dev1"},
},
),
&mounts{
logger: logger,
lookup: locators["lib"],
required: []string{"lib0"},
},
},
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
discoverer, err := newFromMountSpecs(logger, locators, tc.targets)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err)
require.EqualValues(t, tc.expectedDiscoverer, discoverer)
})
}
}

View File

@@ -1,5 +1,5 @@
/*
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -16,31 +16,48 @@
package discover
// Config represents the configuration options for discovery
type Config struct {
Root string
NVIDIAContainerToolkitCLIExecutablePath string
// DevicePath is a path in /dev associated with a device
type DevicePath string
// ProcPath is a path in /proc associated with a devices
type ProcPath string
// PCIBusID is the ID on the PCI bus of a device
type PCIBusID string
// DeviceNode represents a device on the file system
type DeviceNode struct {
Path DevicePath
Major int
Minor int
}
// Device represents a discovered character device.
// Device represents a discovered device including identifiers (Index, UUID, PCI bus ID)
// for selection and paths in /dev and /proc associated with the device
type Device struct {
Path string
Index string
UUID string
PCIBusID PCIBusID
DeviceNodes []DeviceNode
ProcPaths []ProcPath
}
// Mount represents a discovered mount.
// Mount represents a discovered mount. This includes a set of labels
// for selection and the mount path
type Mount struct {
Path string
Path string
Labels map[string]string
}
// Hook represents a discovered hook.
// Hook represents a discovered hook
type Hook struct {
Lifecycle string
Path string
Args []string
Path string
Args []string
HookName string
Labels map[string]string
}
//go:generate moq -stub -out discover_mock.go . Discover
// Discover defines an interface for discovering the devices, mounts, and hooks available on a system
// Discover defines an interface for discovering the devices and mounts available on a system
type Discover interface {
Devices() ([]Device, error)
Mounts() ([]Mount, error)

View File

@@ -1,150 +0,0 @@
// Code generated by moq; DO NOT EDIT.
// github.com/matryer/moq
package discover
import (
"sync"
)
// Ensure, that DiscoverMock does implement Discover.
// If this is not the case, regenerate this file with moq.
var _ Discover = &DiscoverMock{}
// DiscoverMock is a mock implementation of Discover.
//
// func TestSomethingThatUsesDiscover(t *testing.T) {
//
// // make and configure a mocked Discover
// mockedDiscover := &DiscoverMock{
// DevicesFunc: func() ([]Device, error) {
// panic("mock out the Devices method")
// },
// HooksFunc: func() ([]Hook, error) {
// panic("mock out the Hooks method")
// },
// MountsFunc: func() ([]Mount, error) {
// panic("mock out the Mounts method")
// },
// }
//
// // use mockedDiscover in code that requires Discover
// // and then make assertions.
//
// }
type DiscoverMock struct {
// DevicesFunc mocks the Devices method.
DevicesFunc func() ([]Device, error)
// HooksFunc mocks the Hooks method.
HooksFunc func() ([]Hook, error)
// MountsFunc mocks the Mounts method.
MountsFunc func() ([]Mount, error)
// calls tracks calls to the methods.
calls struct {
// Devices holds details about calls to the Devices method.
Devices []struct {
}
// Hooks holds details about calls to the Hooks method.
Hooks []struct {
}
// Mounts holds details about calls to the Mounts method.
Mounts []struct {
}
}
lockDevices sync.RWMutex
lockHooks sync.RWMutex
lockMounts sync.RWMutex
}
// Devices calls DevicesFunc.
func (mock *DiscoverMock) Devices() ([]Device, error) {
callInfo := struct {
}{}
mock.lockDevices.Lock()
mock.calls.Devices = append(mock.calls.Devices, callInfo)
mock.lockDevices.Unlock()
if mock.DevicesFunc == nil {
var (
devicesOut []Device
errOut error
)
return devicesOut, errOut
}
return mock.DevicesFunc()
}
// DevicesCalls gets all the calls that were made to Devices.
// Check the length with:
// len(mockedDiscover.DevicesCalls())
func (mock *DiscoverMock) DevicesCalls() []struct {
} {
var calls []struct {
}
mock.lockDevices.RLock()
calls = mock.calls.Devices
mock.lockDevices.RUnlock()
return calls
}
// Hooks calls HooksFunc.
func (mock *DiscoverMock) Hooks() ([]Hook, error) {
callInfo := struct {
}{}
mock.lockHooks.Lock()
mock.calls.Hooks = append(mock.calls.Hooks, callInfo)
mock.lockHooks.Unlock()
if mock.HooksFunc == nil {
var (
hooksOut []Hook
errOut error
)
return hooksOut, errOut
}
return mock.HooksFunc()
}
// HooksCalls gets all the calls that were made to Hooks.
// Check the length with:
// len(mockedDiscover.HooksCalls())
func (mock *DiscoverMock) HooksCalls() []struct {
} {
var calls []struct {
}
mock.lockHooks.RLock()
calls = mock.calls.Hooks
mock.lockHooks.RUnlock()
return calls
}
// Mounts calls MountsFunc.
func (mock *DiscoverMock) Mounts() ([]Mount, error) {
callInfo := struct {
}{}
mock.lockMounts.Lock()
mock.calls.Mounts = append(mock.calls.Mounts, callInfo)
mock.lockMounts.Unlock()
if mock.MountsFunc == nil {
var (
mountsOut []Mount
errOut error
)
return mountsOut, errOut
}
return mock.MountsFunc()
}
// MountsCalls gets all the calls that were made to Mounts.
// Check the length with:
// len(mockedDiscover.MountsCalls())
func (mock *DiscoverMock) MountsCalls() []struct {
} {
var calls []struct {
}
mock.lockMounts.RLock()
calls = mock.calls.Mounts
mock.lockMounts.RUnlock()
return calls
}

View File

@@ -0,0 +1,424 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"fmt"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
log "github.com/sirupsen/logrus"
)
const (
// ControlDeviceUUID is used as the UUID for control devices such as nvidiactl or nvidia-modeset
ControlDeviceUUID = "CONTROL"
// MIGConfigDeviceUUID is used to indicate the MIG config control device
MIGConfigDeviceUUID = "CONFIG"
// MIGMonitorDeviceUUID is used to indicate the MIG monitor control device
MIGMonitorDeviceUUID = "MONITOR"
nvidiaGPUDeviceName = "nvidia-frontend"
nvidiaCapsDeviceName = "nvidia-caps"
nvidiaUVMDeviceName = "nvidia-uvm"
)
type nvmlDiscover struct {
None
logger *log.Logger
nvml nvml.Interface
migCaps map[ProcPath]DeviceNode
nvidiaDevices proc.NvidiaDevices
}
var _ Discover = (*nvmlDiscover)(nil)
// NewNVMLDiscover constructs a discoverer that uses NVML to find the devices
// available on a system.
func NewNVMLDiscover(nvml nvml.Interface) (Discover, error) {
return NewNVMLDiscoverWithLogger(log.StandardLogger(), nvml)
}
// NewNVMLDiscoverWithLogger constructs a discovered as with NewNVMLDiscover with the specified
// logger
func NewNVMLDiscoverWithLogger(logger *log.Logger, nvml nvml.Interface) (Discover, error) {
nvidiaDevices, err := proc.GetNvidiaDevices()
if err != nil {
return nil, fmt.Errorf("error loading NVIDIA devices: %v", err)
}
var migCaps map[ProcPath]DeviceNode
nvcapsDevice, exists := nvidiaDevices.Get(nvidiaCapsDeviceName)
if !exists {
logger.Warnf("%v nvcaps device could not be found", nvidiaCapsDeviceName)
} else if migCaps, err = getMigCaps(nvcapsDevice.Major); err != nil {
logger.Warnf("Could not load MIG capability devices: %v", err)
migCaps = nil
}
discover := &nvmlDiscover{
logger: logger,
nvml: nvml,
migCaps: migCaps,
nvidiaDevices: nvidiaDevices,
}
return discover, nil
}
// hasMigSupport checks if MIG device discovery is supported.
// Cases where this will be disabled include where no MIG minors file is
// present.
func (d nvmlDiscover) hasMigSupport() bool {
return len(d.migCaps) > 0
}
func (d *nvmlDiscover) Devices() ([]Device, error) {
ret := d.nvml.Init()
if ret.Value() != nvml.SUCCESS {
return nil, fmt.Errorf("error initalizing NVML: %v", ret.Error())
}
defer d.tryShutdownNVML()
c, ret := d.nvml.DeviceGetCount()
if ret.Value() != nvml.SUCCESS {
return nil, fmt.Errorf("error getting device count: %v", ret.Error())
}
var handles []nvml.Device
for i := 0; i < c; i++ {
handle, ret := d.nvml.DeviceGetHandleByIndex(i)
if ret.Value() != nvml.SUCCESS {
return nil, fmt.Errorf("error getting device handle for device %v: %v", i, ret.Error())
}
if !d.hasMigSupport() {
handles = append(handles, handle)
continue
}
migHandles, err := getMIGHandlesForDevice(handle)
if err != nil {
return nil, fmt.Errorf("error getting MIG handles for device: %v", err)
}
if len(migHandles) == 0 {
handles = append(handles, handle)
}
handles = append(handles, migHandles...)
}
return d.devicesByHandle(handles)
}
func (d *nvmlDiscover) devicesByHandle(handles []nvml.Device) ([]Device, error) {
var devices []Device
var largestMinorNumber int
for _, h := range handles {
device, err := d.deviceFromNVMLHandle(h)
if err != nil {
return nil, fmt.Errorf("error constructing device from handle %v: %v", h, err)
}
devices = append(devices, device)
if largestMinorNumber < device.DeviceNodes[0].Minor {
largestMinorNumber = device.DeviceNodes[0].Minor
}
}
controlDevices, err := d.getControlDevices()
if err != nil {
return nil, fmt.Errorf("error getting control devices: %v", err)
}
devices = append(devices, controlDevices...)
if d.hasMigSupport() {
migControlDevices, err := d.getMigControlDevices(largestMinorNumber)
if err != nil {
return nil, fmt.Errorf("error getting MIG control devices: %v", err)
}
devices = append(devices, migControlDevices...)
}
return devices, nil
}
func (d *nvmlDiscover) deviceFromNVMLHandle(handle nvml.Device) (Device, error) {
if d.hasMigSupport() {
isMigDevice, ret := handle.IsMigDeviceHandle()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error checking device handle: %v", ret.Error())
}
if isMigDevice {
return d.deviceFromMIGDeviceHandle(handle)
}
}
return d.deviceFromFullDeviceHandle(handle)
}
func (d *nvmlDiscover) deviceFromFullDeviceHandle(handle nvml.Device) (Device, error) {
index, ret := handle.GetIndex()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error getting device index: %v", ret.Error())
}
uuid, ret := handle.GetUUID()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error getting device UUID: %v", ret.Error())
}
pciInfo, ret := handle.GetPciInfo()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error getting PCI info: %v", ret.Error())
}
busID := NewPCIBusID(pciInfo)
minor, ret := handle.GetMinorNumber()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error getting minor number: %v", ret.Error())
}
nvidiaGPUDevice, exists := d.nvidiaDevices.Get(nvidiaGPUDeviceName)
if !exists {
return Device{}, fmt.Errorf("device for '%v' does not exist", nvidiaGPUDeviceName)
}
deviceNode := DeviceNode{
Path: DevicePath(fmt.Sprintf("/dev/nvidia%d", minor)),
Major: nvidiaGPUDevice.Major,
Minor: minor,
}
device := Device{
Index: fmt.Sprintf("%d", index),
PCIBusID: busID,
UUID: uuid,
DeviceNodes: []DeviceNode{deviceNode},
ProcPaths: []ProcPath{busID.GetProcPath()},
}
return device, nil
}
func (d *nvmlDiscover) deviceFromMIGDeviceHandle(handle nvml.Device) (Device, error) {
parent, ret := handle.GetDeviceHandleFromMigDeviceHandle()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error getting parent device handle: %v", ret.Error())
}
gpu, ret := parent.GetMinorNumber()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error getting GPU minor number: %v", ret.Error())
}
parentDevice, err := d.deviceFromFullDeviceHandle(parent)
if err != nil {
return Device{}, fmt.Errorf("error getting parent device: %v", err)
}
index, ret := handle.GetIndex()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error getting device index: %v", ret.Error())
}
uuid, ret := handle.GetUUID()
if ret.Value() != nvml.SUCCESS {
return Device{}, fmt.Errorf("error getting device UUID: %v", ret.Error())
}
capDeviceNodes := []DeviceNode{}
procPaths, err := getProcPathsForMigDevice(gpu, handle)
if err != nil {
return Device{}, fmt.Errorf("error getting proc paths for MIG device: %v", err)
}
for _, p := range procPaths {
capDeviceNode, ok := d.migCaps[p]
if !ok {
return Device{}, fmt.Errorf("could not determine cap device path for %v", p)
}
capDeviceNodes = append(capDeviceNodes, capDeviceNode)
}
device := Device{
Index: fmt.Sprintf("%s:%d", parentDevice.Index, index),
UUID: uuid,
DeviceNodes: append(parentDevice.DeviceNodes, capDeviceNodes...),
ProcPaths: append(parentDevice.ProcPaths, procPaths...),
}
return device, nil
}
func (d *nvmlDiscover) getControlDevices() ([]Device, error) {
devices := []struct {
name string
path string
minor int
}{
// TODO: Where is the best place to find these device Minors programatically?
{nvidiaGPUDeviceName, "/dev/nvidia-modeset", 254},
{nvidiaGPUDeviceName, "/dev/nvidiactl", 255},
{nvidiaUVMDeviceName, "/dev/nvidia-uvm", 0},
{nvidiaUVMDeviceName, "/dev/nvidia-uvm-tools", 1},
}
var controlDevices []Device
for _, info := range devices {
device, exists := d.nvidiaDevices.Get(info.name)
if !exists {
d.logger.Warnf("device name %v not defined; skipping control devices %v", info.name, info.path)
continue
}
deviceNode := DeviceNode{
Path: DevicePath(info.path),
Major: device.Major,
Minor: info.minor,
}
controlDevices = append(controlDevices, Device{
UUID: ControlDeviceUUID,
DeviceNodes: []DeviceNode{deviceNode},
ProcPaths: []ProcPath{},
})
}
return controlDevices, nil
}
func (d *nvmlDiscover) getMigControlDevices(numGpus int) ([]Device, error) {
targets := map[string]ProcPath{
MIGConfigDeviceUUID: ProcPath("/proc/driver/nvidia/capabilities/mig/config"),
MIGMonitorDeviceUUID: ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"),
}
var devices []Device
for id, procPath := range targets {
deviceNode, exists := d.migCaps[procPath]
if !exists {
return nil, fmt.Errorf("device node for '%v' is undefined", procPath)
}
var procPaths []ProcPath
for gpu := 0; gpu <= numGpus; gpu++ {
procPaths = append(procPaths, ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig", gpu)))
}
procPaths = append(procPaths, procPath)
devices = append(devices, Device{
UUID: id,
DeviceNodes: []DeviceNode{deviceNode},
ProcPaths: procPaths,
})
}
return devices, nil
}
func getProcPathsForMigDevice(gpu int, handle nvml.Device) ([]ProcPath, error) {
gi, ret := handle.GetGPUInstanceId()
if ret.Value() != nvml.SUCCESS {
return nil, fmt.Errorf("error getting GPU instance ID: %v", ret.Error())
}
ci, ret := handle.GetComputeInstanceId()
if ret.Value() != nvml.SUCCESS {
return nil, fmt.Errorf("error getting comput instance ID: %v", ret.Error())
}
procPaths := []ProcPath{
ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig/gi%d/access", gpu, gi)),
ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci)),
}
return procPaths, nil
}
func getMIGHandlesForDevice(handle nvml.Device) ([]nvml.Device, error) {
currentMigMode, _, ret := handle.GetMigMode()
if ret.Value() == nvml.ERROR_NOT_SUPPORTED {
return nil, nil
}
if ret.Value() != nvml.SUCCESS {
return nil, fmt.Errorf("error getting MIG mode for device: %v", ret.Error())
}
if currentMigMode == nvml.DEVICE_MIG_DISABLE {
return nil, nil
}
maxMigDeviceCount, ret := handle.GetMaxMigDeviceCount()
if ret.Value() != nvml.SUCCESS {
return nil, fmt.Errorf("error getting number of MIG devices: %v", ret.Error())
}
var migHandles []nvml.Device
for mi := 0; mi < maxMigDeviceCount; mi++ {
migHandle, ret := handle.GetMigDeviceHandleByIndex(mi)
if ret.Value() == nvml.ERROR_NOT_FOUND {
continue
}
if ret.Value() != nvml.SUCCESS {
return nil, fmt.Errorf("error getting MIG device %v: %v", mi, ret.Error())
}
migHandles = append(migHandles, migHandle)
}
return migHandles, nil
}
func (d *nvmlDiscover) tryShutdownNVML() {
ret := d.nvml.Shutdown()
if ret.Value() != nvml.SUCCESS {
d.logger.Warnf("Could not shut down NVML: %v", ret.Error())
}
}
// NewPCIBusID provides a utility function that returns the string representation
// of the bus ID.
func NewPCIBusID(p nvml.PciInfo) PCIBusID {
var bytes []byte
for _, b := range p.BusId {
if byte(b) == '\x00' {
break
}
bytes = append(bytes, byte(b))
}
return PCIBusID(string(bytes))
}
// GetProcPath returns the path in /proc associated with the PCI bus ID
func (p PCIBusID) GetProcPath() ProcPath {
id := strings.ToLower(p.String())
if strings.HasPrefix(id, "0000") {
id = id[4:]
}
return ProcPath(filepath.Join("/proc/driver/nvidia/gpus", id))
}
func (p PCIBusID) String() string {
return string(p)
}

View File

@@ -0,0 +1,44 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
)
// getMigCaps returns a mapping of MIG capability path to device nodes
func getMigCaps(capDeviceMajor int) (map[ProcPath]DeviceNode, error) {
migCaps, err := nvcaps.LoadMigMinors()
if err != nil {
return nil, fmt.Errorf("error loading MIG minors: %v", err)
}
return getMigCapsFromMigMinors(migCaps, capDeviceMajor), nil
}
func getMigCapsFromMigMinors(migCaps map[nvcaps.MigCap]nvcaps.MigMinor, capDeviceMajor int) map[ProcPath]DeviceNode {
capsDevicePaths := make(map[ProcPath]DeviceNode)
for cap, minor := range migCaps {
capsDevicePaths[ProcPath(cap.ProcPath())] = DeviceNode{
Path: DevicePath(minor.DevicePath()),
Major: capDeviceMajor,
Minor: int(minor),
}
}
return capsDevicePaths
}

View File

@@ -0,0 +1,41 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
"github.com/stretchr/testify/require"
)
func TestGetMigCaps(t *testing.T) {
migMinors := map[nvcaps.MigCap]nvcaps.MigMinor{
"config": 1,
"monitor": 2,
"gpu0/gi0/access": 3,
"gpu0/gi0/ci0/access": 4,
}
migCapMajor := 999
migCaps := getMigCapsFromMigMinors(migMinors, migCapMajor)
require.Len(t, migCaps, len(migMinors))
for _, c := range migCaps {
require.Equal(t, migCapMajor, c.Major)
}
}

View File

@@ -0,0 +1,219 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
const (
nvidiaGPUDeviceMajorDefault = 195
nvidiaCapsDeviceMajorDefault = 235
)
func newTestDiscover() nvmlDiscover {
logger, _ := testlog.NewNullLogger()
nvml := nvml.NewMockNVMLServer(nvml.NewMockA100Device(0))
return nvmlDiscover{
logger: logger,
nvml: nvml,
nvidiaDevices: proc.NewMockNvidiaDevices(
proc.Device{Name: nvidiaGPUDeviceName, Major: nvidiaGPUDeviceMajorDefault},
),
}
}
func newTestDiscoverWithMIG() nvmlDiscover {
device := &nvml.MockA100Device{
Index: 0,
MigMode: nvml.DEVICE_MIG_ENABLE,
GpuInstances: make(map[*nvml.MockA100GpuInstance]struct{}),
GpuInstanceCounter: 0,
}
// Create a single gi and ci on the device
gpuInstanceProfileInfo := &nvml.GpuInstanceProfileInfo{
Id: nvml.GPU_INSTANCE_PROFILE_7_SLICE,
}
gi, _ := device.CreateGpuInstance(gpuInstanceProfileInfo)
computeInstanceProfileInfo := &nvml.ComputeInstanceProfileInfo{
Id: nvml.COMPUTE_INSTANCE_PROFILE_1_SLICE,
}
_, _ = gi.CreateComputeInstance(computeInstanceProfileInfo)
logger, _ := testlog.NewNullLogger()
return nvmlDiscover{
logger: logger,
nvml: nvml.NewMockNVMLServer(device),
migCaps: map[ProcPath]DeviceNode{
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"): {
Path: DevicePath("/dev/nvidia-caps/nvidia-cap3"),
Minor: 3,
Major: 235,
},
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"): {
Path: DevicePath("/dev/nvidia-caps/nvidia-cap4"),
Minor: 4,
Major: 235,
},
ProcPath("/proc/driver/nvidia/capabilities/mig/config"): {
Path: DevicePath("/dev/nvidia-caps/nvidia-cap1"),
Minor: 1,
Major: 235,
},
ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"): {
Path: DevicePath("/dev/nvidia-caps/nvidia-cap2"),
Minor: 2,
Major: 235,
},
},
nvidiaDevices: proc.NewMockNvidiaDevices(
proc.Device{Name: nvidiaGPUDeviceName, Major: nvidiaGPUDeviceMajorDefault},
proc.Device{Name: nvidiaCapsDeviceName, Major: nvidiaCapsDeviceMajorDefault},
),
}
}
func TestDiscoverNvmlDevices(t *testing.T) {
d := newTestDiscover()
devices, err := d.Devices()
require.NoError(t, err)
require.Len(t, devices, 3)
device := devices[0]
require.Equal(t, "0", device.Index)
require.Equal(t, "GPU-0", device.UUID)
require.Equal(t, "0000FFFF:FF:FF.F", device.PCIBusID.String())
expectedDeviceNodes := []DeviceNode{
{
Path: DevicePath("/dev/nvidia0"),
Minor: 0,
Major: nvidiaGPUDeviceMajorDefault,
},
}
require.Equal(t, expectedDeviceNodes, device.DeviceNodes)
expectedProcPaths := []ProcPath{ProcPath("/proc/driver/nvidia/gpus/ffff:ff:ff.f")}
require.Equal(t, expectedProcPaths, device.ProcPaths)
}
func TestDiscoverNvmlMigDevices(t *testing.T) {
d := newTestDiscoverWithMIG()
devices, err := d.Devices()
require.NoError(t, err)
require.Len(t, devices, 5)
mig := devices[0]
require.Equal(t, "0:0", mig.Index)
require.Equal(t, "MIG-0", mig.UUID)
require.Empty(t, mig.PCIBusID)
expectedDeviceNodes := []DeviceNode{
{
Path: DevicePath("/dev/nvidia0"),
Minor: 0,
Major: nvidiaGPUDeviceMajorDefault,
},
{
Path: DevicePath("/dev/nvidia-caps/nvidia-cap3"),
Minor: 3,
Major: nvidiaCapsDeviceMajorDefault,
},
{
Path: DevicePath("/dev/nvidia-caps/nvidia-cap4"),
Minor: 4,
Major: nvidiaCapsDeviceMajorDefault,
},
}
require.Equal(t, expectedDeviceNodes, mig.DeviceNodes)
expectedProcPaths := []ProcPath{
ProcPath("/proc/driver/nvidia/gpus/ffff:ff:ff.f"),
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"),
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"),
}
require.Equal(t, expectedProcPaths, mig.ProcPaths)
var config *Device
var monitor *Device
for i, d := range devices {
if d.UUID == "CONFIG" {
config = &devices[i]
}
if d.UUID == "MONITOR" {
monitor = &devices[i]
}
}
require.NotNil(t, config)
require.NotNil(t, monitor)
require.Equal(t, "CONFIG", config.UUID)
expectedDeviceNodes = []DeviceNode{
{
Path: DevicePath("/dev/nvidia-caps/nvidia-cap1"),
Minor: 1,
Major: nvidiaCapsDeviceMajorDefault,
},
}
require.Equal(t, expectedDeviceNodes, config.DeviceNodes)
require.Contains(t, config.ProcPaths, ProcPath("/proc/driver/nvidia/capabilities/mig/config"))
require.Len(t, config.ProcPaths, 2)
require.Equal(t, "MONITOR", monitor.UUID)
expectedDeviceNodes = []DeviceNode{
{
Path: DevicePath("/dev/nvidia-caps/nvidia-cap2"),
Minor: 2,
Major: nvidiaCapsDeviceMajorDefault,
},
}
require.Equal(t, expectedDeviceNodes, monitor.DeviceNodes)
require.Contains(t, monitor.ProcPaths, ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"))
require.Len(t, monitor.ProcPaths, 2)
}
func TestPCIBusID(t *testing.T) {
testCases := map[string]ProcPath{
"0000FFFF:FF:FF.F": "/proc/driver/nvidia/gpus/ffff:ff:ff.f",
"FFFFFFFF:FF:FF.F": "/proc/driver/nvidia/gpus/ffffffff:ff:ff.f",
}
for busID, procPath := range testCases {
p := PCIBusID(busID)
require.Equal(t, busID, p.String())
require.Equal(t, procPath, p.GetProcPath())
}
}

View File

@@ -0,0 +1,71 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
log "github.com/sirupsen/logrus"
)
type hooks struct {
None
logger *log.Logger
}
var _ Discover = (*hooks)(nil)
// NewHooks creates a discoverer for linux containers
func NewHooks() Discover {
return NewHooksWithLogger(log.StandardLogger())
}
// NewHooksWithLogger creates a discoverer as with NewHooks with the specified logger
func NewHooksWithLogger(logger *log.Logger) Discover {
h := hooks{
logger: logger,
}
return &h
}
func (h hooks) Hooks() ([]Hook, error) {
var hooks []Hook
hooks = append(hooks, newLdconfigHook())
return hooks, nil
}
func newLdconfigHook() Hook {
const rootPattern = "@Root.Path@"
h := Hook{
Path: "/sbin/ldconfig",
Args: []string{
// TODO: Testing seems to indicate that this is -v flag is required
"-v",
"-r", rootPattern,
},
// TODO: CreateContainer hooks were only added to a later OCI spec version
// We will have to find a way to deal with OCI versions before 1.0.2
HookName: "create-container",
Labels: map[string]string{
"min-oci-version": "1.0.2",
},
}
return h
}

52
internal/discover/ipc.go Normal file
View File

@@ -0,0 +1,52 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
log "github.com/sirupsen/logrus"
)
// NewIPCMounts creates a discoverer for IPC sockets
func NewIPCMounts(root string) Discover {
return NewIPCMountsWithLogger(log.StandardLogger(), root)
}
// NewIPCMountsWithLogger creates a discovered as with NewIPCMounts with the
// specified logger.
func NewIPCMountsWithLogger(logger *log.Logger, root string) Discover {
d := mounts{
logger: logger,
lookup: lookup.NewFileLocatorWithLogger(logger, root),
required: requiredIPCs,
}
return &d
}
var requiredIPCs = map[string][]string{
"nvidia-persistenced": {
"/var/run/nvidia-persistenced/socket",
},
"nvidia-fabricmanager": {
"/var/run/nvidia-fabricmanager/socket",
},
// TODO: This can be controlled by the NV_MPS_PIPE_DIR envvar
"nvidia-mps": {
"/tmp/nvidia-mps",
},
}

View File

@@ -0,0 +1,56 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestIPCDiscover(t *testing.T) {
ipcLookup := map[string]string{
"/var/run/nvidia-persistenced/socket": "/var/run/nvidia-persistenced/socket",
"/var/run/nvidia-fabricmanager/socket": "fm-socket",
}
logger, _ := testlog.NewNullLogger()
d := NewIPCMountsWithLogger(logger, "")
// Override lookup for testing
mockLocator := NewLocatorMockFromMap(ipcLookup)
d.(*mounts).lookup = mockLocator
mounts, err := d.Mounts()
require.NoError(t, err)
require.ElementsMatch(t, []Mount{
{Path: "/var/run/nvidia-persistenced/socket", Labels: map[string]string{}},
{Path: "fm-socket", Labels: map[string]string{}}}, mounts)
require.Len(t, mockLocator.LocateCalls(), 3)
devices, err := d.Devices()
require.NoError(t, err)
require.Empty(t, devices)
hooks, err := d.Hooks()
require.NoError(t, err)
require.Empty(t, hooks)
}

View File

@@ -1,126 +0,0 @@
/**
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
import (
"fmt"
"path/filepath"
"sort"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
"github.com/sirupsen/logrus"
)
// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified
func NewLDCacheUpdateHook(logger *logrus.Logger, mounts Discover, cfg *Config) (Discover, error) {
d := ldconfig{
logger: logger,
mountsFrom: mounts,
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath,
}
return &d, nil
}
const (
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
)
type ldconfig struct {
None
logger *logrus.Logger
mountsFrom Discover
lookup lookup.Locator
nvidiaCTKExecutablePath string
}
// Hooks checks the required mounts for libraries and returns a hook to update the LDcache for the discovered paths.
func (d ldconfig) Hooks() ([]Hook, error) {
mounts, err := d.mountsFrom.Mounts()
if err != nil {
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
}
libDirs := getLibDirs(mounts)
hookPath := nvidiaCTKDefaultFilePath
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
if err != nil {
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
} else if len(targets) == 0 {
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
} else {
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
hookPath = targets[0]
}
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
args := []string{hookPath, "hook", "update-ldcache"}
for _, f := range libDirs {
args = append(args, "--folder", f)
}
h := Hook{
Lifecycle: cdi.CreateContainerHook,
Path: hookPath,
Args: args,
}
return []Hook{h}, nil
}
// getLibDirs extracts the library dirs from the specified mounts
func getLibDirs(mounts []Mount) []string {
var paths []string
checked := make(map[string]bool)
for _, m := range mounts {
dir := filepath.Dir(m.Path)
if dir == "" {
continue
}
_, exists := checked[dir]
if exists {
continue
}
checked[dir] = isLibName(filepath.Base(m.Path))
if checked[dir] {
paths = append(paths, dir)
}
}
sort.Strings(paths)
return paths
}
// isLibName checks if the specified filename is a library (i.e. ends in `.so*`)
func isLibName(filename string) bool {
parts := strings.Split(filename, ".")
for _, p := range parts {
if p == "so" {
return true
}
}
return false
}

View File

@@ -0,0 +1,100 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
log "github.com/sirupsen/logrus"
)
// NewLibraries constructs discoverer for libraries
func NewLibraries(root string) (Discover, error) {
return NewLibrariesWithLogger(log.StandardLogger(), root)
}
// NewLibrariesWithLogger constructs discoverer for libraries with the specified logger
func NewLibrariesWithLogger(logger *log.Logger, root string) (Discover, error) {
lookup, err := lookup.NewLibraryLocatorWithLogger(logger, root)
if err != nil {
return nil, fmt.Errorf("error constructing locator: %v", err)
}
d := mounts{
logger: logger,
lookup: lookup,
required: requiredLibraries,
}
return &d, nil
}
// requiredLibraries defines a set of libraries and their labels
var requiredLibraries = map[string][]string{
"utility": {
"libnvidia-ml.so", /* Management library */
"libnvidia-cfg.so", /* GPU configuration */
},
"compute": {
"libcuda.so", /* CUDA driver library */
"libnvidia-opencl.so", /* NVIDIA OpenCL ICD */
"libnvidia-ptxjitcompiler.so", /* PTX-SASS JIT compiler (used by libcuda) */
"libnvidia-fatbinaryloader.so", /* fatbin loader (used by libcuda) */
"libnvidia-allocator.so", /* NVIDIA allocator runtime library */
"libnvidia-compiler.so", /* NVVM-PTX compiler for OpenCL (used by libnvidia-opencl) */
},
"video": {
"libvdpau_nvidia.so", /* NVIDIA VDPAU ICD */
"libnvidia-encode.so", /* Video encoder */
"libnvidia-opticalflow.so", /* NVIDIA Opticalflow library */
"libnvcuvid.so", /* Video decoder */
},
"graphics": {
//"libnvidia-egl-wayland.so", /* EGL wayland platform extension (used by libEGL_nvidia) */
"libnvidia-eglcore.so", /* EGL core (used by libGLES*[_nvidia] and libEGL_nvidia) */
"libnvidia-glcore.so", /* OpenGL core (used by libGL or libGLX_nvidia) */
"libnvidia-tls.so", /* Thread local storage (used by libGL or libGLX_nvidia) */
"libnvidia-glsi.so", /* OpenGL system interaction (used by libEGL_nvidia) */
"libnvidia-fbc.so", /* Framebuffer capture */
"libnvidia-ifr.so", /* OpenGL framebuffer capture */
"libnvidia-rtcore.so", /* Optix */
"libnvoptix.so", /* Optix */
},
"glvnd": {
//"libGLX.so", /* GLX ICD loader */
//"libOpenGL.so", /* OpenGL ICD loader */
//"libGLdispatch.so", /* OpenGL dispatch (used by libOpenGL, libEGL and libGLES*) */
"libGLX_nvidia.so", /* OpenGL/GLX ICD */
"libEGL_nvidia.so", /* EGL ICD */
"libGLESv2_nvidia.so", /* OpenGL ES v2 ICD */
"libGLESv1_CM_nvidia.so", /* OpenGL ES v1 common profile ICD */
"libnvidia-glvkspirv.so", /* SPIR-V Lib for Vulkan */
"libnvidia-cbl.so", /* VK_NV_ray_tracing */
},
"compat32": {
"libGL.so", /* OpenGL/GLX legacy _or_ compatibility wrapper (GLVND) */
"libEGL.so", /* EGL legacy _or_ ICD loader (GLVND) */
"libGLESv1_CM.so", /* OpenGL ES v1 common profile legacy _or_ ICD loader (GLVND) */
"libGLESv2.so", /* OpenGL ES v2 legacy _or_ ICD loader (GLVND) */
},
"ngx": {
"libnvidia-ngx.so", /* NGX library */
},
"dxcore": {
"libdxcore.so", /* Core library for dxcore support */
},
}

View File

@@ -0,0 +1,56 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"testing"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
func TestLibraries(t *testing.T) {
libraryLookup := map[string]string{
"libcuda.so": "/lib/libcuda.so.999.99",
"libversion.so": "/lib/libversion.so.111.11",
"libother.so": "/lib/libother.so.999.99",
}
logger, _ := testlog.NewNullLogger()
d, err := NewLibrariesWithLogger(logger, "")
require.NoError(t, err)
// Override lookup for testing
mockLocator := NewLocatorMockFromMap(libraryLookup)
d.(*mounts).lookup = mockLocator
mounts, err := d.Mounts()
require.NoError(t, err)
require.ElementsMatch(t, []Mount{{Path: "/lib/libcuda.so.999.99", Labels: map[string]string{}}}, mounts)
devices, err := d.Devices()
require.NoError(t, err)
require.Empty(t, devices)
hooks, err := d.Hooks()
require.NoError(t, err)
require.Empty(t, hooks)
}

View File

@@ -18,68 +18,108 @@ package discover
import (
"fmt"
"sync"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/sirupsen/logrus"
log "github.com/sirupsen/logrus"
)
const (
capabilityLabel = "capability"
versionLabel = "version"
)
// mounts is a generic discoverer for Mounts. It is customized by specifying the
// required entities as a list and a Locator that is used to find the target mounts
// based on the entry in the list.
// required entities as a key-value pair as well as a Locator that is used to
// identify the mounts that are to be included.
type mounts struct {
None
logger *logrus.Logger
logger *log.Logger
lookup lookup.Locator
required []string
sync.Mutex
cache []Mount
required map[string][]string
}
var _ Discover = (*mounts)(nil)
func (d *mounts) Mounts() ([]Mount, error) {
func (d mounts) Mounts() ([]Mount, error) {
mounts, err := d.uniqueMounts()
if err != nil {
return nil, fmt.Errorf("error discovering mounts: %v", err)
}
return mounts.Slice(), nil
}
func (d mounts) uniqueMounts() (mountsByPath, error) {
if d.lookup == nil {
return nil, fmt.Errorf("no lookup defined")
}
if d.cache != nil {
d.logger.Debugf("returning cached mounts")
return d.cache, nil
}
mounts := make(mountsByPath)
d.Lock()
defer d.Unlock()
paths := make(map[string]bool)
for _, candidate := range d.required {
d.logger.Debugf("Locating %v", candidate)
located, err := d.lookup.Locate(candidate)
if err != nil {
d.logger.Warnf("Could not locate %v: %v", candidate, err)
continue
}
if len(located) == 0 {
d.logger.Warnf("Missing %v", candidate)
continue
}
d.logger.Debugf("Located %v as %v", candidate, located)
for _, p := range located {
paths[p] = true
for id, keys := range d.required {
for _, key := range keys {
d.logger.Debugf("Locating %v [%v]", key, id)
located, err := d.lookup.Locate(key)
if err != nil {
d.logger.Warnf("Could not locate %v [%v]: %v", key, id, err)
continue
}
d.logger.Infof("Located %v [%v]: %v", key, id, located)
for _, p := range located {
// TODO: We need to add labels
mount := newMount(p)
mounts.Put(mount)
}
}
}
var mounts []Mount
for path := range paths {
d.logger.Infof("Selecting %v", path)
mount := Mount{
Path: path,
}
mounts = append(mounts, mount)
}
d.cache = mounts
return mounts, nil
}
type mountsByPath map[string]Mount
func (m mountsByPath) Slice() []Mount {
var mounts []Mount
for _, mount := range m {
mounts = append(mounts, mount)
}
return mounts
}
func (m *mountsByPath) Put(value Mount) {
key := value.Path
mount, exists := (*m)[key]
if !exists {
(*m)[key] = value
return
}
for k, v := range value.Labels {
mount.Labels[k] = v
}
(*m)[key] = mount
}
// NewMountForCapability creates a mount with the specified capability label
func NewMountForCapability(path string, capability string) Mount {
return newMount(path, capabilityLabel, capability)
}
// NewMountForVersion creates a mount with the specified version label
func NewMountForVersion(path string, version string) Mount {
return newMount(path, versionLabel, version)
}
func newMount(path string, labels ...string) Mount {
l := make(map[string]string)
for i := 0; i < len(labels)-1; i += 2 {
l[labels[i]] = labels[i+1]
}
return Mount{
Path: path,
Labels: l,
}
}

View File

@@ -22,144 +22,32 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
"github.com/stretchr/testify/require"
testlog "github.com/sirupsen/logrus/hooks/test"
)
func TestMountsReturnsEmptyDevices(t *testing.T) {
func TestMountsReturnsErrorForNoLookup(t *testing.T) {
d := mounts{}
devices, err := d.Devices()
mounts, err := d.Mounts()
require.Error(t, err)
require.Len(t, mounts, 0)
devices, err := d.Devices()
require.NoError(t, err)
require.Empty(t, devices)
hooks, err := d.Hooks()
require.NoError(t, err)
require.Empty(t, hooks)
}
func TestMounts(t *testing.T) {
logger, logHook := testlog.NewNullLogger()
testCases := []struct {
description string
expectedError error
expectedMounts []Mount
input *mounts
}{
{
description: "nill lookup returns error",
expectedError: fmt.Errorf("no lookup defined"),
input: &mounts{},
},
{
description: "empty required returns no mounts",
expectedError: nil,
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(string) ([]string, error) {
return []string{"located"}, nil
},
},
},
},
{
description: "required returns located",
expectedError: nil,
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(string) ([]string, error) {
return []string{"located"}, nil
},
},
required: []string{"required"},
},
expectedMounts: []Mount{{Path: "located"}},
},
{
description: "mounts removes located duplicates",
expectedError: nil,
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(string) ([]string, error) {
return []string{"located"}, nil
},
},
required: []string{"required0", "required1"},
},
expectedMounts: []Mount{{Path: "located"}},
},
{
description: "mounts skips located errors",
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
if s == "error" {
return nil, fmt.Errorf(s)
}
return []string{s}, nil
},
},
required: []string{"required0", "error", "required1"},
},
expectedMounts: []Mount{{Path: "required0"}, {Path: "required1"}},
},
{
description: "mounts skips unlocated",
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
if s == "empty" {
return nil, nil
}
return []string{s}, nil
},
},
required: []string{"required0", "empty", "required1"},
},
expectedMounts: []Mount{{Path: "required0"}, {Path: "required1"}},
},
{
description: "mounts skips unlocated",
input: &mounts{
lookup: &lookup.LocatorMock{
LocateFunc: func(s string) ([]string, error) {
if s == "multiple" {
return []string{"multiple0", "multiple1"}, nil
}
return []string{s}, nil
},
},
required: []string{"required0", "multiple", "required1"},
},
expectedMounts: []Mount{
{Path: "required0"},
{Path: "multiple0"},
{Path: "multiple1"},
{Path: "required1"},
},
},
}
for _, tc := range testCases {
logHook.Reset()
t.Run(tc.description, func(t *testing.T) {
tc.input.logger = logger
mounts, err := tc.input.Mounts()
if tc.expectedError != nil {
require.Error(t, err)
} else {
require.NoError(t, err)
func NewLocatorMockFromMap(lookupMap map[string]string) *lookup.LocatorMock {
return &lookup.LocatorMock{
LocateFunc: func(key string) ([]string, error) {
value, exists := lookupMap[key]
if !exists {
return nil, fmt.Errorf("key %v not found", key)
}
require.ElementsMatch(t, tc.expectedMounts, mounts)
// We check that the mock is called for each element of required
if tc.input.lookup != nil {
mock := tc.input.lookup.(*lookup.LocatorMock)
require.Len(t, mock.LocateCalls(), len(tc.input.required))
var args []string
for _, c := range mock.LocateCalls() {
args = append(args, c.S)
}
require.EqualValues(t, args, tc.input.required)
}
})
return []string{value}, nil
},
}
}

View File

@@ -25,7 +25,15 @@ import (
func TestNone(t *testing.T) {
d := None{}
devices, err := d.Devices()
require.NoError(t, err)
require.Empty(t, devices)
mounts, err := d.Mounts()
require.NoError(t, err)
require.Empty(t, mounts)
hooks, err := d.Hooks()
require.NoError(t, err)
require.Empty(t, hooks)
}

View File

@@ -0,0 +1,71 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"fmt"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
log "github.com/sirupsen/logrus"
)
type nvmlServer struct {
logger *log.Logger
composite
}
var _ Discover = (*nvmlServer)(nil)
// NewNVMLServer constructs a discoverer for server systems using NVML to discover devices
func NewNVMLServer(root string) (Discover, error) {
return NewNVMLServerWithLogger(log.StandardLogger(), root)
}
// NewNVMLServerWithLogger constructs a discoverer for server systems using NVML to discover devices with
// the specified logger
func NewNVMLServerWithLogger(logger *log.Logger, root string) (Discover, error) {
return createNVMLServer(logger, nvml.New(), root)
}
func createNVMLServer(logger *log.Logger, nvml nvml.Interface, root string) (Discover, error) {
d := nvmlServer{
logger: logger,
}
devices, err := NewNVMLDiscoverWithLogger(logger, nvml)
if err != nil {
return nil, fmt.Errorf("error constructing NVML device discoverer: %v", err)
}
libraries, err := NewLibrariesWithLogger(logger, root)
if err != nil {
return nil, fmt.Errorf("error constructing library discoverer: %v", err)
}
d.add(
// Device discovery
devices,
// Mounts discovery
libraries,
NewBinaryMountsWithLogger(logger, root),
NewIPCMountsWithLogger(logger, root),
// Hook discovery
NewHooksWithLogger(logger),
)
return &d, nil
}

View File

@@ -0,0 +1,66 @@
/*
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
*/
package discover
import (
"testing"
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require"
)
const (
testMajor = 999
)
func TestNVMLServerConstructor(t *testing.T) {
logger, _ := testlog.NewNullLogger()
nvml := nvml.NewMockNVMLOnLunaServer()
d, err := createNVMLServer(logger, nvml, "")
require.NoError(t, err)
instance := d.(*nvmlServer)
require.Len(t, instance.discoverers, 1+3+1)
// We need to override the nvidiaDevices member of the nvmlDiscovery
// TODO: Use a mock instead, or allow for injection into a constructor
instance.discoverers[0].(*nvmlDiscover).nvidiaDevices = &mockNvidiaDevices{}
devices, err := d.Devices()
require.NoError(t, err)
require.NotEmpty(t, devices)
_, err = d.Mounts()
require.NoError(t, err)
}
type mockNvidiaDevices struct{}
var _ proc.NvidiaDevices = (*mockNvidiaDevices)(nil)
func (d mockNvidiaDevices) Get(name string) (proc.Device, bool) {
return proc.Device{Name: name, Major: testMajor}, true
}
func (d mockNvidiaDevices) Exists(string) bool {
return false
}

Some files were not shown because too many files have changed in this diff Show More