mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
27 Commits
v1.10.0-rc
...
experiment
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8551c71a8a | ||
|
|
7ee81e91de | ||
|
|
5a6860adbf | ||
|
|
ce2b0dfdf0 | ||
|
|
6f0129bdf2 | ||
|
|
ffd98424d8 | ||
|
|
cf192169a8 | ||
|
|
eeabd2b94d | ||
|
|
d81329da1f | ||
|
|
3b84f527cc | ||
|
|
8da6e42d88 | ||
|
|
1a755d471a | ||
|
|
c05f9dffc5 | ||
|
|
7c5b913e09 | ||
|
|
c0d2d41f23 | ||
|
|
08cf87eb21 | ||
|
|
d3a9f512c4 | ||
|
|
73baa74ea8 | ||
|
|
39aa24690c | ||
|
|
dc6b7c703b | ||
|
|
b044b56c6e | ||
|
|
46cdf8c41a | ||
|
|
22ee5eb64f | ||
|
|
a7f3398a68 | ||
|
|
72fe259745 | ||
|
|
3b27d91026 | ||
|
|
d3997eceb2 |
@@ -20,7 +20,6 @@ default:
|
||||
variables:
|
||||
GIT_SUBMODULE_STRATEGY: recursive
|
||||
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
|
||||
BUILD_MULTI_ARCH_IMAGES: "true"
|
||||
|
||||
stages:
|
||||
- image
|
||||
@@ -33,6 +32,7 @@ stages:
|
||||
- test
|
||||
- scan
|
||||
- release
|
||||
- build-all
|
||||
|
||||
# Define the distribution targets
|
||||
.dist-amazonlinux2:
|
||||
@@ -42,12 +42,11 @@ stages:
|
||||
.dist-centos7:
|
||||
variables:
|
||||
DIST: centos7
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
CVE_UPDATES: "nss"
|
||||
|
||||
.dist-centos8:
|
||||
variables:
|
||||
DIST: centos8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
|
||||
.dist-debian10:
|
||||
variables:
|
||||
@@ -64,7 +63,6 @@ stages:
|
||||
.dist-ubi8:
|
||||
variables:
|
||||
DIST: ubi8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
|
||||
.dist-ubuntu16.04:
|
||||
variables:
|
||||
@@ -73,12 +71,6 @@ stages:
|
||||
.dist-ubuntu18.04:
|
||||
variables:
|
||||
DIST: ubuntu18.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
|
||||
.dist-ubuntu20.04:
|
||||
variables:
|
||||
DIST: ubuntu20.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
|
||||
.dist-packaging:
|
||||
variables:
|
||||
@@ -105,15 +97,6 @@ stages:
|
||||
variables:
|
||||
ARCH: x86_64
|
||||
|
||||
# Define the platform targets
|
||||
.platform-amd64:
|
||||
variables:
|
||||
PLATFORM: linux/amd64
|
||||
|
||||
.platform-arm64:
|
||||
variables:
|
||||
PLATFORM: linux/arm64
|
||||
|
||||
# Define test helpers
|
||||
.integration:
|
||||
stage: test
|
||||
@@ -135,30 +118,20 @@ test-packaging:
|
||||
needs:
|
||||
- image-packaging
|
||||
|
||||
# Download the regctl binary for use in the release steps
|
||||
.regctl-setup:
|
||||
before_script:
|
||||
- export REGCTL_VERSION=v0.3.10
|
||||
- apk add --no-cache curl
|
||||
- mkdir -p bin
|
||||
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
|
||||
- chmod a+x bin/regctl
|
||||
- export PATH=$(pwd)/bin:${PATH}
|
||||
|
||||
# .release forms the base of the deployment jobs which push images to the CI registry.
|
||||
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
|
||||
# target os.
|
||||
.release:
|
||||
stage: release
|
||||
stage:
|
||||
release
|
||||
variables:
|
||||
# Define the source image for the release
|
||||
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
# OUT_IMAGE_VERSION is overridden for external releases
|
||||
OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
stage: release
|
||||
before_script:
|
||||
- !reference [.regctl-setup, before_script]
|
||||
|
||||
# We ensure that the OUT_IMAGE_VERSION is set
|
||||
- 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1'
|
||||
|
||||
@@ -166,16 +139,16 @@ test-packaging:
|
||||
# need to tag the image.
|
||||
# Note: a leading 'v' is stripped from the version if present
|
||||
- apk add --no-cache make bash
|
||||
script:
|
||||
# Log in to the "output" registry, tag the image and push the image
|
||||
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||
- regctl registry login "${CI_REGISTRY}" -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}"
|
||||
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || echo "Logging in to output registry ${OUT_REGISTRY}"'
|
||||
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"'
|
||||
|
||||
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
|
||||
# Target
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
|
||||
script:
|
||||
- docker tag "${IMAGE_NAME}:${VERSION}-${DIST}" "${OUT_IMAGE_NAME}:${OUT_IMAGE_VERSION}-${DIST}"
|
||||
# Log in to the "output" registry, tag the image and push the image
|
||||
- 'echo "Logging in to output registry ${OUT_REGISTRY}"'
|
||||
- docker logout
|
||||
- docker login -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" "${OUT_REGISTRY}"
|
||||
- make IMAGE_NAME=${OUT_IMAGE_NAME} VERSION=${OUT_IMAGE_VERSION} -f build/container/Makefile push-${DIST}
|
||||
|
||||
# Define a staging release step that pushes an image to an internal "staging" repository
|
||||
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
|
||||
@@ -190,7 +163,7 @@ test-packaging:
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/container-toolkit"
|
||||
|
||||
# Define an external release step that pushes an image to an external repository.
|
||||
# This includes a devlopment image off main.
|
||||
# This includes a devlopment image off master.
|
||||
.release:external:
|
||||
extends:
|
||||
- .release
|
||||
@@ -234,16 +207,6 @@ release:staging-ubuntu18.04:
|
||||
- test-crio-ubuntu18.04
|
||||
- test-docker-ubuntu18.04
|
||||
|
||||
release:staging-ubuntu20.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- test-toolkit-ubuntu20.04
|
||||
- test-containerd-ubuntu20.04
|
||||
- test-crio-ubuntu20.04
|
||||
- test-docker-ubuntu20.04
|
||||
|
||||
release:staging-packaging:
|
||||
extends:
|
||||
- .release:staging
|
||||
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -1,9 +1,8 @@
|
||||
dist
|
||||
*.swp
|
||||
*.swo
|
||||
/coverage.out*
|
||||
/coverage.out
|
||||
/test/output/
|
||||
/nvidia-container-runtime
|
||||
/nvidia-container-toolkit
|
||||
/nvidia-ctk
|
||||
/shared-*
|
||||
|
||||
@@ -94,9 +94,8 @@ unit-tests:
|
||||
- .multi-arch-build
|
||||
- .package-artifacts
|
||||
stage: package-build
|
||||
timeout: 2h 30m
|
||||
script:
|
||||
- ./scripts/build-packages.sh ${DIST}-${ARCH}
|
||||
- ./scripts/release.sh ${DIST}-${ARCH}
|
||||
|
||||
artifacts:
|
||||
name: ${ARTIFACTS_NAME}
|
||||
@@ -194,33 +193,19 @@ package-ubuntu18.04-ppc64le:
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-ppc64le
|
||||
|
||||
.buildx-setup:
|
||||
before_script:
|
||||
- export BUILDX_VERSION=v0.6.3
|
||||
- apk add --no-cache curl
|
||||
- mkdir -p ~/.docker/cli-plugins
|
||||
- curl -sSLo ~/.docker/cli-plugins/docker-buildx "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64"
|
||||
- chmod a+x ~/.docker/cli-plugins/docker-buildx
|
||||
|
||||
- docker buildx create --use --platform=linux/amd64,linux/arm64
|
||||
|
||||
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes'
|
||||
|
||||
# Define the image build targets
|
||||
.image-build:
|
||||
stage: image-build
|
||||
variables:
|
||||
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
PUSH_ON_BUILD: "true"
|
||||
before_script:
|
||||
- !reference [.buildx-setup, before_script]
|
||||
|
||||
- apk add --no-cache bash make
|
||||
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
script:
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
@@ -247,10 +232,9 @@ image-ubi8:
|
||||
- .package-artifacts
|
||||
- .dist-ubi8
|
||||
needs:
|
||||
# Note: The ubi8 image uses the centos8 packages
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-x86_64
|
||||
- package-centos8-ppc64le
|
||||
# Note: The ubi8 image currently uses the centos7 packages
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
@@ -262,16 +246,6 @@ image-ubuntu18.04:
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
|
||||
# The DIST=packaging target creates an image containing all built packages
|
||||
image-packaging:
|
||||
extends:
|
||||
@@ -353,31 +327,46 @@ test-docker-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
test-toolkit-ubuntu20.04:
|
||||
# build-all jobs build packages for every OS / ARCH combination we support.
|
||||
#
|
||||
# They are run under two conditions:
|
||||
# 1) Automatically whenever a new tag is pushed to the repo (e.g. v1.1.0)
|
||||
# 2) Manually by a reviewer just before merging a MR.
|
||||
.build-all-for-arch:
|
||||
variables:
|
||||
# Setting DIST=docker invokes the docker- release targets
|
||||
DIST: docker
|
||||
extends:
|
||||
- .test:toolkit
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- .package-build
|
||||
stage: build-all
|
||||
timeout: 2h 30m
|
||||
rules:
|
||||
- if: $CI_COMMIT_TAG
|
||||
when: always
|
||||
|
||||
test-containerd-ubuntu20.04:
|
||||
# The full set of build-all jobs organized to
|
||||
# have builds for each ARCH run in parallel.
|
||||
build-all-amd64:
|
||||
extends:
|
||||
- .test:containerd
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- .build-all-for-arch
|
||||
- .arch-amd64
|
||||
|
||||
test-crio-ubuntu20.04:
|
||||
build-all-x86_64:
|
||||
extends:
|
||||
- .test:crio
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- .build-all-for-arch
|
||||
- .arch-x86_64
|
||||
|
||||
test-docker-ubuntu20.04:
|
||||
build-all-ppc64le:
|
||||
extends:
|
||||
- .test:docker
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- .build-all-for-arch
|
||||
- .arch-ppc64le
|
||||
|
||||
build-all-arm64:
|
||||
extends:
|
||||
- .build-all-for-arch
|
||||
- .arch-arm64
|
||||
|
||||
build-all-aarch64:
|
||||
extends:
|
||||
- .build-all-for-arch
|
||||
- .arch-aarch64
|
||||
|
||||
1
.gitmodules
vendored
1
.gitmodules
vendored
@@ -1,7 +1,6 @@
|
||||
[submodule "third_party/libnvidia-container"]
|
||||
path = third_party/libnvidia-container
|
||||
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
|
||||
branch = main
|
||||
[submodule "third_party/nvidia-container-runtime"]
|
||||
path = third_party/nvidia-container-runtime
|
||||
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git
|
||||
|
||||
131
.nvidia-ci.yml
131
.nvidia-ci.yml
@@ -27,8 +27,8 @@ default:
|
||||
variables:
|
||||
DOCKER_DRIVER: overlay2
|
||||
DOCKER_TLS_CERTDIR: "/certs"
|
||||
# Release "devel"-tagged images off the main branch
|
||||
RELEASE_DEVEL_BRANCH: "main"
|
||||
# Release "devel"-tagged images off the master branch
|
||||
RELEASE_DEVEL_BRANCH: "master"
|
||||
DEVEL_RELEASE_IMAGE_VERSION: "devel"
|
||||
# On the multi-arch builder we don't need the qemu setup.
|
||||
SKIP_QEMU_SETUP: "1"
|
||||
@@ -46,7 +46,6 @@ variables:
|
||||
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||
OUT_REGISTRY: "${CI_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PUSH_MULTIPLE_TAGS: "false"
|
||||
# We delay the job start to allow the public pipeline to generate the required images.
|
||||
when: delayed
|
||||
start_in: 30 minutes
|
||||
@@ -57,13 +56,13 @@ variables:
|
||||
- job_execution_timeout
|
||||
- stuck_or_timeout_failure
|
||||
before_script:
|
||||
- !reference [.regctl-setup, before_script]
|
||||
- apk add --no-cache make bash
|
||||
- >
|
||||
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
|
||||
docker pull ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
|
||||
script:
|
||||
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
|
||||
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
|
||||
- docker pull ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}
|
||||
- docker tag ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} ${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}
|
||||
- docker login -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" "${OUT_REGISTRY}"
|
||||
- docker push ${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
@@ -85,11 +84,6 @@ image-ubuntu18.04:
|
||||
- .image-pull
|
||||
- .dist-ubuntu18.04
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu20.04
|
||||
|
||||
# The DIST=packaging target creates an image containing all built packages
|
||||
image-packaging:
|
||||
extends:
|
||||
@@ -118,7 +112,7 @@ image-packaging:
|
||||
before_script:
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
# TODO: We should specify the architecture here and scan all architectures
|
||||
- docker pull --platform="${PLATFORM}" "${IMAGE}"
|
||||
- docker pull "${IMAGE}"
|
||||
- docker save "${IMAGE}" -o "${IMAGE_ARCHIVE}"
|
||||
- AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
|
||||
- >
|
||||
@@ -137,91 +131,34 @@ image-packaging:
|
||||
- policy_evaluation.json
|
||||
|
||||
# Define the scan targets
|
||||
scan-centos7-amd64:
|
||||
scan-centos7:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
scan-centos7-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-centos7
|
||||
- scan-centos7-amd64
|
||||
|
||||
scan-centos8-amd64:
|
||||
scan-centos8:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos8
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-centos8
|
||||
|
||||
scan-centos8-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-centos8
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-centos8
|
||||
- scan-centos8-amd64
|
||||
|
||||
scan-ubuntu18.04-amd64:
|
||||
scan-ubuntu18.04:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu18.04
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
scan-ubuntu18.04-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu18.04
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
- scan-ubuntu18.04-amd64
|
||||
|
||||
scan-ubuntu20.04-amd64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
scan-ubuntu20.04-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- scan-ubuntu20.04-amd64
|
||||
|
||||
scan-ubi8-amd64:
|
||||
scan-ubi8:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubi8
|
||||
- .platform-amd64
|
||||
needs:
|
||||
- image-ubi8
|
||||
|
||||
scan-ubi8-arm64:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubi8
|
||||
- .platform-arm64
|
||||
needs:
|
||||
- image-ubi8
|
||||
- scan-ubi8-amd64
|
||||
|
||||
# Define external release helpers
|
||||
.release:ngc:
|
||||
extends:
|
||||
@@ -232,6 +169,15 @@ scan-ubi8-arm64:
|
||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||
|
||||
.release:dockerhub:
|
||||
extends:
|
||||
- .release:external
|
||||
variables:
|
||||
OUT_REGISTRY_USER: "${REGISTRY_USER}"
|
||||
OUT_REGISTRY_TOKEN: "${REGISTRY_TOKEN}"
|
||||
OUT_REGISTRY: "${DOCKERHUB_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${REGISTRY_IMAGE}"
|
||||
|
||||
release:staging-ubuntu18.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
@@ -239,13 +185,6 @@ release:staging-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
release:staging-ubuntu20.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
# Define the external release targets
|
||||
# Release to NGC
|
||||
release:ngc-centos7:
|
||||
@@ -258,17 +197,33 @@ release:ngc-centos8:
|
||||
- .release:ngc
|
||||
- .dist-centos8
|
||||
|
||||
release:ngc-ubuntu18.04:
|
||||
release:ngc-ubuntu18:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu18.04
|
||||
|
||||
release:ngc-ubuntu20.04:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu20.04
|
||||
|
||||
release:ngc-ubi8:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubi8
|
||||
|
||||
# Release to Dockerhub
|
||||
release:dockerhub-centos7:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-centos7
|
||||
|
||||
release:dockerhub-centos8:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-centos8
|
||||
|
||||
release:dockerhub-ubuntu18:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-ubuntu18.04
|
||||
|
||||
release:dockerhub-ubi8:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-ubi8
|
||||
|
||||
@@ -13,7 +13,7 @@ The `nvidia-container-toolkit` resides in this repo directly.
|
||||
|
||||
In oder to build the packages, the following command is executed
|
||||
```sh
|
||||
./scripts/build-packages.sh TARGET
|
||||
./scripts/build-all-components.sh TARGET
|
||||
```
|
||||
where `TARGET` is a make target that is valid for each of the sub-components.
|
||||
|
||||
@@ -21,8 +21,6 @@ These include:
|
||||
* `ubuntu18.04-amd64`
|
||||
* `centos8-x86_64`
|
||||
|
||||
If no `TARGET` is specified, all valid release targets are built.
|
||||
|
||||
The packages are generated in the `dist` folder.
|
||||
|
||||
## Testing local changes
|
||||
@@ -39,23 +37,9 @@ The [test/release](./test/release/) folder contains documentation on how the ins
|
||||
|
||||
## Releasing
|
||||
|
||||
In order to release packages required for a release, a utility script
|
||||
[`scripts/release-packages.sh`](./scripts/release-packages.sh) is provided.
|
||||
This script can be executed as follows:
|
||||
|
||||
```bash
|
||||
GPG_LOCAL_USER="GPG_USER" \
|
||||
MASTER_KEY_PATH=/path/to/gpg-master.key \
|
||||
SUB_KEY_PATH=/path/to/gpg-subkey.key \
|
||||
./scripts/release-packages.sh REPO PACKAGE_REPO_ROOT [REFERENCE]
|
||||
A utility script [`scripts/release.sh`](./scripts/release.sh) is provided to build
|
||||
packages required for release. If run without arguments, all supported distribution-architecture combinations are built. A specific distribution-architecture pair can also be provided
|
||||
```sh
|
||||
./scripts/release.sh ubuntu18.04-amd64
|
||||
```
|
||||
|
||||
Where `REPO` is one of `stable` or `experimental`, `PACKAGE_REPO_ROOT` is the local path to the `libnvidia-container` repository checked out to the `gh-pages` branch, and `REFERENCE` is the git SHA that is to be released. If reference is not specified `HEAD` is assumed.
|
||||
|
||||
This scripts performs the following basic functions:
|
||||
* Pulls the package image defined by the `REFERENCE` git SHA from the staging registry,
|
||||
* Copies the required packages to the package repository at `PACKAGE_REPO_ROOT/REPO`,
|
||||
* Signs the packages using the specified GPG keys
|
||||
|
||||
While the last two are performed, commits are added to the package repository. These can be pushed to the relevant repository.
|
||||
|
||||
where the `amd64` builds for `ubuntu18.04` are provided as an example.
|
||||
|
||||
28
Makefile
28
Makefile
@@ -38,6 +38,8 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
|
||||
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
|
||||
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
|
||||
|
||||
$(info CMD_TARGETS=$(CMD_TARGETS))
|
||||
|
||||
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate $(CHECK_TARGETS)
|
||||
|
||||
@@ -46,12 +48,6 @@ TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
|
||||
DOCKER_TARGETS := $(patsubst %,docker-%, $(TARGETS))
|
||||
.PHONY: $(TARGETS) $(DOCKER_TARGETS)
|
||||
|
||||
ifeq ($(VERSION),)
|
||||
CLI_VERSION = $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
else
|
||||
CLI_VERSION = $(VERSION)
|
||||
endif
|
||||
|
||||
GOOS ?= linux
|
||||
|
||||
binaries: cmds
|
||||
@@ -60,7 +56,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
|
||||
endif
|
||||
cmds: $(CMD_TARGETS)
|
||||
$(CMD_TARGETS): cmd-%:
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w -X github.com/NVIDIA/nvidia-container-toolkit/internal/info.gitCommit=$(GIT_COMMIT) -X github.com/NVIDIA/nvidia-container-toolkit/internal/info.version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
|
||||
build:
|
||||
GOOS=$(GOOS) go build ./...
|
||||
@@ -94,7 +90,11 @@ ineffassign:
|
||||
|
||||
lint:
|
||||
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
|
||||
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
|
||||
go list -f '{{.Dir}}' $(MODULE)/... | grep -v /internal/ | xargs golint -set_exit_status
|
||||
|
||||
lint-internal:
|
||||
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
|
||||
go list -f '{{.Dir}}' $(MODULE)/internal/... | xargs golint -set_exit_status
|
||||
|
||||
misspell:
|
||||
misspell $(MODULE)/...
|
||||
@@ -142,15 +142,3 @@ $(DOCKER_TARGETS): docker-%: .build-image
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE) \
|
||||
make $(*)
|
||||
|
||||
# Start an interactive shell using the development image.
|
||||
PHONY: .shell
|
||||
.shell:
|
||||
$(DOCKER) run \
|
||||
--rm \
|
||||
-ti \
|
||||
-e GOCACHE=/tmp/.cache \
|
||||
-v $(PWD):$(PWD) \
|
||||
-w $(PWD) \
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# NVIDIA Container Toolkit
|
||||
|
||||
[](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/main/LICENSE)
|
||||
[](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/master/LICENSE)
|
||||
[](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/overview.html)
|
||||
[](https://nvidia.github.io/libnvidia-container)
|
||||
|
||||
|
||||
@@ -42,16 +42,6 @@ RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" .
|
||||
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ARG BASE_DIST
|
||||
# See https://www.centos.org/centos-linux-eol/
|
||||
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
|
||||
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
|
||||
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
|
||||
( \
|
||||
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
|
||||
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
|
||||
)
|
||||
|
||||
ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=utility
|
||||
@@ -63,10 +53,8 @@ COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
ARG PACKAGE_VERSION
|
||||
ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
|
||||
yum localinstall -y \
|
||||
ARG PACKAGE_ARCH
|
||||
RUN yum localinstall -y \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm
|
||||
|
||||
@@ -40,15 +40,11 @@ COPY . .
|
||||
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
|
||||
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
|
||||
RUN rm -f /etc/apt/sources.list.d/cuda.list
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libcap2 \
|
||||
curl \
|
||||
&& \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -63,17 +59,7 @@ COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
ARG PACKAGE_VERSION
|
||||
ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container"
|
||||
ARG LIBNVIDIA_CONTAINER0_VERSION
|
||||
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
|
||||
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
|
||||
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
|
||||
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
|
||||
fi
|
||||
|
||||
ARG PACKAGE_ARCH
|
||||
RUN dpkg -i \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_${PACKAGE_VERSION}*.deb \
|
||||
@@ -95,11 +81,4 @@ LABEL description="See summary"
|
||||
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
|
||||
# Install / upgrade packages here that are required to resolve CVEs
|
||||
ARG CVE_UPDATES
|
||||
RUN if [ -n "${CVE_UPDATES}" ]; then \
|
||||
apt-get update && apt-get upgrade -y ${CVE_UPDATES} && \
|
||||
rm -rf /var/lib/apt/lists/*; \
|
||||
fi
|
||||
|
||||
ENTRYPOINT ["/work/nvidia-toolkit"]
|
||||
|
||||
@@ -12,14 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
BUILD_MULTI_ARCH_IMAGES ?= false
|
||||
DOCKER ?= docker
|
||||
|
||||
BUILDX =
|
||||
ifeq ($(BUILD_MULTI_ARCH_IMAGES),true)
|
||||
BUILDX = buildx
|
||||
endif
|
||||
|
||||
DOCKER ?= docker
|
||||
MKDIR ?= mkdir
|
||||
DIST_DIR ?= $(CURDIR)/dist
|
||||
|
||||
@@ -32,47 +25,36 @@ IMAGE_NAME := $(REGISTRY)/container-toolkit
|
||||
endif
|
||||
|
||||
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
IMAGE_VERSION := $(VERSION)
|
||||
|
||||
IMAGE_TAG ?= $(VERSION)-$(DIST)
|
||||
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
|
||||
|
||||
OUT_IMAGE_NAME ?= $(IMAGE_NAME)
|
||||
OUT_IMAGE_VERSION ?= $(IMAGE_VERSION)
|
||||
OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
|
||||
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
|
||||
|
||||
##### Public rules #####
|
||||
DEFAULT_PUSH_TARGET := ubuntu18.04
|
||||
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
|
||||
TARGETS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
|
||||
|
||||
META_TARGETS := packaging
|
||||
|
||||
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
TEST_TARGETS := $(patsubst %,test-%, $(DISTRIBUTIONS))
|
||||
BUILD_TARGETS := $(patsubst %,build-%,$(TARGETS) $(META_TARGETS))
|
||||
PUSH_TARGETS := $(patsubst %,push-%,$(TARGETS) $(META_TARGETS))
|
||||
TEST_TARGETS := $(patsubst %,test-%, $(TARGETS))
|
||||
|
||||
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||
.PHONY: $(TARGETS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||
|
||||
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
|
||||
include $(CURDIR)/build/container/native-only.mk
|
||||
else
|
||||
include $(CURDIR)/build/container/multi-arch.mk
|
||||
endif
|
||||
push-%: DIST = $(*)
|
||||
$(PUSH_TARGETS): push-%:
|
||||
$(DOCKER) push "$(IMAGE_NAME):$(IMAGE_TAG)"
|
||||
|
||||
# For the default push target we also push a short tag equal to the version.
|
||||
# We skip this for the development release
|
||||
DEVEL_RELEASE_IMAGE_VERSION ?= devel
|
||||
PUSH_MULTIPLE_TAGS ?= true
|
||||
ifeq ($(strip $(OUT_IMAGE_VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
|
||||
PUSH_MULTIPLE_TAGS = false
|
||||
endif
|
||||
ifeq ($(PUSH_MULTIPLE_TAGS),true)
|
||||
ifneq ($(strip $(VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
|
||||
push-$(DEFAULT_PUSH_TARGET): push-short
|
||||
endif
|
||||
push-short:
|
||||
$(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(IMAGE_NAME):$(VERSION)"
|
||||
$(DOCKER) push "$(IMAGE_NAME):$(VERSION)"
|
||||
|
||||
push-%: DIST = $(*)
|
||||
push-short: DIST = $(DEFAULT_PUSH_TARGET)
|
||||
|
||||
build-%: DIST = $(*)
|
||||
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
|
||||
@@ -82,17 +64,16 @@ ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
||||
# Use a generic build target to build the relevant images
|
||||
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
DOCKER_BUILDKIT=1 \
|
||||
$(DOCKER) $(BUILDX) build --pull \
|
||||
$(DOCKER_BUILD_OPTIONS) \
|
||||
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
|
||||
$(DOCKER) build --pull \
|
||||
--platform=linux/amd64 \
|
||||
--tag $(IMAGE) \
|
||||
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
|
||||
--build-arg BASE_DIST="$(BASE_DIST)" \
|
||||
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
|
||||
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
|
||||
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
|
||||
--build-arg PACKAGE_ARCH="$(PACKAGE_ARCH)" \
|
||||
--build-arg VERSION="$(VERSION)" \
|
||||
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
|
||||
-f $(DOCKERFILE) \
|
||||
@@ -101,17 +82,20 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
|
||||
build-ubuntu%: BASE_DIST = $(*)
|
||||
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
|
||||
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
|
||||
build-ubuntu%: PACKAGE_ARCH := amd64
|
||||
build-ubuntu%: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
|
||||
|
||||
# TODO: Update this to use the centos8 packages
|
||||
build-ubi8: BASE_DIST := ubi8
|
||||
build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
build-ubi8: PACKAGE_DIST = centos8
|
||||
build-ubi8: PACKAGE_ARCH := x86_64
|
||||
build-ubi8: PACKAGE_DIST = centos7
|
||||
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-centos%: BASE_DIST = $(*)
|
||||
build-centos%: DOCKERFILE_SUFFIX := centos
|
||||
build-centos%: PACKAGE_ARCH := x86_64
|
||||
build-centos%: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos%: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
|
||||
@@ -1,34 +0,0 @@
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
PUSH_ON_BUILD ?= false
|
||||
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
|
||||
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
|
||||
|
||||
REGCTL ?= regctl
|
||||
$(PUSH_TARGETS): push-%:
|
||||
$(REGCTL) \
|
||||
image copy \
|
||||
$(IMAGE) $(OUT_IMAGE)
|
||||
|
||||
push-short:
|
||||
$(REGCTL) \
|
||||
image copy \
|
||||
$(IMAGE) $(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)
|
||||
|
||||
# We only have x86_64 packages for centos7
|
||||
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate a single image for packaging targets
|
||||
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
170
cmd/nvidia-container-runtime.experimental/README.md
Normal file
170
cmd/nvidia-container-runtime.experimental/README.md
Normal file
@@ -0,0 +1,170 @@
|
||||
# The Experimental NVIDIA Container Runtime
|
||||
|
||||
## Introduction
|
||||
|
||||
The experimental NVIDIA Container Runtime is a proof-of-concept runtime that
|
||||
approaches the problem of making GPUs (or other NVIDIA devices) available in
|
||||
containerized environments in a different manner to the existing
|
||||
[NVIDIA Container Runtime](../nvidia-container-runtime). Wherease the current
|
||||
runtime relies on the [NVIDIA Container Library](https://github.com/NVIDIA/libnvidia-container)
|
||||
to perform the modifications to a container, the experiemental runtime aims to
|
||||
express the required modifications in terms of changes to a container's [OCI
|
||||
runtime specification](https://github.com/opencontainers/runtime-spec). This
|
||||
also aligns with open initiatives such as the [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface).
|
||||
|
||||
## Known Limitations
|
||||
|
||||
* The path of NVIDIA CUDA libraries / binaries injected into the container currently match that of the host system. This means that on an Ubuntu-based host systems these would be at `/usr/lib/x86_64-linux-gnu`
|
||||
even if the container distribution would normally expect these at another location (e.g. `/usr/lib64`)
|
||||
* Tools such as `nvidia-smi` may create additional device nodes in the container when run. This is
|
||||
prevented in the "classic" runtime (and the NVIDIA Container Library) by modifying
|
||||
the `/proc/driver/nvidia/params` file in the container.
|
||||
* Other `NVIDIA_*` environment variables (e.g. `NVIDIA_DRIVER_CAPABILITIES`) are
|
||||
not considered to filter mounted libraries or binaries. This is equivalent to
|
||||
always using `NVIDIA_DRIVER_CAPABILITIES=all`.
|
||||
|
||||
## Building / Installing
|
||||
|
||||
The experimental NVIDIA Container Runtime is a self-contained golang binary and
|
||||
can thus be built from source, or installed directly using `go install`.
|
||||
|
||||
### From source
|
||||
|
||||
After cloning the `nvidia-container-toolkit` repository from [GitLab](https://gitlab.com/nvidia/container-toolkit/container-toolkit)
|
||||
or from the read-only mirror on [GitHub](https://github.com/NVIDIA/nvidia-container-toolkit)
|
||||
running the following make command in the repository root:
|
||||
```bash
|
||||
make cmd-nvidia-container-runtime.experimental
|
||||
```
|
||||
will create an executable file `nvidia-container-runtime.experimental` in the
|
||||
root.
|
||||
|
||||
A dockerized target:
|
||||
```bash
|
||||
make docker-cmd-nvidia-container-runtime.experimental
|
||||
```
|
||||
will also create the executable file `nvidia-container-runtime.experimental`
|
||||
without requiring the setup of a development environment (with the exception)
|
||||
of having `make` and `docker` installed.
|
||||
|
||||
### Go install
|
||||
|
||||
The experimental NVIDIA Container Runtime can also be `go installed` by running
|
||||
the following command:
|
||||
|
||||
```bash
|
||||
go install github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental@experimental
|
||||
```
|
||||
which will build and install the `nvidia-container-runtime.experimental`
|
||||
executable in the `${GOPATH}/bin` folder.
|
||||
|
||||
## Using the Runtime
|
||||
|
||||
The experimental NVIDIA Container Runtime is intended as a drop-in replacement
|
||||
for the "classic" NVIDIA Container Runtime. As such it is used in the same
|
||||
way (with the exception of the known limitiations noted above).
|
||||
|
||||
In general terms, to use the experimental NVIDIA Container Runtime to launch a
|
||||
container with GPU support, it should be inserted as a shim for the desired
|
||||
low-level OCI-compliant runtime (e.g. `runc` or `crun`). How this is achieved
|
||||
depends on how containers are being launched.
|
||||
|
||||
### Docker
|
||||
In the case of `docker` for example, the runtime must be registered with the
|
||||
Docker daemon. This can be done by modifying the `/etc/docker/daemon.json` file
|
||||
to contain the following:
|
||||
```json
|
||||
{
|
||||
"runtimes": {
|
||||
"nvidia-experimental": {
|
||||
"path": "nvidia-container-runtime.experimental",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
},
|
||||
}
|
||||
```
|
||||
This can then be invoked from docker by including the `--runtime=nvidia-experimental`
|
||||
option when executing a `docker run` command.
|
||||
|
||||
### Runc
|
||||
|
||||
If `runc` is being used to run a container directly substituting the `runc`
|
||||
command for `nvidia-container-runtime.experimental` should be sufficient as
|
||||
the latter will `exec` to `runc` once the required (in-place) modifications have
|
||||
been made to the container's OCI spec (`config.json` file).
|
||||
|
||||
## Configuration
|
||||
|
||||
### Runtime Path
|
||||
The experimental NVIDIA Container Runtime allows for the path to the low-level
|
||||
runtime to be specified. This is done by setting the following option in the
|
||||
`/etc/nvidia-container-runtime/config.toml` file or setting the
|
||||
`NVIDIA_CONTAINER_RUNTIME_PATH` environment variable.
|
||||
|
||||
```toml
|
||||
[nvidia-container-runtime.experimental]
|
||||
|
||||
runtime-path = "/path/to/low-level-runtime"
|
||||
```
|
||||
This path can be set to the path for `runc` or `crun` on a system and if it is
|
||||
a relative path, the `PATH` is searched for a matching executable.
|
||||
|
||||
### Device Selection
|
||||
In order to select a specific device, the experimental NVIDIA Container Runtime
|
||||
mimics the behaviour of the "classic" runtime. That is to say that the values of
|
||||
certain [environment variables](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#environment-variables-oci-spec)
|
||||
**in the container's OCI specification** control the behaviour of the runtime.
|
||||
|
||||
#### `NVIDIA_VISIBLE_DEVICES`
|
||||
This variable controls which GPUs will be made accessible inside the container.
|
||||
|
||||
##### Possible values
|
||||
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
|
||||
* `all`: all GPUs will be accessible, this is the default value in our container images.
|
||||
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
|
||||
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
|
||||
|
||||
**Note**: When running on a MIG capable device, the following values will also be available:
|
||||
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
|
||||
|
||||
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
|
||||
```
|
||||
$ nvidia-smi -L
|
||||
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
|
||||
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
|
||||
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
|
||||
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
|
||||
```
|
||||
|
||||
#### `NVIDIA_MIG_CONFIG_DEVICES`
|
||||
This variable controls which of the visible GPUs can have their MIG
|
||||
configuration managed from within the container. This includes enabling and
|
||||
disabling MIG mode, creating and destroying GPU Instances and Compute
|
||||
Instances, etc.
|
||||
|
||||
##### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG configurations managed.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
|
||||
|
||||
#### `NVIDIA_MIG_MONITOR_DEVICES`
|
||||
This variable controls which of the visible GPUs can have aggregate information
|
||||
about all of their MIG devices monitored from within the container. This
|
||||
includes inspecting the aggregate memory usage, listing the aggregate running
|
||||
processes, etc.
|
||||
|
||||
##### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG devices monitored.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// list represents a list of config updaters that are applied in order, with later
|
||||
// configs having preference.
|
||||
type list struct {
|
||||
logger *log.Logger
|
||||
configs []configUpdater
|
||||
}
|
||||
|
||||
var _ configUpdater = (*list)(nil)
|
||||
|
||||
func newListWithLogger(logger *log.Logger, ci ...configUpdater) configUpdater {
|
||||
c := list{
|
||||
logger: logger,
|
||||
configs: ci,
|
||||
}
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c list) Update(cfg *Config) error {
|
||||
for i, u := range c.configs {
|
||||
c.logger.Debugf("Applying config %v: %v", i, u)
|
||||
err := u.Update(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error applying config %v: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestComposite(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
// Empty list
|
||||
c := newListWithLogger(logger)
|
||||
|
||||
cfg := &Config{}
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{}, cfg)
|
||||
|
||||
// Add a single mock config
|
||||
mockConfigUpdater := &configUpdaterMock{
|
||||
UpdateFunc: func(cfg *Config) error {
|
||||
cfg.DebugFilePath = "updated"
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
c = newListWithLogger(logger, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 1)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// define an error config
|
||||
errorConfigUpdater := &configUpdaterMock{
|
||||
UpdateFunc: func(cfg *Config) error {
|
||||
return fmt.Errorf("mock error")
|
||||
},
|
||||
}
|
||||
|
||||
c = newListWithLogger(logger, errorConfigUpdater, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.Error(t, err)
|
||||
require.EqualValues(t, &Config{}, cfg)
|
||||
|
||||
require.Len(t, errorConfigUpdater.UpdateCalls(), 1)
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 0)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
errorConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// Change the order of the config and error
|
||||
c = newListWithLogger(logger, mockConfigUpdater, errorConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.Error(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, errorConfigUpdater.UpdateCalls(), 1)
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 1)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
errorConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// Call the mock twice
|
||||
c = newListWithLogger(logger, mockConfigUpdater, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 2)
|
||||
}
|
||||
63
cmd/nvidia-container-runtime.experimental/config/config.go
Normal file
63
cmd/nvidia-container-runtime.experimental/config/config.go
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Config defines the configuration options for the NVIDIA Container Runtime
|
||||
type Config struct {
|
||||
// Root defines the root of the file system to be used for locating mounts
|
||||
Root string
|
||||
// DebugFilePath defines a log file to print debug output to
|
||||
DebugFilePath string
|
||||
// RuntimePath defines the path to an OCI compliant runtime
|
||||
RuntimePath string
|
||||
// LogLevel defines the logging level for the application
|
||||
LogLevel string
|
||||
}
|
||||
|
||||
//go:generate moq -stub -out config_mock.go . configUpdater
|
||||
|
||||
// configUpdate represents an interface for applying updates to a config.
|
||||
type configUpdater interface {
|
||||
Update(*Config) error
|
||||
}
|
||||
|
||||
// GetConfig returns a config struct with the values resolved. The values are defined in order of
|
||||
// priority:
|
||||
// 1. From the associated environment variables
|
||||
// 2. From the loaded config file
|
||||
// 3. From the default values defined in the `defaultConfig` function
|
||||
func GetConfig(logger *log.Logger) (*Config, error) {
|
||||
cfg := &Config{}
|
||||
|
||||
configs := newListWithLogger(logger,
|
||||
newDefaultConfig(),
|
||||
newDefaultConfigFileWithLogger(logger),
|
||||
newConfigFromEnvironment(),
|
||||
)
|
||||
|
||||
err := configs.Update(cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting config: %v", err)
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that configUpdaterMock does implement configUpdater.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ configUpdater = &configUpdaterMock{}
|
||||
|
||||
// configUpdaterMock is a mock implementation of configUpdater.
|
||||
//
|
||||
// func TestSomethingThatUsesconfigUpdater(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked configUpdater
|
||||
// mockedconfigUpdater := &configUpdaterMock{
|
||||
// UpdateFunc: func(config *Config) error {
|
||||
// panic("mock out the Update method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedconfigUpdater in code that requires configUpdater
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type configUpdaterMock struct {
|
||||
// UpdateFunc mocks the Update method.
|
||||
UpdateFunc func(config *Config) error
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Update holds details about calls to the Update method.
|
||||
Update []struct {
|
||||
// Config is the config argument value.
|
||||
Config *Config
|
||||
}
|
||||
}
|
||||
lockUpdate sync.RWMutex
|
||||
}
|
||||
|
||||
// Update calls UpdateFunc.
|
||||
func (mock *configUpdaterMock) Update(config *Config) error {
|
||||
callInfo := struct {
|
||||
Config *Config
|
||||
}{
|
||||
Config: config,
|
||||
}
|
||||
mock.lockUpdate.Lock()
|
||||
mock.calls.Update = append(mock.calls.Update, callInfo)
|
||||
mock.lockUpdate.Unlock()
|
||||
if mock.UpdateFunc == nil {
|
||||
var (
|
||||
errOut error
|
||||
)
|
||||
return errOut
|
||||
}
|
||||
return mock.UpdateFunc(config)
|
||||
}
|
||||
|
||||
// UpdateCalls gets all the calls that were made to Update.
|
||||
// Check the length with:
|
||||
// len(mockedconfigUpdater.UpdateCalls())
|
||||
func (mock *configUpdaterMock) UpdateCalls() []struct {
|
||||
Config *Config
|
||||
} {
|
||||
var calls []struct {
|
||||
Config *Config
|
||||
}
|
||||
mock.lockUpdate.RLock()
|
||||
calls = mock.calls.Update
|
||||
mock.lockUpdate.RUnlock()
|
||||
return calls
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
testDir := path.Join(wd, "test")
|
||||
filename := path.Join(testDir, configFileRelativePath)
|
||||
|
||||
previousConfig, present := os.LookupEnv(configOverride)
|
||||
os.Setenv(configOverride, testDir)
|
||||
defer func() {
|
||||
if present {
|
||||
os.Setenv(configOverride, previousConfig)
|
||||
} else {
|
||||
os.Unsetenv(configOverride)
|
||||
}
|
||||
}()
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
cfg, err := GetConfig(logger)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/nvidia-container-toolkit.log", cfg.DebugFilePath)
|
||||
}
|
||||
47
cmd/nvidia-container-runtime.experimental/config/default.go
Normal file
47
cmd/nvidia-container-runtime.experimental/config/default.go
Normal file
@@ -0,0 +1,47 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type defaultConfig struct{}
|
||||
|
||||
var _ configUpdater = (*defaultConfig)(nil)
|
||||
|
||||
func newDefaultConfig() configUpdater {
|
||||
c := defaultConfig{}
|
||||
return &c
|
||||
}
|
||||
|
||||
// Update defines the the default values for the config options
|
||||
func (c defaultConfig) Update(cfg *Config) error {
|
||||
*cfg = Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: log.InfoLevel.String(),
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getDefaultConfig() *Config {
|
||||
cfg := &Config{}
|
||||
|
||||
defaultConfig{}.Update(cfg)
|
||||
|
||||
return cfg
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDefaultConfigUpdate(t *testing.T) {
|
||||
cfg := &Config{}
|
||||
|
||||
require.Empty(t, cfg.DebugFilePath)
|
||||
|
||||
c := defaultConfig{}
|
||||
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/dev/null", cfg.DebugFilePath)
|
||||
require.Equal(t, "", cfg.Root)
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
debugFilePathEnvvarName = "NVIDIA_CONTAINER_RUNTIME_DEBUG"
|
||||
runtimePathEnvvarName = "NVIDIA_CONTAINER_RUNTIME_PATH"
|
||||
rootEnvvarName = "NVIDIA_CONTAINER_RUNTIME_ROOT"
|
||||
logLevelEnvvarName = "NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"
|
||||
)
|
||||
|
||||
type envConfig struct{}
|
||||
|
||||
func newConfigFromEnvironment() configUpdater {
|
||||
c := envConfig{}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c envConfig) Update(cfg *Config) error {
|
||||
debugFilePathEnvvar, exists := os.LookupEnv(debugFilePathEnvvarName)
|
||||
if exists && strings.TrimSpace(debugFilePathEnvvar) != "" {
|
||||
cfg.DebugFilePath = debugFilePathEnvvar
|
||||
}
|
||||
|
||||
runtimePathEnvvar, exists := os.LookupEnv(runtimePathEnvvarName)
|
||||
if exists && strings.TrimSpace(runtimePathEnvvar) != "" {
|
||||
cfg.RuntimePath = runtimePathEnvvar
|
||||
}
|
||||
|
||||
rootEnvvar, exists := os.LookupEnv(rootEnvvarName)
|
||||
if exists && strings.TrimSpace(rootEnvvar) != "" {
|
||||
cfg.Root = rootEnvvar
|
||||
}
|
||||
|
||||
logLevelEnvvar, exists := os.LookupEnv(logLevelEnvvarName)
|
||||
if exists && strings.TrimSpace(logLevelEnvvar) != "" {
|
||||
cfg.LogLevel = logLevelEnvvar
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -12,13 +12,18 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
*/
|
||||
|
||||
package constraints
|
||||
package config
|
||||
|
||||
//go:generate moq -stub -out constraint_mock.go . Constraint
|
||||
// Constraint represents a constraint that is to be evaluated
|
||||
type Constraint interface {
|
||||
String() string
|
||||
Assert() error
|
||||
// noop implements a configUpdater that does not update a config.
|
||||
type noop struct{}
|
||||
|
||||
func newNoopConfigUpdater() configUpdater {
|
||||
c := noop{}
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c noop) Update(cfg *Config) error {
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNoopDoesNotModifyConfig(t *testing.T) {
|
||||
cfg := &Config{
|
||||
DebugFilePath: "test-path",
|
||||
}
|
||||
|
||||
c := newNoopConfigUpdater()
|
||||
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "test-path", cfg.DebugFilePath)
|
||||
}
|
||||
158
cmd/nvidia-container-runtime.experimental/config/toml.go
Normal file
158
cmd/nvidia-container-runtime.experimental/config/toml.go
Normal file
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
configFileRelativePath = "nvidia-container-runtime/config.toml"
|
||||
configOverride = "XDG_CONFIG_HOME"
|
||||
defaultConfigRoot = "/etc"
|
||||
|
||||
nvidiaContainerCliSection = "nvidia-container-cli"
|
||||
nvidiaContainerRuntimeConfigSection = "nvidia-container-runtime"
|
||||
nvidiaContainerRuntimeExperimentalConfigSection = "nvidia-container-runtime.experimental"
|
||||
)
|
||||
|
||||
type tomlConfig struct {
|
||||
logger *log.Logger
|
||||
path string
|
||||
sections []tomlSection
|
||||
}
|
||||
|
||||
type tomlSection struct {
|
||||
section string
|
||||
keys map[string]struct{}
|
||||
}
|
||||
|
||||
func newDefaultConfigFileWithLogger(logger *log.Logger) configUpdater {
|
||||
configDir := defaultConfigRoot
|
||||
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
||||
configDir = XDGConfigDir
|
||||
}
|
||||
configFilePath := filepath.Join(configDir, configFileRelativePath)
|
||||
|
||||
return newConfigFromFileWithLogger(logger, configFilePath)
|
||||
}
|
||||
|
||||
func newConfigFromFileWithLogger(logger *log.Logger, filepath string) configUpdater {
|
||||
if _, err := os.Stat(filepath); os.IsNotExist(err) {
|
||||
logger.Warnf("The config file '%v' does not exist", filepath)
|
||||
return newNoopConfigUpdater()
|
||||
}
|
||||
|
||||
sections := []tomlSection{
|
||||
{
|
||||
section: nvidiaContainerRuntimeConfigSection,
|
||||
},
|
||||
{
|
||||
section: nvidiaContainerRuntimeExperimentalConfigSection,
|
||||
},
|
||||
{
|
||||
section: nvidiaContainerCliSection,
|
||||
keys: map[string]struct{}{
|
||||
"root": {},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
path: filepath,
|
||||
sections: sections,
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c tomlConfig) Update(cfg *Config) error {
|
||||
configFile, err := os.Open(c.path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening config file %v: %v", c.path, err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
|
||||
return c.updateFromReader(cfg, configFile)
|
||||
}
|
||||
|
||||
func (c tomlConfig) updateFromReader(cfg *Config, reader io.Reader) error {
|
||||
toml, err := toml.LoadReader(reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading TOML contents: %v", err)
|
||||
}
|
||||
|
||||
for _, section := range c.sections {
|
||||
if v, ok := section.GetStringFrom(toml, "debug"); ok {
|
||||
cfg.DebugFilePath = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "runtime-path"); ok {
|
||||
cfg.RuntimePath = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "root"); ok {
|
||||
cfg.Root = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "log-level"); ok {
|
||||
cfg.LogLevel = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c tomlSection) GetStringFrom(toml *toml.Tree, key string) (string, bool) {
|
||||
value := c.GetFrom(toml, key)
|
||||
if value != nil {
|
||||
if v, ok := value.(string); ok {
|
||||
return v, ok
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (c tomlSection) GetFrom(toml *toml.Tree, key string) interface{} {
|
||||
if !c.validKey(key) {
|
||||
return nil
|
||||
}
|
||||
return toml.Get(c.configKey(key))
|
||||
}
|
||||
|
||||
func (c tomlSection) validKey(key string) bool {
|
||||
if c.keys == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
_, exists := c.keys[key]
|
||||
|
||||
return exists
|
||||
}
|
||||
|
||||
func (c tomlSection) configKey(key string) string {
|
||||
if c.section == "" {
|
||||
return key
|
||||
}
|
||||
return c.section + "." + key
|
||||
}
|
||||
259
cmd/nvidia-container-runtime.experimental/config/toml_test.go
Normal file
259
cmd/nvidia-container-runtime.experimental/config/toml_test.go
Normal file
@@ -0,0 +1,259 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"testing/iotest"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestUpdateFromReader(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
readerError bool
|
||||
lines []string
|
||||
expected *Config
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
description: "reader error returns error",
|
||||
readerError: true,
|
||||
expectedError: true,
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
description: "empty config returns defaults",
|
||||
lines: []string{},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "debug output is set",
|
||||
lines: []string{"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "debug output is set in section",
|
||||
lines: []string{"[nvidia-container-runtime]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "blank debug is set",
|
||||
lines: []string{"nvidia-container-runtime.debug=\"\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-string debug is ignored",
|
||||
lines: []string{"nvidia-container-runtime.debug=2"},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "log-level is set",
|
||||
lines: []string{"nvidia-container-runtime.log-level=\"trace\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "trace",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
sections: []tomlSection{
|
||||
{section: nvidiaContainerRuntimeConfigSection},
|
||||
},
|
||||
}
|
||||
var reader io.Reader
|
||||
if tc.readerError {
|
||||
reader = iotest.ErrReader(fmt.Errorf("error"))
|
||||
} else {
|
||||
reader = strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
}
|
||||
|
||||
err := c.updateFromReader(cfg, reader)
|
||||
|
||||
if tc.expectedError {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.EqualValues(t, tc.expected, cfg)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateFromReaderExperimental(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
readerError bool
|
||||
lines []string
|
||||
expected *Config
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"[nvidia-container-runtime]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=\"\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=2"},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{"[nvidia-container-runtime.experimental]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{
|
||||
"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\"",
|
||||
"nvidia-container-runtime.experimental.debug=\"nvidia-container-exp-toolkit.log\"",
|
||||
},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-exp-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
sections: []tomlSection{
|
||||
{section: nvidiaContainerRuntimeExperimentalConfigSection},
|
||||
},
|
||||
}
|
||||
var reader io.Reader
|
||||
if tc.readerError {
|
||||
reader = iotest.ErrReader(fmt.Errorf("error"))
|
||||
} else {
|
||||
reader = strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
}
|
||||
|
||||
err := c.updateFromReader(cfg, reader)
|
||||
|
||||
if tc.expectedError {
|
||||
require.Error(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.NoError(t, err, "%d: %v", i, tc)
|
||||
}
|
||||
require.EqualValues(t, tc.expected, cfg, "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigFromFile(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
lines := []string{
|
||||
"[nvidia-container-cli]",
|
||||
"root = \"/run/nvidia/driver\"",
|
||||
"[nvidia-container-runtime]",
|
||||
"#debug = \"/nvidia-container-toolkit.log\"",
|
||||
"",
|
||||
"[nvidia-container-runtime.experimental]",
|
||||
"debug = \"/nvidia-container-toolkit.experimental.log\"",
|
||||
}
|
||||
|
||||
contents := []byte(strings.Join(lines, "\n"))
|
||||
testDir := path.Join(wd, "test")
|
||||
filename := path.Join(testDir, configFileRelativePath)
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
c := newConfigFromFileWithLogger(logger, filename)
|
||||
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
err = c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/nvidia-container-toolkit.experimental.log", cfg.DebugFilePath)
|
||||
|
||||
require.Equal(t, "/run/nvidia/driver", cfg.Root)
|
||||
}
|
||||
|
||||
func TestConfigFromNonexistentFileReturnsNoop(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
c := newConfigFromFileWithLogger(logger, "/does/not/exist")
|
||||
|
||||
n, ok := c.(*noop)
|
||||
|
||||
require.True(t, ok)
|
||||
require.NotNil(t, n)
|
||||
}
|
||||
|
||||
func TestGetConfigKey(t *testing.T) {
|
||||
require.Equal(t, "key", tomlSection{}.configKey("key"))
|
||||
require.Equal(t, "section.key", tomlSection{section: "section"}.configKey("key"))
|
||||
}
|
||||
144
cmd/nvidia-container-runtime.experimental/main.go
Normal file
144
cmd/nvidia-container-runtime.experimental/main.go
Normal file
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ensure"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/filter"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modify"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental/config"
|
||||
)
|
||||
|
||||
const (
|
||||
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
|
||||
visibleDevicesVoid = "void"
|
||||
)
|
||||
|
||||
var logger = log.New()
|
||||
|
||||
func main() {
|
||||
cfg, err := config.GetConfig(logger)
|
||||
if err != nil {
|
||||
logger.Errorf("Error loading config: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if cfg.DebugFilePath != "" && cfg.DebugFilePath != "/dev/null" {
|
||||
logFile, err := os.OpenFile(cfg.DebugFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
logger.Errorf("Error opening debug log file: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer logFile.Close()
|
||||
|
||||
logger.SetOutput(logFile)
|
||||
}
|
||||
|
||||
logLevel, err := log.ParseLevel(cfg.LogLevel)
|
||||
if err == nil {
|
||||
logger.SetLevel(logLevel)
|
||||
} else {
|
||||
logger.Warnf("Invalid log-level '%v'; using '%v'", cfg.LogLevel, logger.Level.String())
|
||||
}
|
||||
|
||||
logger.Infof("Starting nvidia-container-runtime: %v", os.Args)
|
||||
logger.Debugf("Using config=%+v", cfg)
|
||||
|
||||
if err := run(cfg, os.Args); err != nil {
|
||||
logger.Errorf("Error running runtime: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func run(cfg *config.Config, args []string) error {
|
||||
logger.Debugf("running with args=%v", args)
|
||||
|
||||
// We create a low-level runtime
|
||||
lowLevelRuntime, err := createLowLevelRuntime(logger, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing low-level runtime: %v", err)
|
||||
}
|
||||
|
||||
if !oci.HasCreateSubcommand(args) {
|
||||
logger.Infof("No modification of OCI specification required")
|
||||
logger.Infof("Forwarding command to runtime")
|
||||
return lowLevelRuntime.Exec(args)
|
||||
}
|
||||
|
||||
// We create the OCI spec that is to be modified
|
||||
ociSpec, bundleDir, err := oci.NewSpecFromArgs(args)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing OCI spec: %v", err)
|
||||
}
|
||||
|
||||
err = ociSpec.Load()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading OCI specification: %v", err)
|
||||
}
|
||||
|
||||
visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar)
|
||||
if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid {
|
||||
logger.Infof("Using low-level runtime: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists)
|
||||
return lowLevelRuntime.Exec(os.Args)
|
||||
}
|
||||
|
||||
// We create the modifier that will be applied by the Modifying Runtime Wrapper
|
||||
modifier, err := createModifier(cfg.Root, bundleDir, visibleDevices, ociSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing modifer: %v", err)
|
||||
}
|
||||
|
||||
// We construct the Modifying runtime
|
||||
r := runtime.NewModifyingRuntimeWrapperWithLogger(logger, lowLevelRuntime, ociSpec, modifier)
|
||||
|
||||
return r.Exec(os.Args)
|
||||
}
|
||||
|
||||
func createLowLevelRuntime(logger *log.Logger, cfg *config.Config) (oci.Runtime, error) {
|
||||
if cfg.RuntimePath == "" {
|
||||
return oci.NewLowLevelRuntimeWithLogger(logger, "docker-runc", "runc")
|
||||
}
|
||||
logger.Infof("Creating runtime with path %v", cfg.RuntimePath)
|
||||
return oci.NewRuntimeForPathWithLogger(logger, cfg.RuntimePath)
|
||||
}
|
||||
|
||||
func createModifier(root string, bundleDir string, visibleDevices string, env filter.EnvLookup) (modify.Modifier, error) {
|
||||
// We set up the modifier using discovery
|
||||
discovered, err := discover.NewNVMLServerWithLogger(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering devices: %v", err)
|
||||
}
|
||||
|
||||
// We apply a filter to the discovered devices
|
||||
selected := filter.NewSelectDevicesFromWithLogger(logger, discovered, visibleDevices, env)
|
||||
|
||||
// We ensure that the selected devices are available
|
||||
available := ensure.NewEnsureDevicesWithLogger(logger, selected, root)
|
||||
|
||||
// We construct the modifer for the OCI spec
|
||||
modifier := modify.NewModifierWithLoggerFor(logger, available, root, bundleDir)
|
||||
|
||||
return modifier, nil
|
||||
}
|
||||
@@ -1,25 +0,0 @@
|
||||
# The NVIDIA Container Runtime
|
||||
|
||||
The NVIDIA Container Runtime is a shim for OCI-compliant low-level runtimes such as [runc](https://github.com/opencontainers/runc). When a `create` command is detected, the incoming [OCI runtime specification](https://github.com/opencontainers/runtime-spec) is modified in place and the command is forwarded to the low-level runtime.
|
||||
|
||||
## Standard Mode
|
||||
|
||||
In the standard mode configuration, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from project [libnvidia-container](https://github.com/NVIDIA/libnvidia-container).
|
||||
|
||||
## Experimental Mode
|
||||
|
||||
The NVIDIA Container Runtime can be configured in an experimental mode by setting the following options in the runtime's `config.toml` file:
|
||||
|
||||
```toml
|
||||
[nvidia-container-runtime]
|
||||
experimental = true
|
||||
```
|
||||
|
||||
When this setting is enabled, the modifications made to the OCI specification are controlled by the `nvidia-container-runtime.discover-mode` option, with the following mode supported:
|
||||
|
||||
* `"legacy"`: This mode mirrors the behaviour of the standard mode, inserting the NVIDIA Container Runtime Hook as a `prestart` hook into the container's OCI specification.
|
||||
* `"csv"`: This mode uses CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` to define the devices and mounts that are to be injected into a container when it is created.
|
||||
|
||||
### Notes on using the docker CLI
|
||||
|
||||
The `docker` CLI supports the `--gpus` flag to select GPUs for inclusion in a container. Since specifying this flag inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification. When experimental mode is activated, the NVIDIA Container Runtime detects the presence of the hook and raises an error. This requirement will be relaxed in the near future.
|
||||
@@ -20,220 +20,60 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/tsaikd/KDGoLib/logrusutil"
|
||||
)
|
||||
|
||||
// Logger adds a way to manage output to a log file to a logrus.Logger
|
||||
type Logger struct {
|
||||
*logrus.Logger
|
||||
previousLogger *logrus.Logger
|
||||
logFiles []*os.File
|
||||
previousOutput io.Writer
|
||||
logFile *os.File
|
||||
}
|
||||
|
||||
// NewLogger creates an empty logger
|
||||
// NewLogger constructs a Logger with a preddefined formatter
|
||||
func NewLogger() *Logger {
|
||||
return &Logger{
|
||||
Logger: logrus.New(),
|
||||
logrusLogger := logrus.New()
|
||||
|
||||
formatter := &logrusutil.ConsoleLogFormatter{
|
||||
TimestampFormat: "2006/01/02 15:04:07",
|
||||
Flag: logrusutil.Ltime,
|
||||
}
|
||||
|
||||
logger := &Logger{
|
||||
Logger: logrusLogger,
|
||||
}
|
||||
logger.SetFormatter(formatter)
|
||||
|
||||
return logger
|
||||
}
|
||||
|
||||
// UpdateLogger constructs a Logger with a preddefined formatter
|
||||
func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, error) {
|
||||
configFromArgs := parseArgs(argv)
|
||||
|
||||
level, logLevelError := configFromArgs.getLevel(logLevel)
|
||||
|
||||
var logFiles []*os.File
|
||||
var argLogFileError error
|
||||
|
||||
// We don't create log files if the version argument is supplied
|
||||
if !configFromArgs.version {
|
||||
configLogFile, err := createLogFile(filename)
|
||||
if err != nil {
|
||||
return logger, fmt.Errorf("error opening debug log file: %v", err)
|
||||
}
|
||||
if configLogFile != nil {
|
||||
logFiles = append(logFiles, configLogFile)
|
||||
}
|
||||
|
||||
argLogFile, err := createLogFile(configFromArgs.file)
|
||||
if argLogFile != nil {
|
||||
logFiles = append(logFiles, argLogFile)
|
||||
}
|
||||
argLogFileError = err
|
||||
// LogToFile opens the specified file for appending and sets the logger to
|
||||
// output to the opened file. A reference to the file pointer is stored to
|
||||
// allow this to be closed.
|
||||
func (l *Logger) LogToFile(filename string) error {
|
||||
logFile, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening debug log file: %v", err)
|
||||
}
|
||||
|
||||
l := &Logger{
|
||||
Logger: logrus.New(),
|
||||
previousLogger: logger.Logger,
|
||||
logFiles: logFiles,
|
||||
}
|
||||
l.logFile = logFile
|
||||
l.previousOutput = l.Out
|
||||
l.SetOutput(logFile)
|
||||
|
||||
l.SetLevel(level)
|
||||
if level == logrus.DebugLevel {
|
||||
logrus.SetReportCaller(true)
|
||||
// Shorten function and file names reported by the logger, by
|
||||
// trimming common "github.com/opencontainers/runc" prefix.
|
||||
// This is only done for text formatter.
|
||||
_, file, _, _ := runtime.Caller(0)
|
||||
prefix := filepath.Dir(file) + "/"
|
||||
logrus.SetFormatter(&logrus.TextFormatter{
|
||||
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
|
||||
function := strings.TrimPrefix(f.Function, prefix) + "()"
|
||||
fileLine := strings.TrimPrefix(f.File, prefix) + ":" + strconv.Itoa(f.Line)
|
||||
return function, fileLine
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
if configFromArgs.format == "json" {
|
||||
l.SetFormatter(new(logrus.JSONFormatter))
|
||||
}
|
||||
|
||||
if len(logFiles) == 0 {
|
||||
l.SetOutput(io.Discard)
|
||||
} else if len(logFiles) == 1 {
|
||||
l.SetOutput(logFiles[0])
|
||||
} else if len(logFiles) > 1 {
|
||||
var writers []io.Writer
|
||||
for _, f := range logFiles {
|
||||
writers = append(writers, f)
|
||||
}
|
||||
l.SetOutput(io.MultiWriter(writers...))
|
||||
}
|
||||
|
||||
if logLevelError != nil {
|
||||
l.Warn(logLevelError)
|
||||
}
|
||||
|
||||
if argLogFileError != nil {
|
||||
l.Warnf("Failed to open log file: %v", argLogFileError)
|
||||
}
|
||||
|
||||
return l, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reset closes the log file (if any) and resets the logger output to what it
|
||||
// was before UpdateLogger was called.
|
||||
func (l *Logger) Reset() error {
|
||||
defer func() {
|
||||
previous := l.previousLogger
|
||||
if previous == nil {
|
||||
previous = logrus.New()
|
||||
}
|
||||
logger = &Logger{Logger: previous}
|
||||
}()
|
||||
|
||||
var errs []error
|
||||
for _, f := range l.logFiles {
|
||||
err := f.Close()
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// CloseFile closes the log file (if any) and resets the logger output to what it
|
||||
// was before LogToFile was called.
|
||||
func (l *Logger) CloseFile() error {
|
||||
if l.logFile == nil {
|
||||
return nil
|
||||
}
|
||||
logFile := l.logFile
|
||||
l.SetOutput(l.previousOutput)
|
||||
l.logFile = nil
|
||||
|
||||
var err error
|
||||
for _, e := range errs {
|
||||
if err == nil {
|
||||
err = e
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("%v; %w", e, err)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func createLogFile(filename string) (*os.File, error) {
|
||||
if filename != "" && filename != os.DevNull {
|
||||
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
type loggerConfig struct {
|
||||
file string
|
||||
format string
|
||||
debug bool
|
||||
version bool
|
||||
}
|
||||
|
||||
func (c loggerConfig) getLevel(logLevel string) (logrus.Level, error) {
|
||||
if c.debug {
|
||||
return logrus.DebugLevel, nil
|
||||
}
|
||||
|
||||
if logLevel, err := logrus.ParseLevel(logLevel); err == nil {
|
||||
return logLevel, nil
|
||||
}
|
||||
|
||||
return logrus.InfoLevel, fmt.Errorf("invalid log-level '%v'", logLevel)
|
||||
}
|
||||
|
||||
// Informed by Taken from https://github.com/opencontainers/runc/blob/7fd8b57001f5bfa102e89cb434d96bf71f7c1d35/main.go#L182
|
||||
func parseArgs(args []string) loggerConfig {
|
||||
c := loggerConfig{}
|
||||
|
||||
expected := map[string]*string{
|
||||
"log-format": &c.format,
|
||||
"log": &c.file,
|
||||
}
|
||||
|
||||
found := make(map[string]bool)
|
||||
|
||||
for i := 0; i < len(args); i++ {
|
||||
if len(found) == 4 {
|
||||
break
|
||||
}
|
||||
|
||||
param := args[i]
|
||||
|
||||
parts := strings.SplitN(param, "=", 2)
|
||||
trimmed := strings.TrimLeft(parts[0], "-")
|
||||
// If this is not a flag we continue
|
||||
if parts[0] == trimmed {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the version flag
|
||||
if trimmed == "version" {
|
||||
c.version = true
|
||||
found["version"] = true
|
||||
// For the version flag we don't process any other flags
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the debug flag
|
||||
if trimmed == "debug" {
|
||||
c.debug = true
|
||||
found["debug"] = true
|
||||
continue
|
||||
}
|
||||
|
||||
destination, exists := expected[trimmed]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
var value string
|
||||
if len(parts) == 2 {
|
||||
value = parts[2]
|
||||
} else if i+1 < len(args) {
|
||||
value = args[i+1]
|
||||
i++
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
|
||||
*destination = value
|
||||
found[trimmed] = true
|
||||
}
|
||||
|
||||
return c
|
||||
return logFile.Close()
|
||||
}
|
||||
|
||||
@@ -3,82 +3,87 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"path"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/pelletier/go-toml"
|
||||
)
|
||||
|
||||
// version must be set by go build's -X main.version= option in the Makefile.
|
||||
var version = "unknown"
|
||||
const (
|
||||
configOverride = "XDG_CONFIG_HOME"
|
||||
configFilePath = "nvidia-container-runtime/config.toml"
|
||||
|
||||
// gitCommit will be the hash that the binary was built from
|
||||
// and will be populated by the Makefile
|
||||
var gitCommit = ""
|
||||
hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook"
|
||||
)
|
||||
|
||||
var (
|
||||
configDir = "/etc/"
|
||||
)
|
||||
|
||||
var logger = NewLogger()
|
||||
|
||||
func main() {
|
||||
err := run(os.Args)
|
||||
if err != nil {
|
||||
logger.Errorf("%v", err)
|
||||
logger.Errorf("Error running %v: %v", os.Args, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// run is an entry point that allows for idiomatic handling of errors
|
||||
// when calling from the main function.
|
||||
func run(argv []string) (rerr error) {
|
||||
printVersion := hasVersionFlag(argv)
|
||||
if printVersion {
|
||||
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
|
||||
}
|
||||
|
||||
cfg, err := config.GetConfig()
|
||||
func run(argv []string) (err error) {
|
||||
cfg, err := getConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading config: %v", err)
|
||||
}
|
||||
|
||||
logger, err = UpdateLogger(
|
||||
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
|
||||
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
|
||||
argv,
|
||||
)
|
||||
err = logger.LogToFile(cfg.debugFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to set up logger: %v", err)
|
||||
return fmt.Errorf("error opening debug log file: %v", err)
|
||||
}
|
||||
defer logger.Reset()
|
||||
defer func() {
|
||||
// We capture and log a returning error before closing the log file.
|
||||
if err != nil {
|
||||
logger.Errorf("Error running %v: %v", argv, err)
|
||||
}
|
||||
logger.CloseFile()
|
||||
}()
|
||||
|
||||
logger.Debugf("Command line arguments: %v", argv)
|
||||
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
|
||||
r, err := newRuntime(argv)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
|
||||
return fmt.Errorf("error creating runtime: %v", err)
|
||||
}
|
||||
|
||||
if printVersion {
|
||||
fmt.Print("\n")
|
||||
}
|
||||
return runtime.Exec(argv)
|
||||
logger.Printf("Running %s\n", argv[0])
|
||||
return r.Exec(argv)
|
||||
}
|
||||
|
||||
// TODO: This should be refactored / combined with parseArgs in logger.
|
||||
func hasVersionFlag(args []string) bool {
|
||||
for i := 0; i < len(args); i++ {
|
||||
param := args[i]
|
||||
type config struct {
|
||||
debugFilePath string
|
||||
}
|
||||
|
||||
parts := strings.SplitN(param, "=", 2)
|
||||
trimmed := strings.TrimLeft(parts[0], "-")
|
||||
// If this is not a flag we continue
|
||||
if parts[0] == trimmed {
|
||||
continue
|
||||
}
|
||||
// getConfig sets up the config struct. Values are read from a toml file
|
||||
// or set via the environment.
|
||||
func getConfig() (*config, error) {
|
||||
cfg := &config{}
|
||||
|
||||
// Check the version flag
|
||||
if trimmed == "version" {
|
||||
return true
|
||||
}
|
||||
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
||||
configDir = XDGConfigDir
|
||||
}
|
||||
|
||||
return false
|
||||
configFilePath := path.Join(configDir, configFilePath)
|
||||
|
||||
tomlContent, err := os.ReadFile(configFilePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
toml, err := toml.Load(string(tomlContent))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cfg.debugFilePath = toml.GetDefault("nvidia-container-runtime.debug", "/dev/null").(string)
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
@@ -3,15 +3,15 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -24,10 +24,6 @@ const (
|
||||
unmodifiedSpecFileSuffix = "test/input/test_spec.json"
|
||||
)
|
||||
|
||||
const (
|
||||
runcExecutableName = "runc"
|
||||
)
|
||||
|
||||
type testConfig struct {
|
||||
root string
|
||||
binPath string
|
||||
@@ -39,7 +35,7 @@ func TestMain(m *testing.M) {
|
||||
// TEST SETUP
|
||||
// Determine the module root and the test binary path
|
||||
var err error
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
moduleRoot, err := getModuleRoot()
|
||||
if err != nil {
|
||||
logger.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
}
|
||||
@@ -47,7 +43,7 @@ func TestMain(m *testing.M) {
|
||||
testInputPath := filepath.Join(moduleRoot, "test", "input")
|
||||
|
||||
// Set the environment variables for the test
|
||||
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
|
||||
os.Setenv("PATH", prependToPath(testBinPath, moduleRoot))
|
||||
os.Setenv("XDG_CONFIG_HOME", testInputPath)
|
||||
|
||||
// Confirm that the environment is configured correctly
|
||||
@@ -75,6 +71,31 @@ func TestMain(m *testing.M) {
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
|
||||
func getModuleRoot() (string, error) {
|
||||
_, filename, _, _ := runtime.Caller(0)
|
||||
|
||||
return hasGoMod(filename)
|
||||
}
|
||||
|
||||
func hasGoMod(dir string) (string, error) {
|
||||
if dir == "" || dir == "/" {
|
||||
return "", fmt.Errorf("module root not found")
|
||||
}
|
||||
|
||||
_, err := os.Stat(filepath.Join(dir, "go.mod"))
|
||||
if err != nil {
|
||||
return hasGoMod(filepath.Dir(dir))
|
||||
}
|
||||
return dir, nil
|
||||
}
|
||||
|
||||
func prependToPath(additionalPaths ...string) string {
|
||||
paths := strings.Split(os.Getenv("PATH"), ":")
|
||||
paths = append(additionalPaths, paths...)
|
||||
|
||||
return strings.Join(paths, ":")
|
||||
}
|
||||
|
||||
// case 1) nvidia-container-runtime run --bundle
|
||||
// case 2) nvidia-container-runtime create --bundle
|
||||
// - Confirm the runtime handles bad input correctly
|
||||
@@ -84,6 +105,11 @@ func TestBadInput(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
|
||||
output, err := cmdRun.CombinedOutput()
|
||||
require.Errorf(t, err, "runtime should return an error", "output=%v", string(output))
|
||||
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
@@ -167,11 +193,11 @@ func TestDuplicateHook(t *testing.T) {
|
||||
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json")
|
||||
}
|
||||
|
||||
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
|
||||
// addNVIDIAHook is a basic wrapper for nvidiaContainerRunime.addNVIDIAHook that is used for
|
||||
// testing.
|
||||
func addNVIDIAHook(spec *specs.Spec) error {
|
||||
m := modifier.NewStableRuntimeModifier(logger.Logger)
|
||||
return m.Modify(spec)
|
||||
r := nvidiaContainerRuntime{logger: logger.Logger}
|
||||
return r.addNVIDIAHook(spec)
|
||||
}
|
||||
|
||||
func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
|
||||
@@ -244,3 +270,24 @@ func nvidiaHookCount(hooks *specs.Hooks) int {
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
testDir := filepath.Join(wd, "test")
|
||||
filename := filepath.Join(testDir, configFilePath)
|
||||
|
||||
os.Setenv(configOverride, testDir)
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
cfg, err := getConfig()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, cfg.debugFilePath, "/nvidia-container-toolkit.log")
|
||||
}
|
||||
|
||||
@@ -1,166 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/cuda"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/edits"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/requirements"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// csvMode represents the modifications as performed by the csv runtime mode
|
||||
type csvMode struct {
|
||||
logger *logrus.Logger
|
||||
discoverer discover.Discover
|
||||
}
|
||||
|
||||
const (
|
||||
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
|
||||
visibleDevicesVoid = "void"
|
||||
|
||||
nvidiaRequireJetpackEnvvar = "NVIDIA_REQUIRE_JETPACK"
|
||||
)
|
||||
|
||||
// NewCSVModifier creates a modifier that applies modications to an OCI spec if required by the runtime wrapper.
|
||||
// The modifications are defined by CSV MountSpecs.
|
||||
func NewCSVModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||
rawSpec, err := ociSpec.Load()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||
}
|
||||
|
||||
// In experimental mode, we check whether a modification is required at all and return the lowlevelRuntime directly
|
||||
// if no modification is required.
|
||||
visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar)
|
||||
if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid {
|
||||
logger.Infof("No modification required: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists)
|
||||
return nil, nil
|
||||
}
|
||||
logger.Infof("Constructing modifier from config: %+v", *cfg)
|
||||
|
||||
config := &discover.Config{
|
||||
Root: cfg.NVIDIAContainerCLIConfig.Root,
|
||||
NVIDIAContainerToolkitCLIExecutablePath: cfg.NVIDIACTKConfig.Path,
|
||||
}
|
||||
|
||||
// TODO: Once the devices have been encapsulated in the CUDA image, this can be moved to before the
|
||||
// visible devices are checked.
|
||||
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if err := checkRequirements(logger, &image); err != nil {
|
||||
return nil, fmt.Errorf("requirements not met: %v", err)
|
||||
}
|
||||
|
||||
csvFiles, err := csv.GetFileList(cfg.NVIDIAContainerRuntimeConfig.Modes.CSV.MountSpecPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get list of CSV files: %v", err)
|
||||
}
|
||||
|
||||
nvidiaRequireJetpack, _ := ociSpec.LookupEnv(nvidiaRequireJetpackEnvvar)
|
||||
if nvidiaRequireJetpack != "csv-mounts=all" {
|
||||
csvFiles = csv.BaseFilesOnly(csvFiles)
|
||||
}
|
||||
|
||||
csvDiscoverer, err := discover.NewFromCSVFiles(logger, csvFiles, config.Root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create CSV discoverer: %v", err)
|
||||
}
|
||||
|
||||
ldcacheUpdateHook, err := discover.NewLDCacheUpdateHook(logger, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create ldcach update hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
createSymlinksHook, err := discover.NewCreateSymlinksHook(logger, csvFiles, csvDiscoverer, config)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create symlink hook discoverer: %v", err)
|
||||
}
|
||||
|
||||
d := discover.NewList(csvDiscoverer, ldcacheUpdateHook, createSymlinksHook)
|
||||
|
||||
return newModifierFromDiscoverer(logger, d)
|
||||
}
|
||||
|
||||
// newModifierFromDiscoverer created a modifier that aplies the discovered
|
||||
// modifications to an OCI spec if require by the runtime wrapper.
|
||||
func newModifierFromDiscoverer(logger *logrus.Logger, d discover.Discover) (oci.SpecModifier, error) {
|
||||
m := csvMode{
|
||||
logger: logger,
|
||||
discoverer: d,
|
||||
}
|
||||
return &m, nil
|
||||
}
|
||||
|
||||
// Modify applies the required modifications to the incomming OCI spec. These modifications
|
||||
// are applied in-place.
|
||||
func (m csvMode) Modify(spec *specs.Spec) error {
|
||||
err := nvidiaContainerRuntimeHookRemover{m.logger}.Modify(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to remove existing hooks: %v", err)
|
||||
}
|
||||
|
||||
specEdits, err := edits.NewSpecEdits(m.logger, m.discoverer)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get required container edits: %v", err)
|
||||
}
|
||||
|
||||
return specEdits.Modify(spec)
|
||||
}
|
||||
|
||||
func checkRequirements(logger *logrus.Logger, image *image.CUDA) error {
|
||||
if image.HasDisableRequire() {
|
||||
// TODO: We could print the real value here instead
|
||||
logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", true)
|
||||
return nil
|
||||
}
|
||||
|
||||
imageRequirements, err := image.GetRequirements()
|
||||
if err != nil {
|
||||
// TODO: Should we treat this as a failure, or just issue a warning?
|
||||
return fmt.Errorf("failed to get image requirements: %v", err)
|
||||
}
|
||||
|
||||
r := requirements.New(logger, imageRequirements)
|
||||
|
||||
cudaVersion, err := cuda.Version()
|
||||
if err != nil {
|
||||
logger.Warnf("Failed to get CUDA version: %v", err)
|
||||
} else {
|
||||
r.AddVersionProperty(requirements.CUDA, cudaVersion)
|
||||
}
|
||||
|
||||
compteCapability, err := cuda.ComputeCapability(0)
|
||||
if err != nil {
|
||||
logger.Warnf("Failed to get CUDA Compute Capability: %v", err)
|
||||
} else {
|
||||
r.AddVersionProperty(requirements.ARCH, compteCapability)
|
||||
}
|
||||
|
||||
return r.Assert()
|
||||
}
|
||||
@@ -1,284 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewCSVModifier(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
cfg *config.Config
|
||||
spec oci.Spec
|
||||
visibleDevices string
|
||||
expectedError error
|
||||
expectedNil bool
|
||||
}{
|
||||
{
|
||||
description: "spec load error returns error",
|
||||
spec: &oci.SpecMock{
|
||||
LoadFunc: func() (*specs.Spec, error) {
|
||||
return nil, fmt.Errorf("load failed")
|
||||
},
|
||||
},
|
||||
expectedError: fmt.Errorf("load failed"),
|
||||
},
|
||||
{
|
||||
description: "visible devices not set returns nil",
|
||||
visibleDevices: "NOT_SET",
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "visible devices empty returns nil",
|
||||
visibleDevices: "",
|
||||
expectedNil: true,
|
||||
},
|
||||
{
|
||||
description: "visible devices 'void' returns nil",
|
||||
visibleDevices: "void",
|
||||
expectedNil: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
spec := tc.spec
|
||||
if spec == nil {
|
||||
spec = &oci.SpecMock{
|
||||
LookupEnvFunc: func(s string) (string, bool) {
|
||||
if tc.visibleDevices != "NOT_SET" && s == visibleDevicesEnvvar {
|
||||
return tc.visibleDevices, true
|
||||
}
|
||||
return "", false
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
m, err := NewCSVModifier(logger, tc.cfg, spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
if tc.expectedNil || tc.expectedError != nil {
|
||||
require.Nil(t, m)
|
||||
} else {
|
||||
require.NotNil(t, m)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestExperimentalModifier(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
discover *discover.DiscoverMock
|
||||
spec *specs.Spec
|
||||
expectedError error
|
||||
expectedSpec *specs.Spec
|
||||
}{
|
||||
{
|
||||
description: "empty discoverer does not modify spec",
|
||||
discover: &discover.DiscoverMock{},
|
||||
},
|
||||
{
|
||||
description: "failed hooks discoverer returns error",
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
return nil, fmt.Errorf("discover.Hooks error")
|
||||
},
|
||||
},
|
||||
expectedError: fmt.Errorf("discover.Hooks error"),
|
||||
},
|
||||
{
|
||||
description: "discovered hooks are injected into spec",
|
||||
spec: &specs.Spec{},
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
hooks := []discover.Hook{
|
||||
{
|
||||
Lifecycle: "prestart",
|
||||
Path: "/hook/a",
|
||||
Args: []string{"/hook/a", "arga"},
|
||||
},
|
||||
{
|
||||
Lifecycle: "createContainer",
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
}
|
||||
return hooks, nil
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/a",
|
||||
Args: []string{"/hook/a", "arga"},
|
||||
},
|
||||
},
|
||||
CreateContainer: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "existing hooks are maintained",
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/a",
|
||||
Args: []string{"/hook/a", "arga"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
hooks := []discover.Hook{
|
||||
{
|
||||
Lifecycle: "prestart",
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
}
|
||||
return hooks, nil
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/a",
|
||||
Args: []string{"/hook/a", "arga"},
|
||||
},
|
||||
{
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "modification removes existing nvidia-container-runtime-hook",
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-runtime-hook",
|
||||
Args: []string{"/path/to/nvidia-container-runtime-hook", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
hooks := []discover.Hook{
|
||||
{
|
||||
Lifecycle: "prestart",
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
}
|
||||
return hooks, nil
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "modification removes existing nvidia-container-toolkit",
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/path/to/nvidia-container-toolkit",
|
||||
Args: []string{"/path/to/nvidia-container-toolkit", "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
discover: &discover.DiscoverMock{
|
||||
HooksFunc: func() ([]discover.Hook, error) {
|
||||
hooks := []discover.Hook{
|
||||
{
|
||||
Lifecycle: "prestart",
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
}
|
||||
return hooks, nil
|
||||
},
|
||||
},
|
||||
expectedSpec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "/hook/b",
|
||||
Args: []string{"/hook/b", "argb"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
m, err := newModifierFromDiscoverer(logger, tc.discover)
|
||||
require.NoError(t, err)
|
||||
|
||||
err = m.Modify(tc.spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.EqualValues(t, tc.expectedSpec, tc.spec)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,79 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// nvidiaContainerRuntimeHookRemover is a spec modifer that detects and removes inserted nvidia-container-runtime hooks
|
||||
type nvidiaContainerRuntimeHookRemover struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
var _ oci.SpecModifier = (*nvidiaContainerRuntimeHookRemover)(nil)
|
||||
|
||||
// Modify removes any NVIDIA Container Runtime hooks from the provided spec
|
||||
func (m nvidiaContainerRuntimeHookRemover) Modify(spec *specs.Spec) error {
|
||||
if spec == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if spec.Hooks == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
if len(spec.Hooks.Prestart) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
var newPrestart []specs.Hook
|
||||
|
||||
for _, hook := range spec.Hooks.Prestart {
|
||||
if isNVIDIAContainerRuntimeHook(&hook) {
|
||||
m.logger.Debugf("Removing hook %v", hook)
|
||||
continue
|
||||
}
|
||||
newPrestart = append(newPrestart, hook)
|
||||
}
|
||||
|
||||
if len(newPrestart) != len(spec.Hooks.Prestart) {
|
||||
m.logger.Debugf("Updating 'prestart' hooks to %v", newPrestart)
|
||||
spec.Hooks.Prestart = newPrestart
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// isNVIDIAContainerRuntimeHook checks if the provided hook is an nvidia-container-runtime-hook
|
||||
// or nvidia-container-toolkit hook. These are included, for example, by the non-experimental
|
||||
// nvidia-container-runtime or docker when specifying the --gpus flag.
|
||||
func isNVIDIAContainerRuntimeHook(hook *specs.Hook) bool {
|
||||
bins := map[string]struct{}{
|
||||
config.NVIDIAContainerRuntimeHookExecutable: {},
|
||||
config.NVIDIAContainerToolkitExecutable: {},
|
||||
}
|
||||
|
||||
_, exists := bins[filepath.Base(hook.Path)]
|
||||
|
||||
return exists
|
||||
}
|
||||
@@ -1,77 +0,0 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewStableRuntimeModifier creates an OCI spec modifier that inserts the NVIDIA Container Runtime Hook into an OCI
|
||||
// spec. The specified logger is used to capture log output.
|
||||
func NewStableRuntimeModifier(logger *logrus.Logger) oci.SpecModifier {
|
||||
m := stableRuntimeModifier{logger: logger}
|
||||
|
||||
return &m
|
||||
}
|
||||
|
||||
// stableRuntimeModifier modifies an OCI spec inplace, inserting the nvidia-container-runtime-hook as a
|
||||
// prestart hook. If the hook is already present, no modification is made.
|
||||
type stableRuntimeModifier struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// Modify applies the required modification to the incoming OCI spec, inserting the nvidia-container-runtime-hook
|
||||
// as a prestart hook.
|
||||
func (m stableRuntimeModifier) Modify(spec *specs.Spec) error {
|
||||
path, err := exec.LookPath(config.NVIDIAContainerRuntimeHookExecutable)
|
||||
if err != nil {
|
||||
path = filepath.Join(config.DefaultExecutableDir, config.NVIDIAContainerRuntimeHookExecutable)
|
||||
_, err = os.Stat(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
m.logger.Infof("Using prestart hook path: %s", path)
|
||||
|
||||
args := []string{path}
|
||||
if spec.Hooks == nil {
|
||||
spec.Hooks = &specs.Hooks{}
|
||||
} else if len(spec.Hooks.Prestart) != 0 {
|
||||
for _, hook := range spec.Hooks.Prestart {
|
||||
if strings.Contains(hook.Path, config.NVIDIAContainerRuntimeHookExecutable) {
|
||||
m.logger.Infof("existing nvidia prestart hook found in OCI spec")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
|
||||
Path: path,
|
||||
Args: append(args, "prestart"),
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,170 +0,0 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package modifier
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
type testConfig struct {
|
||||
root string
|
||||
binPath string
|
||||
}
|
||||
|
||||
var cfg *testConfig
|
||||
|
||||
func TestMain(m *testing.M) {
|
||||
// TEST SETUP
|
||||
// Determine the module root and the test binary path
|
||||
var err error
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
if err != nil {
|
||||
logrus.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
}
|
||||
testBinPath := filepath.Join(moduleRoot, "test", "bin")
|
||||
|
||||
// Set the environment variables for the test
|
||||
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
|
||||
|
||||
// Store the root and binary paths in the test Config
|
||||
cfg = &testConfig{
|
||||
root: moduleRoot,
|
||||
binPath: testBinPath,
|
||||
}
|
||||
|
||||
// RUN TESTS
|
||||
exitCode := m.Run()
|
||||
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
|
||||
func TestAddHookModifier(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testHookPath := filepath.Join(cfg.binPath, "nvidia-container-runtime-hook")
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
spec specs.Spec
|
||||
expectedError error
|
||||
expectedSpec specs.Spec
|
||||
}{
|
||||
{
|
||||
description: "empty spec adds hook",
|
||||
spec: specs.Spec{},
|
||||
expectedSpec: specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: testHookPath,
|
||||
Args: []string{testHookPath, "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "spec with empty hooks adds hook",
|
||||
spec: specs.Spec{
|
||||
Hooks: &specs.Hooks{},
|
||||
},
|
||||
expectedSpec: specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: testHookPath,
|
||||
Args: []string{testHookPath, "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "hook is not replaced",
|
||||
spec: specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "nvidia-container-runtime-hook",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "nvidia-container-runtime-hook",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "other hooks are not replaced",
|
||||
spec: specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "some-hook",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedSpec: specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{
|
||||
{
|
||||
Path: "some-hook",
|
||||
},
|
||||
{
|
||||
Path: testHookPath,
|
||||
Args: []string{testHookPath, "prestart"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
|
||||
m := NewStableRuntimeModifier(logger)
|
||||
|
||||
err := m.Modify(&tc.spec)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.EqualValues(t, tc.expectedSpec, tc.spec)
|
||||
})
|
||||
}
|
||||
|
||||
}
|
||||
132
cmd/nvidia-container-runtime/nvcr.go
Normal file
132
cmd/nvidia-container-runtime/nvcr.go
Normal file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// nvidiaContainerRuntime encapsulates the NVIDIA Container Runtime. It wraps the specified runtime, conditionally
|
||||
// modifying the specified OCI specification before invoking the runtime.
|
||||
type nvidiaContainerRuntime struct {
|
||||
logger *log.Logger
|
||||
runtime oci.Runtime
|
||||
ociSpec oci.Spec
|
||||
}
|
||||
|
||||
var _ oci.Runtime = (*nvidiaContainerRuntime)(nil)
|
||||
|
||||
// newNvidiaContainerRuntime is a constructor for a standard runtime shim.
|
||||
func newNvidiaContainerRuntimeWithLogger(logger *log.Logger, runtime oci.Runtime, ociSpec oci.Spec) (oci.Runtime, error) {
|
||||
r := nvidiaContainerRuntime{
|
||||
logger: logger,
|
||||
runtime: runtime,
|
||||
ociSpec: ociSpec,
|
||||
}
|
||||
|
||||
return &r, nil
|
||||
}
|
||||
|
||||
// Exec defines the entrypoint for the NVIDIA Container Runtime. A check is performed to see whether modifications
|
||||
// to the OCI spec are required -- and applicable modifcations applied. The supplied arguments are then
|
||||
// forwarded to the underlying runtime's Exec method.
|
||||
func (r nvidiaContainerRuntime) Exec(args []string) error {
|
||||
if r.modificationRequired(args) {
|
||||
err := r.modifyOCISpec()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error modifying OCI spec: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.Println("Forwarding command to runtime")
|
||||
return r.runtime.Exec(args)
|
||||
}
|
||||
|
||||
// modificationRequired checks the intput arguments to determine whether a modification
|
||||
// to the OCI spec is required.
|
||||
func (r nvidiaContainerRuntime) modificationRequired(args []string) bool {
|
||||
if oci.HasCreateSubcommand(args) {
|
||||
r.logger.Infof("'create' command detected; modification required")
|
||||
return true
|
||||
}
|
||||
|
||||
r.logger.Infof("No modification required")
|
||||
return false
|
||||
}
|
||||
|
||||
// modifyOCISpec loads and modifies the OCI spec specified in the nvidiaContainerRuntime
|
||||
// struct. The spec is modified in-place and written to the same file as the input after
|
||||
// modifcationas are applied.
|
||||
func (r nvidiaContainerRuntime) modifyOCISpec() error {
|
||||
err := r.ociSpec.Load()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading OCI specification for modification: %v", err)
|
||||
}
|
||||
|
||||
err = r.ociSpec.Modify(r.addNVIDIAHook)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error injecting NVIDIA Container Runtime hook: %v", err)
|
||||
}
|
||||
|
||||
err = r.ociSpec.Flush()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error writing modified OCI specification: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// addNVIDIAHook modifies the specified OCI specification in-place, inserting a
|
||||
// prestart hook.
|
||||
func (r nvidiaContainerRuntime) addNVIDIAHook(spec *specs.Spec) error {
|
||||
path, err := exec.LookPath("nvidia-container-runtime-hook")
|
||||
if err != nil {
|
||||
path = hookDefaultFilePath
|
||||
_, err = os.Stat(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.Printf("prestart hook path: %s\n", path)
|
||||
|
||||
args := []string{path}
|
||||
if spec.Hooks == nil {
|
||||
spec.Hooks = &specs.Hooks{}
|
||||
} else if len(spec.Hooks.Prestart) != 0 {
|
||||
for _, hook := range spec.Hooks.Prestart {
|
||||
if !strings.Contains(hook.Path, "nvidia-container-runtime-hook") {
|
||||
continue
|
||||
}
|
||||
r.logger.Println("existing nvidia prestart hook in OCI spec file")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
|
||||
Path: path,
|
||||
Args: append(args, "prestart"),
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
203
cmd/nvidia-container-runtime/nvcr_test.go
Normal file
203
cmd/nvidia-container-runtime/nvcr_test.go
Normal file
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAddNvidiaHook(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
shim := nvidiaContainerRuntime{
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
spec *specs.Spec
|
||||
errorPrefix string
|
||||
shouldNotAdd bool
|
||||
}{
|
||||
{
|
||||
spec: &specs.Spec{},
|
||||
},
|
||||
{
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{},
|
||||
},
|
||||
},
|
||||
{
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{{
|
||||
Path: "some-hook",
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{{
|
||||
Path: "nvidia-container-runtime-hook",
|
||||
}},
|
||||
},
|
||||
},
|
||||
shouldNotAdd: true,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
var numPrestartHooks int
|
||||
if tc.spec.Hooks != nil {
|
||||
numPrestartHooks = len(tc.spec.Hooks.Prestart)
|
||||
}
|
||||
|
||||
err := shim.addNVIDIAHook(tc.spec)
|
||||
|
||||
if tc.errorPrefix == "" {
|
||||
require.NoErrorf(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Truef(t, strings.HasPrefix(err.Error(), tc.errorPrefix), "%d: %v", i, tc)
|
||||
|
||||
require.NotNilf(t, tc.spec.Hooks, "%d: %v", i, tc)
|
||||
require.Equalf(t, 1, nvidiaHookCount(tc.spec.Hooks), "%d: %v", i, tc)
|
||||
|
||||
if tc.shouldNotAdd {
|
||||
require.Equal(t, numPrestartHooks+1, len(tc.spec.Hooks.Poststart), "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Equal(t, numPrestartHooks+1, len(tc.spec.Hooks.Poststart), "%d: %v", i, tc)
|
||||
|
||||
nvidiaHook := tc.spec.Hooks.Poststart[len(tc.spec.Hooks.Poststart)-1]
|
||||
|
||||
// TODO: This assumes that the hook has been set up in the makefile
|
||||
expectedPath := "/usr/bin/nvidia-container-runtime-hook"
|
||||
require.Equalf(t, expectedPath, nvidiaHook.Path, "%d: %v", i, tc)
|
||||
require.Equalf(t, []string{expectedPath, "prestart"}, nvidiaHook.Args, "%d: %v", i, tc)
|
||||
require.Emptyf(t, nvidiaHook.Env, "%d: %v", i, tc)
|
||||
require.Nilf(t, nvidiaHook.Timeout, "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNvidiaContainerRuntime(t *testing.T) {
|
||||
logger, hook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
shim nvidiaContainerRuntime
|
||||
shouldModify bool
|
||||
args []string
|
||||
modifyError error
|
||||
writeError error
|
||||
}{
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
shouldModify: false,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"create"},
|
||||
shouldModify: true,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"--bundle=create"},
|
||||
shouldModify: false,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"--bundle", "create"},
|
||||
shouldModify: false,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"create"},
|
||||
shouldModify: true,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"create"},
|
||||
modifyError: fmt.Errorf("error modifying"),
|
||||
shouldModify: true,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"create"},
|
||||
writeError: fmt.Errorf("error writing"),
|
||||
shouldModify: true,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
tc.shim.logger = logger
|
||||
hook.Reset()
|
||||
|
||||
ociMock := &oci.SpecMock{
|
||||
ModifyFunc: func(specModifier oci.SpecModifier) error {
|
||||
return tc.modifyError
|
||||
},
|
||||
FlushFunc: func() error {
|
||||
return tc.writeError
|
||||
},
|
||||
}
|
||||
require.Equal(t, tc.shouldModify, tc.shim.modificationRequired(tc.args), "%d: %v", i, tc)
|
||||
|
||||
tc.shim.ociSpec = ociMock
|
||||
tc.shim.runtime = &MockShim{}
|
||||
|
||||
err := tc.shim.Exec(tc.args)
|
||||
if tc.modifyError != nil || tc.writeError != nil {
|
||||
require.Error(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.NoError(t, err, "%d: %v", i, tc)
|
||||
}
|
||||
|
||||
if tc.shouldModify {
|
||||
require.Equal(t, 1, len(ociMock.ModifyCalls()), "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Equal(t, 0, len(ociMock.ModifyCalls()), "%d: %v", i, tc)
|
||||
}
|
||||
|
||||
writeExpected := tc.shouldModify && tc.modifyError == nil
|
||||
if writeExpected {
|
||||
require.Equal(t, 1, len(ociMock.FlushCalls()), "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Equal(t, 0, len(ociMock.FlushCalls()), "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type MockShim struct {
|
||||
called bool
|
||||
args []string
|
||||
returnError error
|
||||
}
|
||||
|
||||
func (m *MockShim) Exec(args []string) error {
|
||||
m.called = true
|
||||
m.args = args
|
||||
return m.returnError
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -19,55 +19,56 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger
|
||||
func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv []string) (oci.Runtime, error) {
|
||||
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, cfg.NVIDIAContainerRuntimeConfig.Runtimes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
|
||||
}
|
||||
const (
|
||||
ociSpecFileName = "config.json"
|
||||
dockerRuncExecutableName = "docker-runc"
|
||||
runcExecutableName = "runc"
|
||||
)
|
||||
|
||||
if !oci.HasCreateSubcommand(argv) {
|
||||
logger.Debugf("Skipping modifier for non-create subcommand")
|
||||
return lowLevelRuntime, nil
|
||||
}
|
||||
|
||||
ociSpec, err := oci.NewSpec(logger, argv)
|
||||
// newRuntime is a factory method that constructs a runtime based on the selected configuration.
|
||||
func newRuntime(argv []string) (oci.Runtime, error) {
|
||||
ociSpec, err := newOCISpec(argv)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
|
||||
}
|
||||
|
||||
specModifier, err := newSpecModifier(logger, cfg, ociSpec, argv)
|
||||
runc, err := newRuncRuntime()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
|
||||
return nil, fmt.Errorf("error constructing runc runtime: %v", err)
|
||||
}
|
||||
|
||||
// Create the wrapping runtime with the specified modifier
|
||||
r := runtime.NewModifyingRuntimeWrapper(
|
||||
logger,
|
||||
lowLevelRuntime,
|
||||
ociSpec,
|
||||
specModifier,
|
||||
)
|
||||
r, err := newNvidiaContainerRuntimeWithLogger(logger.Logger, runc, ociSpec)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing NVIDIA Container Runtime: %v", err)
|
||||
}
|
||||
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
|
||||
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
|
||||
case "legacy":
|
||||
return modifier.NewStableRuntimeModifier(logger), nil
|
||||
case "csv":
|
||||
return modifier.NewCSVModifier(logger, cfg, ociSpec)
|
||||
// newOCISpec constructs an OCI spec for the provided arguments
|
||||
func newOCISpec(argv []string) (oci.Spec, error) {
|
||||
bundleDir, err := oci.GetBundleDir(argv)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error parsing command line arguments: %v", err)
|
||||
}
|
||||
logger.Infof("Using bundle directory: %v", bundleDir)
|
||||
|
||||
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
||||
ociSpecPath := oci.GetSpecFilePath(bundleDir)
|
||||
logger.Infof("Using OCI specification file path: %v", ociSpecPath)
|
||||
|
||||
ociSpec := oci.NewSpecFromFile(ociSpecPath)
|
||||
|
||||
return ociSpec, nil
|
||||
}
|
||||
|
||||
// newRuncRuntime locates the runc binary and wraps it in a SyscallExecRuntime
|
||||
func newRuncRuntime() (oci.Runtime, error) {
|
||||
return oci.NewLowLevelRuntimeWithLogger(
|
||||
logger.Logger,
|
||||
dockerRuncExecutableName,
|
||||
runcExecutableName,
|
||||
)
|
||||
}
|
||||
|
||||
@@ -17,113 +17,14 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFactoryMethod(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
func TestConstructor(t *testing.T) {
|
||||
shim, err := newRuntime([]string{})
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
cfg *config.Config
|
||||
spec *specs.Spec
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
description: "empty config raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
{
|
||||
description: "config with runtime raises no error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv mode is supported",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
Process: &specs.Process{
|
||||
Env: []string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-legacy discover mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "non-legacy",
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
{
|
||||
description: "legacy discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "empty mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
bundleDir := t.TempDir()
|
||||
|
||||
specFile, err := os.Create(filepath.Join(bundleDir, "config.json"))
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
|
||||
|
||||
argv := []string{"--bundle", bundleDir, "create"}
|
||||
|
||||
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
|
||||
if tc.expectedError {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, shim)
|
||||
}
|
||||
|
||||
@@ -7,9 +7,9 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
@@ -104,6 +104,32 @@ type HookState struct {
|
||||
BundlePath string `json:"bundlePath"`
|
||||
}
|
||||
|
||||
func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d.%d.%d\n", &vmaj, &vmin, &vpatch); err != nil {
|
||||
vpatch = 0
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d.%d\n", &vmaj, &vmin); err != nil {
|
||||
vmin = 0
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d\n", &vmaj); err != nil {
|
||||
log.Panicln("invalid CUDA version:", cudaVersion)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func getEnvMap(e []string) (m map[string]string) {
|
||||
m = make(map[string]string)
|
||||
for _, s := range e {
|
||||
p := strings.SplitN(s, "=", 2)
|
||||
if len(p) != 2 {
|
||||
log.Panicln("environment error")
|
||||
}
|
||||
m[p[0]] = p[1]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func loadSpec(path string) (spec *Spec) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
@@ -165,6 +191,12 @@ func isPrivileged(s *Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func isLegacyCUDAImage(env map[string]string) bool {
|
||||
legacyCudaVersion := env[envCUDAVersion]
|
||||
cudaRequire := env[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
||||
// Build a list of envvars to consider.
|
||||
envVars := []string{envNVVisibleDevices}
|
||||
@@ -303,11 +335,27 @@ func getDriverCapabilities(env map[string]string, supportedDriverCapabilities Dr
|
||||
return capabilities
|
||||
}
|
||||
|
||||
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := image.IsLegacy()
|
||||
func getRequirements(env map[string]string, legacyImage bool) []string {
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range env {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
if legacyImage {
|
||||
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
|
||||
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
}
|
||||
return requirements
|
||||
}
|
||||
|
||||
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := isLegacyCUDAImage(env)
|
||||
|
||||
var devices string
|
||||
if d := getDevices(hookConfig, image, mounts, privileged, legacyImage); d != nil {
|
||||
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
||||
devices = *d
|
||||
} else {
|
||||
// 'nil' devices means this is not a GPU container.
|
||||
@@ -315,7 +363,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
|
||||
}
|
||||
|
||||
var migConfigDevices string
|
||||
if d := getMigConfigDevices(image); d != nil {
|
||||
if d := getMigConfigDevices(env); d != nil {
|
||||
migConfigDevices = *d
|
||||
}
|
||||
if !privileged && migConfigDevices != "" {
|
||||
@@ -323,21 +371,19 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
|
||||
}
|
||||
|
||||
var migMonitorDevices string
|
||||
if d := getMigMonitorDevices(image); d != nil {
|
||||
if d := getMigMonitorDevices(env); d != nil {
|
||||
migMonitorDevices = *d
|
||||
}
|
||||
if !privileged && migMonitorDevices != "" {
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
driverCapabilities := getDriverCapabilities(env, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
|
||||
requirements, err := image.GetRequirements()
|
||||
if err != nil {
|
||||
log.Panicln("failed to get requirements", err)
|
||||
}
|
||||
requirements := getRequirements(env, legacyImage)
|
||||
|
||||
disableRequire := image.HasDisableRequire()
|
||||
// Don't fail on invalid values.
|
||||
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
||||
|
||||
return &nvidiaConfig{
|
||||
Devices: devices,
|
||||
@@ -363,17 +409,13 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
|
||||
s := loadSpec(path.Join(b, "config.json"))
|
||||
|
||||
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
env := getEnvMap(s.Process.Env)
|
||||
privileged := isPrivileged(s)
|
||||
envSwarmGPU = hook.SwarmResource
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
Env: image,
|
||||
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
|
||||
Env: env,
|
||||
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,7 +7,6 @@ import (
|
||||
"reflect"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -42,11 +41,10 @@ type HookConfig struct {
|
||||
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
|
||||
SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"`
|
||||
|
||||
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
|
||||
NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
|
||||
}
|
||||
|
||||
func getDefaultHookConfig() HookConfig {
|
||||
func getDefaultHookConfig() (config HookConfig) {
|
||||
return HookConfig{
|
||||
DisableRequire: false,
|
||||
SwarmResource: nil,
|
||||
@@ -65,7 +63,6 @@ func getDefaultHookConfig() HookConfig {
|
||||
User: nil,
|
||||
Ldconfig: nil,
|
||||
},
|
||||
NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(),
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -7,6 +7,51 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseCudaVersionValid(t *testing.T) {
|
||||
var tests = []struct {
|
||||
version string
|
||||
expected [3]uint32
|
||||
}{
|
||||
{"0", [3]uint32{0, 0, 0}},
|
||||
{"8", [3]uint32{8, 0, 0}},
|
||||
{"7.5", [3]uint32{7, 5, 0}},
|
||||
{"9.0.116", [3]uint32{9, 0, 116}},
|
||||
{"4294967295.4294967295.4294967295", [3]uint32{4294967295, 4294967295, 4294967295}},
|
||||
}
|
||||
for i, c := range tests {
|
||||
vmaj, vmin, vpatch := parseCudaVersion(c.version)
|
||||
|
||||
version := [3]uint32{vmaj, vmin, vpatch}
|
||||
|
||||
require.Equal(t, c.expected, version, "%d: %v", i, c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCudaVersionInvalid(t *testing.T) {
|
||||
var tests = []string{
|
||||
"foo",
|
||||
"foo.5.10",
|
||||
"9.0.116.50",
|
||||
"9.0.116foo",
|
||||
"7.foo",
|
||||
"9.0.bar",
|
||||
"9.4294967296",
|
||||
"9.0.116.",
|
||||
"9..0",
|
||||
"9.",
|
||||
".5.10",
|
||||
"-9",
|
||||
"+9",
|
||||
"-9.1.116",
|
||||
"-9.-1.-116",
|
||||
}
|
||||
for _, c := range tests {
|
||||
require.Panics(t, func() {
|
||||
parseCudaVersion(c)
|
||||
}, "parseCudaVersion(%v)", c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPrivileged(t *testing.T) {
|
||||
var tests = []struct {
|
||||
spec string
|
||||
|
||||
@@ -6,21 +6,20 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
var (
|
||||
debugflag = flag.Bool("debug", false, "enable debug output")
|
||||
versionflag = flag.Bool("version", false, "enable version output")
|
||||
configflag = flag.String("config", "", "configuration file")
|
||||
debugflag = flag.Bool("debug", false, "enable debug output")
|
||||
configflag = flag.String("config", "", "configuration file")
|
||||
|
||||
defaultPATH = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
|
||||
)
|
||||
|
||||
func exit() {
|
||||
@@ -36,16 +35,28 @@ func exit() {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func getPATH(config CLIConfig) string {
|
||||
dirs := filepath.SplitList(os.Getenv("PATH"))
|
||||
// directories from the hook environment have higher precedence
|
||||
dirs = append(dirs, defaultPATH...)
|
||||
|
||||
if config.Root != nil {
|
||||
rootDirs := []string{}
|
||||
for _, dir := range dirs {
|
||||
rootDirs = append(rootDirs, path.Join(*config.Root, dir))
|
||||
}
|
||||
// directories with the root prefix have higher precedence
|
||||
dirs = append(rootDirs, dirs...)
|
||||
}
|
||||
return strings.Join(dirs, ":")
|
||||
}
|
||||
|
||||
func getCLIPath(config CLIConfig) string {
|
||||
if config.Path != nil {
|
||||
return *config.Path
|
||||
}
|
||||
|
||||
var root string
|
||||
if config.Root != nil {
|
||||
root = *config.Root
|
||||
}
|
||||
if err := os.Setenv("PATH", lookup.GetPath(root)); err != nil {
|
||||
if err := os.Setenv("PATH", getPATH(config)); err != nil {
|
||||
log.Panicln("couldn't set PATH variable:", err)
|
||||
}
|
||||
|
||||
@@ -74,10 +85,6 @@ func doPrestart() {
|
||||
hook := getHookConfig()
|
||||
cli := hook.NvidiaContainerCLI
|
||||
|
||||
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.")
|
||||
}
|
||||
|
||||
container := getContainerConfig(hook)
|
||||
nvidia := container.Nvidia
|
||||
if nvidia == nil {
|
||||
@@ -160,11 +167,6 @@ func main() {
|
||||
flag.Usage = usage
|
||||
flag.Parse()
|
||||
|
||||
if *versionflag {
|
||||
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime Hook", info.GetVersionString())
|
||||
return
|
||||
}
|
||||
|
||||
args := flag.Args()
|
||||
if len(args) == 0 {
|
||||
flag.Usage()
|
||||
@@ -184,12 +186,3 @@ func main() {
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
|
||||
// logInterceptor implements the info.Logger interface to allow for logging from this function.
|
||||
type logInterceptor struct{}
|
||||
|
||||
func (l *logInterceptor) Infof(format string, args ...interface{}) {
|
||||
log.Printf(format, args...)
|
||||
}
|
||||
|
||||
func (l *logInterceptor) Debugf(format string, args ...interface{}) {}
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
# NVIDIA Container Toolkit CLI
|
||||
|
||||
The NVIDIA Container Toolkit CLI `nvidia-ctk` provides a number of utilities that are useful for working with the NVIDIA Container Toolkit.
|
||||
@@ -1,229 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package symlinks
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
hostRoot string
|
||||
filenames cli.StringSlice
|
||||
links cli.StringSlice
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs a hook command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the '' command
|
||||
c := cli.Command{
|
||||
Name: "create-symlinks",
|
||||
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "host-root",
|
||||
Usage: "The root on the host filesystem to use to resolve symlinks",
|
||||
Destination: &cfg.hostRoot,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "csv-filename",
|
||||
Usage: "Specify a (CSV) filename to process",
|
||||
Destination: &cfg.filenames,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "link",
|
||||
Usage: "Specify a specific link to create. The link is specified as source:target",
|
||||
Destination: &cfg.links,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
csvFiles := cfg.filenames.Value()
|
||||
|
||||
chainLocator := lookup.NewSymlinkChainLocator(m.logger, cfg.hostRoot)
|
||||
|
||||
var candidates []string
|
||||
for _, file := range csvFiles {
|
||||
mountSpecs, err := csv.NewCSVFileParser(m.logger, file).Parse()
|
||||
if err != nil {
|
||||
m.logger.Debugf("Skipping CSV file %v: %v", file, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, ms := range mountSpecs {
|
||||
if ms.Type != csv.MountSpecSym {
|
||||
continue
|
||||
}
|
||||
targets, err := chainLocator.Locate(ms.Path)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to locate symlink %v", ms.Path)
|
||||
}
|
||||
candidates = append(candidates, targets...)
|
||||
}
|
||||
}
|
||||
|
||||
created := make(map[string]bool)
|
||||
// candidates is a list of absolute paths to symlinks in a chain, or the final target of the chain.
|
||||
for _, candidate := range candidates {
|
||||
targets, err := m.Locate(candidate)
|
||||
if err != nil {
|
||||
m.logger.Debugf("Skipping invalid link: %v", err)
|
||||
continue
|
||||
} else if len(targets) != 1 {
|
||||
m.logger.Debugf("Unexepected number of targets: %v", targets)
|
||||
continue
|
||||
} else if targets[0] == candidate {
|
||||
m.logger.Debugf("%v is not a symlink", candidate)
|
||||
continue
|
||||
}
|
||||
|
||||
err = m.createLink(created, cfg.hostRoot, containerRoot, targets[0], candidate)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to create link %v: %v", []string{targets[0], candidate}, err)
|
||||
}
|
||||
}
|
||||
|
||||
links := cfg.links.Value()
|
||||
for _, l := range links {
|
||||
parts := strings.Split(l, ":")
|
||||
if len(parts) != 2 {
|
||||
m.logger.Warnf("Invalid link specification %v", l)
|
||||
continue
|
||||
}
|
||||
|
||||
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to create link %v: %v", parts, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
|
||||
linkPath, err := changeRoot(hostRoot, containerRoot, link)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
|
||||
}
|
||||
if created[linkPath] {
|
||||
m.logger.Debugf("Link %v already created", linkPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
targetPath, err := changeRoot(hostRoot, "/", target)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)
|
||||
err = os.MkdirAll(filepath.Dir(linkPath), 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory: %v", err)
|
||||
}
|
||||
err = os.Symlink(target, linkPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create symlink: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func changeRoot(current string, new string, path string) (string, error) {
|
||||
if !filepath.IsAbs(path) {
|
||||
return path, nil
|
||||
}
|
||||
|
||||
relative := path
|
||||
if current != "" {
|
||||
r, err := filepath.Rel(current, path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
relative = r
|
||||
}
|
||||
|
||||
return filepath.Join(new, relative), nil
|
||||
}
|
||||
|
||||
// Locate returns the link target of the specified filename or an empty slice if the
|
||||
// specified filename is not a symlink.
|
||||
func (m command) Locate(filename string) ([]string, error) {
|
||||
info, err := os.Lstat(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get file info: %v", info)
|
||||
}
|
||||
if info.Mode()&os.ModeSymlink == 0 {
|
||||
m.logger.Debugf("%v is not a symlink", filename)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
target, err := os.Readlink(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error checking symlink: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Debugf("Resolved link: '%v' => '%v'", filename, target)
|
||||
|
||||
return []string{target}, nil
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package hook
|
||||
|
||||
import (
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type hookCommand struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs a hook command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := hookCommand{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m hookCommand) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "hook",
|
||||
Usage: "A collection of hooks that may be injected into an OCI spec",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{
|
||||
ldcache.NewCommand(m.logger),
|
||||
symlinks.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
}
|
||||
@@ -1,129 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
folders cli.StringSlice
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs an update-ldcache command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the update-ldcache command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'update-ldcache' command
|
||||
c := cli.Command{
|
||||
Name: "update-ldcache",
|
||||
Usage: "Update ldcache in a container by running ldconfig",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "folder",
|
||||
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Destination: &cfg.folders,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
err = m.createConfig(containerRoot, cfg.folders.Value())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf: %v", err)
|
||||
}
|
||||
|
||||
args := []string{"/sbin/ldconfig"}
|
||||
if containerRoot != "" {
|
||||
args = append(args, "-r", containerRoot)
|
||||
}
|
||||
|
||||
return syscall.Exec(args[0], args, nil)
|
||||
}
|
||||
|
||||
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
|
||||
// to include the required paths.
|
||||
func (m command) createConfig(root string, folders []string) error {
|
||||
if len(folders) == 0 {
|
||||
m.logger.Debugf("No folders to add to /etc/ld.so.conf")
|
||||
return nil
|
||||
}
|
||||
|
||||
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create config file: %v", err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
|
||||
m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())
|
||||
|
||||
configured := make(map[string]bool)
|
||||
for _, folder := range folders {
|
||||
if configured[folder] {
|
||||
continue
|
||||
}
|
||||
_, err = configFile.WriteString(fmt.Sprintf("%s\n", folder))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
|
||||
}
|
||||
configured[folder] = true
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,81 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
var logger = log.New()
|
||||
|
||||
// config defines the options that can be set for the CLI through config files,
|
||||
// environment variables, or command line flags
|
||||
type config struct {
|
||||
// Debug indicates whether the CLI is started in "debug" mode
|
||||
Debug bool
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Create a config struct to hold the parsed environment variables or command line flags
|
||||
config := config{}
|
||||
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "NVIDIA Container Toolkit CLI"
|
||||
c.UseShortOptionHandling = true
|
||||
c.EnableBashCompletion = true
|
||||
c.Usage = "Tools to configure the NVIDIA Container Toolkit"
|
||||
c.Version = info.GetVersionString()
|
||||
|
||||
// Setup the flags for this command
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "debug",
|
||||
Aliases: []string{"d"},
|
||||
Usage: "Enable debug-level logging",
|
||||
Destination: &config.Debug,
|
||||
EnvVars: []string{"NVIDIA_CTK_DEBUG"},
|
||||
},
|
||||
}
|
||||
|
||||
// Set log-level for all subcommands
|
||||
c.Before = func(c *cli.Context) error {
|
||||
logLevel := log.InfoLevel
|
||||
if config.Debug {
|
||||
logLevel = log.DebugLevel
|
||||
}
|
||||
logger.SetLevel(logLevel)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Define the subcommands
|
||||
c.Commands = []*cli.Command{
|
||||
hook.NewCommand(logger),
|
||||
}
|
||||
|
||||
// Run the CLI
|
||||
err := c.Run(os.Args)
|
||||
if err != nil {
|
||||
log.Errorf("%v", err)
|
||||
log.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -16,4 +16,3 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
|
||||
@@ -16,4 +16,3 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
|
||||
@@ -16,4 +16,3 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
|
||||
@@ -16,4 +16,3 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
|
||||
@@ -16,11 +16,3 @@ ldconfig = "@/sbin/ldconfig.real"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
#experimental = false
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
19
config/config.toml.ubuntu+jetpack
Normal file
19
config/config.toml.ubuntu+jetpack
Normal file
@@ -0,0 +1,19 @@
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compute,compat32,graphics,utility,video,display"
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
#path = "/usr/bin/nvidia-container-cli"
|
||||
environment = []
|
||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||
#ldcache = "/etc/ld.so.cache"
|
||||
load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig.real"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
@@ -4,10 +4,11 @@ FROM ${BASEIMAGE}
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
wget \
|
||||
git \
|
||||
make \
|
||||
rpm-build \
|
||||
make && \
|
||||
wget \
|
||||
&& \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
@@ -41,8 +42,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
|
||||
@@ -4,10 +4,11 @@ FROM ${BASEIMAGE}
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
wget \
|
||||
git \
|
||||
make \
|
||||
rpm-build && \
|
||||
rpm-build \
|
||||
wget \
|
||||
&& \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
@@ -41,8 +42,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
|
||||
@@ -48,8 +48,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
|
||||
@@ -14,7 +14,7 @@
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
FROM golang:${GOLANG_VERSION}
|
||||
|
||||
RUN go install golang.org/x/lint/golint@latest
|
||||
RUN go install github.com/matryer/moq@latest
|
||||
RUN go install github.com/gordonklaus/ineffassign@latest
|
||||
RUN go install github.com/client9/misspell/cmd/misspell@latest
|
||||
RUN go get -u golang.org/x/lint/golint
|
||||
RUN go get -u github.com/matryer/moq
|
||||
RUN go get -u github.com/gordonklaus/ineffassign
|
||||
RUN go get -u github.com/client9/misspell/cmd/misspell
|
||||
@@ -39,8 +39,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
|
||||
@@ -46,8 +46,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
|
||||
@@ -98,7 +98,6 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
||||
# private centos target
|
||||
--centos%: OS := centos
|
||||
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private amazonlinux target
|
||||
--amazonlinux%: OS := amazonlinux
|
||||
@@ -114,7 +113,6 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
||||
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
|
||||
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
|
||||
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# We allow the CONFIG_TOML_SUFFIX to be overridden.
|
||||
CONFIG_TOML_SUFFIX ?= $(OS)
|
||||
@@ -124,18 +122,15 @@ docker-build-%:
|
||||
docker pull --platform=linux/$(ARCH) $(BASEIMAGE)
|
||||
DOCKER_BUILDKIT=1 \
|
||||
$(DOCKER) build \
|
||||
--platform=linux/$(ARCH) \
|
||||
--progress=plain \
|
||||
--build-arg BASEIMAGE="$(BASEIMAGE)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg PKG_VERS="$(LIB_VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
|
||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
--file $(DOCKERFILE) .
|
||||
$(DOCKER) run \
|
||||
--platform=linux/$(ARCH) \
|
||||
-e DISTRIB \
|
||||
-e SECTION \
|
||||
-v $(ARTIFACTS_DIR):/dist \
|
||||
|
||||
@@ -14,37 +14,43 @@
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewDirectoryLocator creates a Locator that can be used to find directories at the specified root. A logger
|
||||
// is also specified.
|
||||
func NewDirectoryLocator(logger *log.Logger, root string) Locator {
|
||||
l := file{
|
||||
logger: logger,
|
||||
prefixes: []string{root},
|
||||
filter: assertDirectory,
|
||||
}
|
||||
func main() {
|
||||
log.Infof("Starting device discovery with NVML")
|
||||
|
||||
return &l
|
||||
}
|
||||
|
||||
// assertDirectory checks wither the specified path is a directory.
|
||||
func assertDirectory(filename string) error {
|
||||
info, err := os.Stat(filename)
|
||||
d, err := discover.NewNVMLServer("")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting info for %v: %v", filename, err)
|
||||
log.Errorf("Error creating NVML Server: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if !info.IsDir() {
|
||||
return fmt.Errorf("specified path '%v' is not a directory", filename)
|
||||
devices, err := d.Devices()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
return nil
|
||||
mounts, err := d.Mounts()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering mounts: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
|
||||
log.Infof("Discovered devices:")
|
||||
enc.Encode(devices)
|
||||
|
||||
log.Infof("Discovered libraries:")
|
||||
enc.Encode(mounts)
|
||||
}
|
||||
65
examples/filter/main.go
Normal file
65
examples/filter/main.go
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/filter"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func main() {
|
||||
d, err := discover.NewNVMLServer("")
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
}
|
||||
|
||||
selected := filter.NewSelectDevicesFrom(d, "all", nil)
|
||||
|
||||
devices, err := selected.Devices()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
mounts, err := selected.Mounts()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering mounts: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
hooks, err := selected.Hooks()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering hooks: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
|
||||
log.Infof("Discovered devices:")
|
||||
enc.Encode(devices)
|
||||
|
||||
log.Infof("Discovered libraries:")
|
||||
enc.Encode(mounts)
|
||||
|
||||
log.Infof("Discovered hook:")
|
||||
enc.Encode(hooks)
|
||||
}
|
||||
26
examples/print-ldcache/main.go
Normal file
26
examples/print-ldcache/main.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var logger = log.StandardLogger()
|
||||
|
||||
func main() {
|
||||
logger.SetLevel(log.DebugLevel)
|
||||
logger.Infof("Starting device discovery with NVML")
|
||||
|
||||
cache, err := ldcache.NewLDCacheWithLogger(logger, "/run/nvidia/driver")
|
||||
if err != nil {
|
||||
logger.Errorf("Error loading ldcache: %v", err)
|
||||
return
|
||||
}
|
||||
defer cache.Close()
|
||||
|
||||
libs32, libs64 := cache.Lookup("lib")
|
||||
|
||||
logger.Infof("32-bit: %v", libs32)
|
||||
logger.Infof("64-bit: %v", libs64)
|
||||
|
||||
}
|
||||
15
go.mod
15
go.mod
@@ -3,16 +3,15 @@ module github.com/NVIDIA/nvidia-container-toolkit
|
||||
go 1.14
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.0.0
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.3.1-0.20220224133719-e5457123010b
|
||||
github.com/containers/podman/v4 v4.0.3
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab
|
||||
github.com/pelletier/go-toml v1.9.4
|
||||
github.com/BurntSushi/toml v0.3.1
|
||||
github.com/NVIDIA/go-nvml v0.11.1-0
|
||||
github.com/containers/podman/v2 v2.2.1
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20211101234015-a3c33d663ebc
|
||||
github.com/pelletier/go-toml v1.9.3
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/tsaikd/KDGoLib v0.0.0-20191001134900-7f3cf518e07d
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
golang.org/x/mod v0.5.0
|
||||
golang.org/x/sys v0.0.0-20220114195835-da31bd327af9
|
||||
golang.org/x/mod v0.3.0
|
||||
golang.org/x/sys v0.0.0-20210426230700-d19ff857e887
|
||||
)
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"github.com/pelletier/go-toml"
|
||||
)
|
||||
|
||||
// ContainerCLIConfig stores the options for the nvidia-container-cli
|
||||
type ContainerCLIConfig struct {
|
||||
Root string
|
||||
}
|
||||
|
||||
// getContainerCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getContainerCLIConfigFrom(toml *toml.Tree) *ContainerCLIConfig {
|
||||
cfg := getDefaultContainerCLIConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg
|
||||
}
|
||||
|
||||
cfg.Root = toml.GetDefault("nvidia-container-cli.root", cfg.Root).(string)
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
// getDefaultContainerCLIConfig defines the default values for the config
|
||||
func getDefaultContainerCLIConfig() *ContainerCLIConfig {
|
||||
c := ContainerCLIConfig{
|
||||
Root: "",
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -1,114 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
)
|
||||
|
||||
const (
|
||||
configOverride = "XDG_CONFIG_HOME"
|
||||
configFilePath = "nvidia-container-runtime/config.toml"
|
||||
)
|
||||
|
||||
var (
|
||||
// DefaultExecutableDir specifies the default path to use for executables if they cannot be located in the path.
|
||||
DefaultExecutableDir = "/usr/bin"
|
||||
|
||||
// NVIDIAContainerRuntimeHookExecutable is the executable name for the NVIDIA Container Runtime Hook
|
||||
NVIDIAContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
|
||||
// NVIDIAContainerToolkitExecutable is the executable name for the NVIDIA Container Toolkit (an alias for the NVIDIA Container Runtime Hook)
|
||||
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
|
||||
|
||||
configDir = "/etc/"
|
||||
)
|
||||
|
||||
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
|
||||
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
|
||||
type Config struct {
|
||||
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
|
||||
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
|
||||
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
}
|
||||
|
||||
// GetConfig sets up the config struct. Values are read from a toml file
|
||||
// or set via the environment.
|
||||
func GetConfig() (*Config, error) {
|
||||
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
||||
configDir = XDGConfigDir
|
||||
}
|
||||
|
||||
configFilePath := path.Join(configDir, configFilePath)
|
||||
|
||||
tomlFile, err := os.Open(configFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open config file %v: %v", configFilePath, err)
|
||||
}
|
||||
defer tomlFile.Close()
|
||||
|
||||
cfg, err := loadConfigFrom(tomlFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read config values: %v", err)
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// loadRuntimeConfigFrom reads the config from the specified Reader
|
||||
func loadConfigFrom(reader io.Reader) (*Config, error) {
|
||||
toml, err := toml.LoadReader(reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return getConfigFrom(toml)
|
||||
}
|
||||
|
||||
// getConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getConfigFrom(toml *toml.Tree) (*Config, error) {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
|
||||
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
|
||||
runtimeConfig, err := getRuntimeConfigFrom(toml)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err)
|
||||
}
|
||||
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// getDefaultConfig defines the default values for the config
|
||||
func getDefaultConfig() *Config {
|
||||
c := Config{
|
||||
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
|
||||
NVIDIACTKConfig: *getDefaultCTKConfig(),
|
||||
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -1,165 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
testDir := filepath.Join(wd, "test")
|
||||
filename := filepath.Join(testDir, configFilePath)
|
||||
|
||||
os.Setenv(configOverride, testDir)
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
cfg, err := GetConfig()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, cfg.NVIDIAContainerRuntimeConfig.DebugFilePath, "/nvidia-container-toolkit.log")
|
||||
}
|
||||
|
||||
func TestGetConfig(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
contents []string
|
||||
expectedError error
|
||||
expectedConfig *Config
|
||||
}{
|
||||
{
|
||||
description: "empty config is default",
|
||||
expectedConfig: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
Runtimes: []string{"docker-runc", "runc"},
|
||||
Mode: "auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "nvidia-ctk",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "config options set inline",
|
||||
contents: []string{
|
||||
"nvidia-container-cli.root = \"/bar/baz\"",
|
||||
"nvidia-container-runtime.debug = \"/foo/bar\"",
|
||||
"nvidia-container-runtime.experimental = true",
|
||||
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
|
||||
"nvidia-container-runtime.log-level = \"debug\"",
|
||||
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
|
||||
"nvidia-container-runtime.mode = \"not-auto\"",
|
||||
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
expectedConfig: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "/bar/baz",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "/foo/bar/nvidia-ctk",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "config options set in section",
|
||||
contents: []string{
|
||||
"[nvidia-container-cli]",
|
||||
"root = \"/bar/baz\"",
|
||||
"[nvidia-container-runtime]",
|
||||
"debug = \"/foo/bar\"",
|
||||
"experimental = true",
|
||||
"discover-mode = \"not-legacy\"",
|
||||
"log-level = \"debug\"",
|
||||
"runtimes = [\"/some/runtime\",]",
|
||||
"mode = \"not-auto\"",
|
||||
"[nvidia-container-runtime.modes.csv]",
|
||||
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"[nvidia-ctk]",
|
||||
"path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
expectedConfig: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "/bar/baz",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "/foo/bar/nvidia-ctk",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
|
||||
|
||||
cfg, err := loadConfigFrom(reader)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.EqualValues(t, tc.expectedConfig, cfg)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,143 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
)
|
||||
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
// a map of environment variable to values that can be used to perform lookups
|
||||
// such as requirements.
|
||||
type CUDA map[string]string
|
||||
|
||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||
// The process environment is read (if present) to construc the CUDA Image.
|
||||
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
||||
if spec == nil || spec.Process == nil {
|
||||
return NewCUDAImageFromEnv(nil)
|
||||
}
|
||||
|
||||
return NewCUDAImageFromEnv(spec.Process.Env)
|
||||
}
|
||||
|
||||
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||
// is a list of strings of the form ENVAR=VALUE.
|
||||
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
||||
c := make(CUDA)
|
||||
|
||||
for _, e := range env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
c[parts[0]] = parts[1]
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||
func (i CUDA) IsLegacy() bool {
|
||||
legacyCudaVersion := i[envCUDAVersion]
|
||||
cudaRequire := i[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
|
||||
// variables.
|
||||
func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// TODO: We need not process this if disable require is set, but this will be done
|
||||
// in a single follow-up to ensure that the behavioural change is accurately captured.
|
||||
// if i.HasDisableRequire() {
|
||||
// return nil, nil
|
||||
// }
|
||||
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range i {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
if i.IsLegacy() {
|
||||
v, err := i.legacyVersion()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get version: %v", err)
|
||||
}
|
||||
cudaRequire := fmt.Sprintf("cuda>=%s", v)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
}
|
||||
return requirements, nil
|
||||
}
|
||||
|
||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||
func (i CUDA) HasDisableRequire() bool {
|
||||
if disable, exists := i[envNVDisableRequire]; exists {
|
||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||
d, _ := strconv.ParseBool(disable)
|
||||
return d
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid CUDA version: %v", err)
|
||||
}
|
||||
|
||||
return majorMinor, nil
|
||||
}
|
||||
|
||||
func parseMajorMinorVersion(version string) (string, error) {
|
||||
vVersion := "v" + strings.TrimPrefix(version, "v")
|
||||
|
||||
if !semver.IsValid(vVersion) {
|
||||
return "", fmt.Errorf("invalid version string")
|
||||
}
|
||||
|
||||
majorMinor := strings.TrimPrefix(semver.MajorMinor(vVersion), "v")
|
||||
parts := strings.Split(majorMinor, ".")
|
||||
|
||||
var err error
|
||||
_, err = strconv.ParseUint(parts[0], 10, 32)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid major version")
|
||||
}
|
||||
_, err = strconv.ParseUint(parts[1], 10, 32)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid minor version")
|
||||
}
|
||||
return majorMinor, nil
|
||||
}
|
||||
@@ -1,71 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseMajorMinorVersionValid(t *testing.T) {
|
||||
var tests = []struct {
|
||||
version string
|
||||
expected string
|
||||
}{
|
||||
{"0", "0.0"},
|
||||
{"8", "8.0"},
|
||||
{"7.5", "7.5"},
|
||||
{"9.0.116", "9.0"},
|
||||
{"4294967295.4294967295.4294967295", "4294967295.4294967295"},
|
||||
{"v11.6", "11.6"},
|
||||
}
|
||||
for _, c := range tests {
|
||||
t.Run(c.version, func(t *testing.T) {
|
||||
version, err := parseMajorMinorVersion(c.version)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, c.expected, version)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseMajorMinorVersionInvalid(t *testing.T) {
|
||||
var tests = []string{
|
||||
"foo",
|
||||
"foo.5.10",
|
||||
"9.0.116.50",
|
||||
"9.0.116foo",
|
||||
"7.foo",
|
||||
"9.0.bar",
|
||||
"9.4294967296",
|
||||
"9.0.116.",
|
||||
"9..0",
|
||||
"9.",
|
||||
".5.10",
|
||||
"-9",
|
||||
"+9",
|
||||
"-9.1.116",
|
||||
"-9.-1.-116",
|
||||
}
|
||||
for _, c := range tests {
|
||||
t.Run(c, func(t *testing.T) {
|
||||
_, err := parseMajorMinorVersion(c)
|
||||
require.Error(t, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,95 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
dockerRuncExecutableName = "docker-runc"
|
||||
runcExecutableName = "runc"
|
||||
|
||||
auto = "auto"
|
||||
)
|
||||
|
||||
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
|
||||
type RuntimeConfig struct {
|
||||
DebugFilePath string `toml:"debug"`
|
||||
// LogLevel defines the logging level for the application
|
||||
LogLevel string `toml:"log-level"`
|
||||
// Runtimes defines the candidates for the low-level runtime
|
||||
Runtimes []string `toml:"runtimes"`
|
||||
Mode string `toml:"mode"`
|
||||
Modes modesConfig `toml:"modes"`
|
||||
}
|
||||
|
||||
// modesConfig defines (optional) per-mode configs
|
||||
type modesConfig struct {
|
||||
CSV csvModeConfig `toml:"csv"`
|
||||
}
|
||||
|
||||
type csvModeConfig struct {
|
||||
MountSpecPath string `toml:"mount-spec-path"`
|
||||
}
|
||||
|
||||
// dummy allows us to unmarshal only a RuntimeConfig from a *toml.Tree
|
||||
type dummy struct {
|
||||
Runtime RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
}
|
||||
|
||||
// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
|
||||
cfg := GetDefaultRuntimeConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
d := dummy{
|
||||
Runtime: *cfg,
|
||||
}
|
||||
|
||||
if err := toml.Unmarshal(&d); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err)
|
||||
}
|
||||
|
||||
return &d.Runtime, nil
|
||||
}
|
||||
|
||||
// GetDefaultRuntimeConfig defines the default values for the config
|
||||
func GetDefaultRuntimeConfig() *RuntimeConfig {
|
||||
c := RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: logrus.InfoLevel.String(),
|
||||
Runtimes: []string{
|
||||
dockerRuncExecutableName,
|
||||
runcExecutableName,
|
||||
},
|
||||
Mode: auto,
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import "github.com/pelletier/go-toml"
|
||||
|
||||
// CTKConfig stores the config options for the NVIDIA Container Toolkit CLI (nvidia-ctk)
|
||||
type CTKConfig struct {
|
||||
Path string `toml:"path"`
|
||||
}
|
||||
|
||||
// getCTKConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getCTKConfigFrom(toml *toml.Tree) *CTKConfig {
|
||||
cfg := getDefaultCTKConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg
|
||||
}
|
||||
|
||||
cfg.Path = toml.GetDefault("nvidia-ctk.path", cfg.Path).(string)
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
// getDefaultCTKConfig defines the default values for the config
|
||||
func getDefaultCTKConfig() *CTKConfig {
|
||||
c := CTKConfig{
|
||||
Path: "nvidia-ctk",
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -1,137 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cuda
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
)
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
|
||||
#ifdef _WIN32
|
||||
#define CUDAAPI __stdcall
|
||||
#else
|
||||
#define CUDAAPI
|
||||
#endif
|
||||
|
||||
typedef int CUdevice;
|
||||
|
||||
typedef enum CUdevice_attribute_enum {
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
|
||||
} CUdevice_attribute;
|
||||
|
||||
typedef enum cudaError_enum {
|
||||
CUDA_SUCCESS = 0
|
||||
} CUresult;
|
||||
|
||||
CUresult CUDAAPI cuInit(unsigned int Flags);
|
||||
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
|
||||
CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
|
||||
CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||
*/
|
||||
import "C"
|
||||
|
||||
const (
|
||||
libraryName = "libcuda.so.1"
|
||||
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
|
||||
)
|
||||
|
||||
// cuda stores a reference the cuda dynamic library
|
||||
var lib *dl.DynamicLibrary
|
||||
|
||||
// Version returns the CUDA version of the driver as a string or an error if this
|
||||
// cannot be determined.
|
||||
func Version() (string, error) {
|
||||
lib, err := load()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer lib.Close()
|
||||
|
||||
if err := lib.Lookup("cuDriverGetVersion"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
|
||||
var version C.int
|
||||
if result := C.cuDriverGetVersion(&version); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA version: result=%v", result)
|
||||
}
|
||||
|
||||
major := version / 1000
|
||||
minor := version % 100 / 10
|
||||
|
||||
return fmt.Sprintf("%d.%d", major, minor), nil
|
||||
}
|
||||
|
||||
// ComputeCapability returns the CUDA compute capability of a device with the specified index as a string
|
||||
// or an error if this cannot be determined.
|
||||
func ComputeCapability(index int) (string, error) {
|
||||
lib, err := load()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer lib.Close()
|
||||
|
||||
if err := lib.Lookup("cuInit"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
if err := lib.Lookup("cuDeviceGet"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
if err := lib.Lookup("cuDeviceGetAttribute"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
|
||||
if result := C.cuInit(C.uint(0)); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to initialize CUDA: result=%v", result)
|
||||
}
|
||||
|
||||
var device C.CUdevice
|
||||
// NOTE: We only query the first device
|
||||
if result := C.cuDeviceGet(&device, C.int(index)); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA device %v: result=%v", 0, result)
|
||||
}
|
||||
|
||||
var major C.int
|
||||
if result := C.cuDeviceGetAttribute(&major, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA compute capability major for device %v : result=%v", 0, result)
|
||||
}
|
||||
|
||||
var minor C.int
|
||||
if result := C.cuDeviceGetAttribute(&minor, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA compute capability minor for device %v: result=%v", 0, result)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%d.%d", major, minor), nil
|
||||
}
|
||||
|
||||
func load() (*dl.DynamicLibrary, error) {
|
||||
lib := dl.New(libraryName, libraryLoadFlags)
|
||||
if lib == nil {
|
||||
return nil, fmt.Errorf("error instantiating DynamicLibrary for CUDA")
|
||||
}
|
||||
err := lib.Open()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening DynamicLibrary for CUDA: %v", err)
|
||||
}
|
||||
|
||||
return lib, nil
|
||||
}
|
||||
51
internal/discover/binaries.go
Normal file
51
internal/discover/binaries.go
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewBinaryMounts creates a discoverer for binaries using the specified root
|
||||
func NewBinaryMounts(root string) Discover {
|
||||
return NewBinaryMountsWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewBinaryMountsWithLogger creates a Mounts discoverer as with NewBinaryMounts
|
||||
// with the specified logger
|
||||
func NewBinaryMountsWithLogger(logger *log.Logger, root string) Discover {
|
||||
d := mounts{
|
||||
logger: logger,
|
||||
lookup: lookup.NewPathLocatorWithLogger(logger, root),
|
||||
required: requiredBinaries,
|
||||
}
|
||||
return &d
|
||||
}
|
||||
|
||||
// requiredBinaries defines a set of binaries and their labels
|
||||
var requiredBinaries = map[string][]string{
|
||||
"utility": {
|
||||
"nvidia-smi", /* System management interface */
|
||||
"nvidia-debugdump", /* GPU coredump utility */
|
||||
"nvidia-persistenced", /* Persistence mode utility */
|
||||
},
|
||||
"compute": {
|
||||
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
||||
"nvidia-cuda-mps-server", /* Multi process service server */
|
||||
},
|
||||
}
|
||||
73
internal/discover/binaries_test.go
Normal file
73
internal/discover/binaries_test.go
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestBinaries(t *testing.T) {
|
||||
binaryLookup := map[string]string{
|
||||
"nvidia-smi": "/usr/bin/nvidia-smi",
|
||||
"nvidia-persistenced": "/usr/bin/nvidia-persistenced",
|
||||
"nvidia-debugdump": "test-duplicates",
|
||||
"nvidia-cuda-mps-control": "test-duplicates",
|
||||
}
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
d := NewBinaryMountsWithLogger(logger, "")
|
||||
|
||||
// Override lookup for testing
|
||||
mockLocator := NewLocatorMockFromMap(binaryLookup)
|
||||
d.(*mounts).lookup = mockLocator
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
|
||||
expected := []Mount{
|
||||
{
|
||||
Path: "/usr/bin/nvidia-smi",
|
||||
},
|
||||
{
|
||||
Path: "/usr/bin/nvidia-persistenced",
|
||||
},
|
||||
{
|
||||
Path: "test-duplicates",
|
||||
},
|
||||
}
|
||||
|
||||
require.Equal(t, len(expected), len(mounts))
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
|
||||
func TestNewBinariesConstructor(t *testing.T) {
|
||||
b := NewBinaryMounts("").(*mounts)
|
||||
|
||||
require.NotNil(t, b.logger)
|
||||
require.NotNil(t, b.lookup)
|
||||
}
|
||||
@@ -18,26 +18,16 @@ package discover
|
||||
|
||||
import "fmt"
|
||||
|
||||
// list is a discoverer that contains a list of Discoverers. The output of the
|
||||
// Mounts functions is the concatenation of the output for each of the
|
||||
// composite is a discoverer that contains a list of Discoverers. The output of the
|
||||
// Devices, Mounts, and Hooks functions is the concatenation of the output for each of the
|
||||
// elements in the list.
|
||||
type list struct {
|
||||
type composite struct {
|
||||
discoverers []Discover
|
||||
}
|
||||
|
||||
var _ Discover = (*list)(nil)
|
||||
var _ Discover = (*composite)(nil)
|
||||
|
||||
// NewList creates a discoverer that is the composite of a list of discoveres.
|
||||
func NewList(d ...Discover) Discover {
|
||||
l := list{
|
||||
discoverers: d,
|
||||
}
|
||||
|
||||
return &l
|
||||
}
|
||||
|
||||
// Devices returns all devices from the included discoverers
|
||||
func (d list) Devices() ([]Device, error) {
|
||||
func (d composite) Devices() ([]Device, error) {
|
||||
var allDevices []Device
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
@@ -51,8 +41,7 @@ func (d list) Devices() ([]Device, error) {
|
||||
return allDevices, nil
|
||||
}
|
||||
|
||||
// Mounts returns all mounts from the included discoverers
|
||||
func (d list) Mounts() ([]Mount, error) {
|
||||
func (d composite) Mounts() ([]Mount, error) {
|
||||
var allMounts []Mount
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
@@ -66,8 +55,7 @@ func (d list) Mounts() ([]Mount, error) {
|
||||
return allMounts, nil
|
||||
}
|
||||
|
||||
// Hooks returns all Hooks from the included discoverers
|
||||
func (d list) Hooks() ([]Hook, error) {
|
||||
func (d composite) Hooks() ([]Hook, error) {
|
||||
var allHooks []Hook
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
@@ -80,3 +68,7 @@ func (d list) Hooks() ([]Hook, error) {
|
||||
|
||||
return allHooks, nil
|
||||
}
|
||||
|
||||
func (d *composite) add(di ...Discover) {
|
||||
d.discoverers = append(d.discoverers, di...)
|
||||
}
|
||||
@@ -1,139 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// charDevices is a discover for a list of character devices
|
||||
type charDevices mounts
|
||||
|
||||
var _ Discover = (*charDevices)(nil)
|
||||
|
||||
// NewFromCSVFiles creates a discoverer for the specified CSV files. A logger is also supplied.
|
||||
// The constructed discoverer is comprised of a list, with each element in the list being associated with a
|
||||
// single CSV files.
|
||||
func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discover, error) {
|
||||
if len(files) == 0 {
|
||||
logger.Warnf("No CSV files specified")
|
||||
return None{}, nil
|
||||
}
|
||||
|
||||
symlinkLocator := lookup.NewSymlinkLocator(logger, root)
|
||||
locators := map[csv.MountSpecType]lookup.Locator{
|
||||
csv.MountSpecDev: lookup.NewCharDeviceLocator(logger, root),
|
||||
csv.MountSpecDir: lookup.NewDirectoryLocator(logger, root),
|
||||
// Libraries and symlinks are handled in the same way
|
||||
csv.MountSpecLib: symlinkLocator,
|
||||
csv.MountSpecSym: symlinkLocator,
|
||||
}
|
||||
|
||||
var discoverers []Discover
|
||||
for _, filename := range files {
|
||||
d, err := NewFromCSVFile(logger, locators, filename)
|
||||
if err != nil {
|
||||
logger.Warnf("Skipping CSV file %v: %v", filename, err)
|
||||
continue
|
||||
}
|
||||
discoverers = append(discoverers, d)
|
||||
}
|
||||
|
||||
return &list{discoverers: discoverers}, nil
|
||||
}
|
||||
|
||||
// NewFromCSVFile creates a discoverer for the specified CSV file. A logger is also supplied.
|
||||
// The constructed discoverer is comprised of a list, with each element in the list being associated with a particular
|
||||
// MountSpecType.
|
||||
func NewFromCSVFile(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, filename string) (Discover, error) {
|
||||
// Create a discoverer for each file-kind combination
|
||||
targets, err := csv.NewCSVFileParser(logger, filename).Parse()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse CSV file: %v", err)
|
||||
}
|
||||
if len(targets) == 0 {
|
||||
return nil, fmt.Errorf("CSV file is empty")
|
||||
}
|
||||
|
||||
return newFromMountSpecs(logger, locators, targets)
|
||||
}
|
||||
|
||||
// newFromMountSpecs creates a discoverer for the CSV file. A logger is also supplied.
|
||||
// A list of csvDiscoverers is returned, with each being associated with a single MountSpecType.
|
||||
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, targets []*csv.MountSpec) (Discover, error) {
|
||||
if len(targets) == 0 {
|
||||
return &None{}, nil
|
||||
}
|
||||
|
||||
var discoverers []Discover
|
||||
var mountSpecTypes []csv.MountSpecType
|
||||
candidatesByType := make(map[csv.MountSpecType][]string)
|
||||
for _, t := range targets {
|
||||
if _, exists := candidatesByType[t.Type]; !exists {
|
||||
mountSpecTypes = append(mountSpecTypes, t.Type)
|
||||
}
|
||||
candidatesByType[t.Type] = append(candidatesByType[t.Type], t.Path)
|
||||
}
|
||||
|
||||
for _, t := range mountSpecTypes {
|
||||
locator, exists := locators[t]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("no locator defined for '%v'", t)
|
||||
}
|
||||
|
||||
m := &mounts{
|
||||
logger: logger,
|
||||
lookup: locator,
|
||||
required: candidatesByType[t],
|
||||
}
|
||||
|
||||
switch t {
|
||||
case csv.MountSpecDev:
|
||||
// For device mount specs, we insert a charDevices into the list of discoverers.
|
||||
discoverers = append(discoverers, (*charDevices)(m))
|
||||
default:
|
||||
discoverers = append(discoverers, m)
|
||||
}
|
||||
}
|
||||
|
||||
return &list{discoverers: discoverers}, nil
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts for the charDevices. Since this explicitly specifies a
|
||||
// device list, the mounts are nil.
|
||||
func (d *charDevices) Mounts() ([]Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Devices returns the discovered devices for the charDevices. Here the device nodes are first
|
||||
// discovered as mounts and these are converted to devices.
|
||||
func (d *charDevices) Devices() ([]Device, error) {
|
||||
devicesAsMounts, err := (*mounts)(d).Mounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
@@ -1,131 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultMountSpecPath is default location of CSV files that define the modifications required to the OCI spec
|
||||
DefaultMountSpecPath = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
)
|
||||
|
||||
// GetFileList returns the (non-recursive) list of CSV files in the specified
|
||||
// folder
|
||||
func GetFileList(root string) ([]string, error) {
|
||||
contents, err := os.ReadDir(root)
|
||||
if err != nil && errors.Is(err, os.ErrNotExist) {
|
||||
return nil, nil
|
||||
} else if err != nil {
|
||||
return nil, fmt.Errorf("failed to read the contents of %v: %v", root, err)
|
||||
}
|
||||
|
||||
var csvFilePaths []string
|
||||
for _, c := range contents {
|
||||
if c.IsDir() {
|
||||
continue
|
||||
}
|
||||
if c.Name() == ".csv" {
|
||||
continue
|
||||
}
|
||||
ext := strings.ToLower(filepath.Ext(c.Name()))
|
||||
if ext != ".csv" {
|
||||
continue
|
||||
}
|
||||
|
||||
csvFilePaths = append(csvFilePaths, filepath.Join(root, c.Name()))
|
||||
}
|
||||
|
||||
return csvFilePaths, nil
|
||||
}
|
||||
|
||||
// BaseFilesOnly filters out non-base CSV files from the list of CSV files.
|
||||
func BaseFilesOnly(filenames []string) []string {
|
||||
filter := map[string]bool{
|
||||
"l4t.csv": true,
|
||||
"drivers.csv": true,
|
||||
"devices.csv": true,
|
||||
}
|
||||
|
||||
var selected []string
|
||||
for _, file := range filenames {
|
||||
base := filepath.Base(file)
|
||||
if filter[base] {
|
||||
selected = append(selected, file)
|
||||
}
|
||||
}
|
||||
|
||||
return selected
|
||||
}
|
||||
|
||||
// Parser specifies an interface for parsing MountSpecs
|
||||
type Parser interface {
|
||||
Parse() ([]*MountSpec, error)
|
||||
}
|
||||
|
||||
type csv struct {
|
||||
logger *logrus.Logger
|
||||
filename string
|
||||
}
|
||||
|
||||
// NewCSVFileParser creates a new parser for reading MountSpecs from the specified CSV file
|
||||
func NewCSVFileParser(logger *logrus.Logger, filename string) Parser {
|
||||
p := csv{
|
||||
logger: logger,
|
||||
filename: filename,
|
||||
}
|
||||
|
||||
return &p
|
||||
}
|
||||
|
||||
// Parse parses the csv file and returns a list of MountSpecs in the file
|
||||
func (p csv) Parse() ([]*MountSpec, error) {
|
||||
reader, err := os.Open(p.filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open %v for reading: %v", p.filename, err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
return p.parseFromReader(reader), nil
|
||||
}
|
||||
|
||||
// parseFromReader parses the specified file and returns a list of required jetson mounts
|
||||
func (p csv) parseFromReader(reader io.Reader) []*MountSpec {
|
||||
var targets []*MountSpec
|
||||
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
target, err := NewMountSpecFromLine(line)
|
||||
if err != nil {
|
||||
p.logger.Debugf("Skipping invalid mount spec '%v': %v", line, err)
|
||||
continue
|
||||
}
|
||||
targets = append(targets, target)
|
||||
}
|
||||
|
||||
return targets
|
||||
}
|
||||
@@ -1,83 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetFileList(t *testing.T) {
|
||||
moduleRoot, _ := test.GetModuleRoot()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
root string
|
||||
files []string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "returns list of CSV files",
|
||||
root: "test/input/csv_samples/",
|
||||
files: []string{
|
||||
"jetson.csv",
|
||||
"simple_wrong.csv",
|
||||
"simple.csv",
|
||||
"spaced.csv",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "handles empty folder",
|
||||
root: "test/input/csv_samples/empty",
|
||||
},
|
||||
{
|
||||
description: "handles non-existent folder",
|
||||
root: "test/input/csv_samples/NONEXISTENT",
|
||||
},
|
||||
{
|
||||
description: "handles non-existent folder root",
|
||||
root: "/NONEXISTENT/test/input/csv_samples/",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
root := filepath.Join(moduleRoot, tc.root)
|
||||
files, err := GetFileList(root)
|
||||
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
require.Empty(t, files)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
var foundFiles []string
|
||||
for _, f := range files {
|
||||
require.Equal(t, root, filepath.Dir(f))
|
||||
require.Equal(t, ".csv", filepath.Ext(f))
|
||||
foundFiles = append(foundFiles, filepath.Base(f))
|
||||
}
|
||||
|
||||
require.ElementsMatch(t, tc.files, foundFiles)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// MountSpecType defines the mount types allowed in a CSV file
|
||||
type MountSpecType string
|
||||
|
||||
const (
|
||||
// MountSpecDev is used for character devices
|
||||
MountSpecDev = MountSpecType("dev")
|
||||
// MountSpecDir is used for directories
|
||||
MountSpecDir = MountSpecType("dir")
|
||||
// MountSpecLib is used for libraries or regular files
|
||||
MountSpecLib = MountSpecType("lib")
|
||||
// MountSpecSym is used for symlinks.
|
||||
MountSpecSym = MountSpecType("sym")
|
||||
)
|
||||
|
||||
// MountSpec represents a Jetson mount consisting of a type and a path.
|
||||
type MountSpec struct {
|
||||
Type MountSpecType
|
||||
Path string
|
||||
}
|
||||
|
||||
// NewMountSpecFromLine parses the specified line and returns the MountSpec or an error if the line is malformed
|
||||
func NewMountSpecFromLine(line string) (*MountSpec, error) {
|
||||
parts := strings.SplitN(strings.TrimSpace(line), ",", 2)
|
||||
if len(parts) < 2 {
|
||||
return nil, fmt.Errorf("failed to parse line: %v", line)
|
||||
}
|
||||
mountType := strings.TrimSpace(parts[0])
|
||||
path := strings.TrimSpace(parts[1])
|
||||
|
||||
return NewMountSpec(mountType, path)
|
||||
}
|
||||
|
||||
// NewMountSpec creates a MountSpec with the specified type and path. An error is returned if the type is invalid.
|
||||
func NewMountSpec(mountType string, path string) (*MountSpec, error) {
|
||||
mt := MountSpecType(mountType)
|
||||
switch mt {
|
||||
case MountSpecDev, MountSpecLib, MountSpecSym, MountSpecDir:
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected mount type: %v", mt)
|
||||
}
|
||||
if path == "" {
|
||||
return nil, fmt.Errorf("invalid path: %v", path)
|
||||
}
|
||||
|
||||
mount := MountSpec{
|
||||
Type: mt,
|
||||
Path: path,
|
||||
}
|
||||
|
||||
return &mount, nil
|
||||
}
|
||||
@@ -1,82 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewMountSpecFromLine(t *testing.T) {
|
||||
parseError := fmt.Errorf("failed to parse line")
|
||||
unexpectedError := fmt.Errorf("unexpected mount type")
|
||||
|
||||
testCases := []struct {
|
||||
line string
|
||||
expectedError error
|
||||
expectedValue MountSpec
|
||||
}{
|
||||
{
|
||||
line: "",
|
||||
expectedError: parseError,
|
||||
},
|
||||
{
|
||||
line: "\t",
|
||||
expectedError: parseError,
|
||||
},
|
||||
{
|
||||
line: ",",
|
||||
expectedError: parseError,
|
||||
},
|
||||
{
|
||||
line: "dev,",
|
||||
expectedError: parseError,
|
||||
},
|
||||
{
|
||||
line: "dev ,/a/path",
|
||||
expectedValue: MountSpec{
|
||||
Path: "/a/path",
|
||||
Type: "dev",
|
||||
},
|
||||
},
|
||||
{
|
||||
line: "dev ,/a/path,with,commas",
|
||||
expectedValue: MountSpec{
|
||||
Path: "/a/path,with,commas",
|
||||
Type: "dev",
|
||||
},
|
||||
},
|
||||
{
|
||||
line: "not-dev ,/a/path",
|
||||
expectedError: unexpectedError,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
target, err := NewMountSpecFromLine(tc.line)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &tc.expectedValue, target)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,160 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCharDevices(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
input *charDevices
|
||||
expectedMounts []Mount
|
||||
expectedMountsError error
|
||||
expectedDevicesError error
|
||||
expectedDevices []Device
|
||||
}{
|
||||
{
|
||||
description: "dev mounts are empty",
|
||||
input: (*charDevices)(
|
||||
&mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
),
|
||||
expectedDevices: []Device{{Path: "located"}},
|
||||
},
|
||||
{
|
||||
description: "dev devices returns error for nil lookup",
|
||||
input: &charDevices{},
|
||||
expectedDevicesError: fmt.Errorf("no lookup defined"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tc.input.logger = logger
|
||||
|
||||
mounts, err := tc.input.Mounts()
|
||||
if tc.expectedMountsError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedMounts, mounts)
|
||||
|
||||
devices, err := tc.input.Devices()
|
||||
if tc.expectedDevicesError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewFromMountSpec(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
locators := map[csv.MountSpecType]lookup.Locator{
|
||||
"dev": &lookup.LocatorMock{},
|
||||
"lib": &lookup.LocatorMock{},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
targets []*csv.MountSpec
|
||||
expectedError error
|
||||
expectedDiscoverer Discover
|
||||
}{
|
||||
{
|
||||
description: "empty targets returns None discoverer list",
|
||||
expectedDiscoverer: &None{},
|
||||
},
|
||||
{
|
||||
description: "unexpected locator returns error",
|
||||
targets: []*csv.MountSpec{
|
||||
{
|
||||
Type: "foo",
|
||||
Path: "bar",
|
||||
},
|
||||
},
|
||||
expectedError: fmt.Errorf("no locator defined for foo"),
|
||||
},
|
||||
{
|
||||
description: "creates discoverers based on type",
|
||||
targets: []*csv.MountSpec{
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev0",
|
||||
},
|
||||
{
|
||||
Type: "lib",
|
||||
Path: "lib0",
|
||||
},
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev1",
|
||||
},
|
||||
},
|
||||
expectedDiscoverer: &list{
|
||||
discoverers: []Discover{
|
||||
(*charDevices)(
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["dev"],
|
||||
required: []string{"dev0", "dev1"},
|
||||
},
|
||||
),
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["lib"],
|
||||
required: []string{"lib0"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
discoverer, err := newFromMountSpecs(logger, locators, tc.targets)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, tc.expectedDiscoverer, discoverer)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -16,31 +16,48 @@
|
||||
|
||||
package discover
|
||||
|
||||
// Config represents the configuration options for discovery
|
||||
type Config struct {
|
||||
Root string
|
||||
NVIDIAContainerToolkitCLIExecutablePath string
|
||||
// DevicePath is a path in /dev associated with a device
|
||||
type DevicePath string
|
||||
|
||||
// ProcPath is a path in /proc associated with a devices
|
||||
type ProcPath string
|
||||
|
||||
// PCIBusID is the ID on the PCI bus of a device
|
||||
type PCIBusID string
|
||||
|
||||
// DeviceNode represents a device on the file system
|
||||
type DeviceNode struct {
|
||||
Path DevicePath
|
||||
Major int
|
||||
Minor int
|
||||
}
|
||||
|
||||
// Device represents a discovered character device.
|
||||
// Device represents a discovered device including identifiers (Index, UUID, PCI bus ID)
|
||||
// for selection and paths in /dev and /proc associated with the device
|
||||
type Device struct {
|
||||
Path string
|
||||
Index string
|
||||
UUID string
|
||||
PCIBusID PCIBusID
|
||||
DeviceNodes []DeviceNode
|
||||
ProcPaths []ProcPath
|
||||
}
|
||||
|
||||
// Mount represents a discovered mount.
|
||||
// Mount represents a discovered mount. This includes a set of labels
|
||||
// for selection and the mount path
|
||||
type Mount struct {
|
||||
Path string
|
||||
Path string
|
||||
Labels map[string]string
|
||||
}
|
||||
|
||||
// Hook represents a discovered hook.
|
||||
// Hook represents a discovered hook
|
||||
type Hook struct {
|
||||
Lifecycle string
|
||||
Path string
|
||||
Args []string
|
||||
Path string
|
||||
Args []string
|
||||
HookName string
|
||||
Labels map[string]string
|
||||
}
|
||||
|
||||
//go:generate moq -stub -out discover_mock.go . Discover
|
||||
// Discover defines an interface for discovering the devices, mounts, and hooks available on a system
|
||||
// Discover defines an interface for discovering the devices and mounts available on a system
|
||||
type Discover interface {
|
||||
Devices() ([]Device, error)
|
||||
Mounts() ([]Mount, error)
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that DiscoverMock does implement Discover.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ Discover = &DiscoverMock{}
|
||||
|
||||
// DiscoverMock is a mock implementation of Discover.
|
||||
//
|
||||
// func TestSomethingThatUsesDiscover(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked Discover
|
||||
// mockedDiscover := &DiscoverMock{
|
||||
// DevicesFunc: func() ([]Device, error) {
|
||||
// panic("mock out the Devices method")
|
||||
// },
|
||||
// HooksFunc: func() ([]Hook, error) {
|
||||
// panic("mock out the Hooks method")
|
||||
// },
|
||||
// MountsFunc: func() ([]Mount, error) {
|
||||
// panic("mock out the Mounts method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedDiscover in code that requires Discover
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type DiscoverMock struct {
|
||||
// DevicesFunc mocks the Devices method.
|
||||
DevicesFunc func() ([]Device, error)
|
||||
|
||||
// HooksFunc mocks the Hooks method.
|
||||
HooksFunc func() ([]Hook, error)
|
||||
|
||||
// MountsFunc mocks the Mounts method.
|
||||
MountsFunc func() ([]Mount, error)
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Devices holds details about calls to the Devices method.
|
||||
Devices []struct {
|
||||
}
|
||||
// Hooks holds details about calls to the Hooks method.
|
||||
Hooks []struct {
|
||||
}
|
||||
// Mounts holds details about calls to the Mounts method.
|
||||
Mounts []struct {
|
||||
}
|
||||
}
|
||||
lockDevices sync.RWMutex
|
||||
lockHooks sync.RWMutex
|
||||
lockMounts sync.RWMutex
|
||||
}
|
||||
|
||||
// Devices calls DevicesFunc.
|
||||
func (mock *DiscoverMock) Devices() ([]Device, error) {
|
||||
callInfo := struct {
|
||||
}{}
|
||||
mock.lockDevices.Lock()
|
||||
mock.calls.Devices = append(mock.calls.Devices, callInfo)
|
||||
mock.lockDevices.Unlock()
|
||||
if mock.DevicesFunc == nil {
|
||||
var (
|
||||
devicesOut []Device
|
||||
errOut error
|
||||
)
|
||||
return devicesOut, errOut
|
||||
}
|
||||
return mock.DevicesFunc()
|
||||
}
|
||||
|
||||
// DevicesCalls gets all the calls that were made to Devices.
|
||||
// Check the length with:
|
||||
// len(mockedDiscover.DevicesCalls())
|
||||
func (mock *DiscoverMock) DevicesCalls() []struct {
|
||||
} {
|
||||
var calls []struct {
|
||||
}
|
||||
mock.lockDevices.RLock()
|
||||
calls = mock.calls.Devices
|
||||
mock.lockDevices.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// Hooks calls HooksFunc.
|
||||
func (mock *DiscoverMock) Hooks() ([]Hook, error) {
|
||||
callInfo := struct {
|
||||
}{}
|
||||
mock.lockHooks.Lock()
|
||||
mock.calls.Hooks = append(mock.calls.Hooks, callInfo)
|
||||
mock.lockHooks.Unlock()
|
||||
if mock.HooksFunc == nil {
|
||||
var (
|
||||
hooksOut []Hook
|
||||
errOut error
|
||||
)
|
||||
return hooksOut, errOut
|
||||
}
|
||||
return mock.HooksFunc()
|
||||
}
|
||||
|
||||
// HooksCalls gets all the calls that were made to Hooks.
|
||||
// Check the length with:
|
||||
// len(mockedDiscover.HooksCalls())
|
||||
func (mock *DiscoverMock) HooksCalls() []struct {
|
||||
} {
|
||||
var calls []struct {
|
||||
}
|
||||
mock.lockHooks.RLock()
|
||||
calls = mock.calls.Hooks
|
||||
mock.lockHooks.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// Mounts calls MountsFunc.
|
||||
func (mock *DiscoverMock) Mounts() ([]Mount, error) {
|
||||
callInfo := struct {
|
||||
}{}
|
||||
mock.lockMounts.Lock()
|
||||
mock.calls.Mounts = append(mock.calls.Mounts, callInfo)
|
||||
mock.lockMounts.Unlock()
|
||||
if mock.MountsFunc == nil {
|
||||
var (
|
||||
mountsOut []Mount
|
||||
errOut error
|
||||
)
|
||||
return mountsOut, errOut
|
||||
}
|
||||
return mock.MountsFunc()
|
||||
}
|
||||
|
||||
// MountsCalls gets all the calls that were made to Mounts.
|
||||
// Check the length with:
|
||||
// len(mockedDiscover.MountsCalls())
|
||||
func (mock *DiscoverMock) MountsCalls() []struct {
|
||||
} {
|
||||
var calls []struct {
|
||||
}
|
||||
mock.lockMounts.RLock()
|
||||
calls = mock.calls.Mounts
|
||||
mock.lockMounts.RUnlock()
|
||||
return calls
|
||||
}
|
||||
424
internal/discover/discover_nvml.go
Normal file
424
internal/discover/discover_nvml.go
Normal file
@@ -0,0 +1,424 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
// ControlDeviceUUID is used as the UUID for control devices such as nvidiactl or nvidia-modeset
|
||||
ControlDeviceUUID = "CONTROL"
|
||||
|
||||
// MIGConfigDeviceUUID is used to indicate the MIG config control device
|
||||
MIGConfigDeviceUUID = "CONFIG"
|
||||
|
||||
// MIGMonitorDeviceUUID is used to indicate the MIG monitor control device
|
||||
MIGMonitorDeviceUUID = "MONITOR"
|
||||
|
||||
nvidiaGPUDeviceName = "nvidia-frontend"
|
||||
nvidiaCapsDeviceName = "nvidia-caps"
|
||||
nvidiaUVMDeviceName = "nvidia-uvm"
|
||||
)
|
||||
|
||||
type nvmlDiscover struct {
|
||||
None
|
||||
logger *log.Logger
|
||||
nvml nvml.Interface
|
||||
migCaps map[ProcPath]DeviceNode
|
||||
nvidiaDevices proc.NvidiaDevices
|
||||
}
|
||||
|
||||
var _ Discover = (*nvmlDiscover)(nil)
|
||||
|
||||
// NewNVMLDiscover constructs a discoverer that uses NVML to find the devices
|
||||
// available on a system.
|
||||
func NewNVMLDiscover(nvml nvml.Interface) (Discover, error) {
|
||||
return NewNVMLDiscoverWithLogger(log.StandardLogger(), nvml)
|
||||
}
|
||||
|
||||
// NewNVMLDiscoverWithLogger constructs a discovered as with NewNVMLDiscover with the specified
|
||||
// logger
|
||||
func NewNVMLDiscoverWithLogger(logger *log.Logger, nvml nvml.Interface) (Discover, error) {
|
||||
nvidiaDevices, err := proc.GetNvidiaDevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading NVIDIA devices: %v", err)
|
||||
}
|
||||
|
||||
var migCaps map[ProcPath]DeviceNode
|
||||
nvcapsDevice, exists := nvidiaDevices.Get(nvidiaCapsDeviceName)
|
||||
if !exists {
|
||||
logger.Warnf("%v nvcaps device could not be found", nvidiaCapsDeviceName)
|
||||
} else if migCaps, err = getMigCaps(nvcapsDevice.Major); err != nil {
|
||||
logger.Warnf("Could not load MIG capability devices: %v", err)
|
||||
migCaps = nil
|
||||
}
|
||||
|
||||
discover := &nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml,
|
||||
migCaps: migCaps,
|
||||
nvidiaDevices: nvidiaDevices,
|
||||
}
|
||||
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
// hasMigSupport checks if MIG device discovery is supported.
|
||||
// Cases where this will be disabled include where no MIG minors file is
|
||||
// present.
|
||||
func (d nvmlDiscover) hasMigSupport() bool {
|
||||
return len(d.migCaps) > 0
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) Devices() ([]Device, error) {
|
||||
ret := d.nvml.Init()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error initalizing NVML: %v", ret.Error())
|
||||
}
|
||||
defer d.tryShutdownNVML()
|
||||
|
||||
c, ret := d.nvml.DeviceGetCount()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device count: %v", ret.Error())
|
||||
}
|
||||
|
||||
var handles []nvml.Device
|
||||
for i := 0; i < c; i++ {
|
||||
handle, ret := d.nvml.DeviceGetHandleByIndex(i)
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device handle for device %v: %v", i, ret.Error())
|
||||
}
|
||||
|
||||
if !d.hasMigSupport() {
|
||||
handles = append(handles, handle)
|
||||
continue
|
||||
}
|
||||
|
||||
migHandles, err := getMIGHandlesForDevice(handle)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG handles for device: %v", err)
|
||||
}
|
||||
if len(migHandles) == 0 {
|
||||
handles = append(handles, handle)
|
||||
}
|
||||
handles = append(handles, migHandles...)
|
||||
}
|
||||
|
||||
return d.devicesByHandle(handles)
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) devicesByHandle(handles []nvml.Device) ([]Device, error) {
|
||||
var devices []Device
|
||||
var largestMinorNumber int
|
||||
for _, h := range handles {
|
||||
device, err := d.deviceFromNVMLHandle(h)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing device from handle %v: %v", h, err)
|
||||
}
|
||||
devices = append(devices, device)
|
||||
|
||||
if largestMinorNumber < device.DeviceNodes[0].Minor {
|
||||
largestMinorNumber = device.DeviceNodes[0].Minor
|
||||
}
|
||||
}
|
||||
|
||||
controlDevices, err := d.getControlDevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting control devices: %v", err)
|
||||
}
|
||||
devices = append(devices, controlDevices...)
|
||||
|
||||
if d.hasMigSupport() {
|
||||
migControlDevices, err := d.getMigControlDevices(largestMinorNumber)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG control devices: %v", err)
|
||||
}
|
||||
devices = append(devices, migControlDevices...)
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromNVMLHandle(handle nvml.Device) (Device, error) {
|
||||
if d.hasMigSupport() {
|
||||
isMigDevice, ret := handle.IsMigDeviceHandle()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error checking device handle: %v", ret.Error())
|
||||
}
|
||||
|
||||
if isMigDevice {
|
||||
return d.deviceFromMIGDeviceHandle(handle)
|
||||
}
|
||||
}
|
||||
|
||||
return d.deviceFromFullDeviceHandle(handle)
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromFullDeviceHandle(handle nvml.Device) (Device, error) {
|
||||
index, ret := handle.GetIndex()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device index: %v", ret.Error())
|
||||
}
|
||||
|
||||
uuid, ret := handle.GetUUID()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device UUID: %v", ret.Error())
|
||||
}
|
||||
|
||||
pciInfo, ret := handle.GetPciInfo()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting PCI info: %v", ret.Error())
|
||||
}
|
||||
busID := NewPCIBusID(pciInfo)
|
||||
|
||||
minor, ret := handle.GetMinorNumber()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting minor number: %v", ret.Error())
|
||||
}
|
||||
|
||||
nvidiaGPUDevice, exists := d.nvidiaDevices.Get(nvidiaGPUDeviceName)
|
||||
if !exists {
|
||||
return Device{}, fmt.Errorf("device for '%v' does not exist", nvidiaGPUDeviceName)
|
||||
}
|
||||
|
||||
deviceNode := DeviceNode{
|
||||
Path: DevicePath(fmt.Sprintf("/dev/nvidia%d", minor)),
|
||||
Major: nvidiaGPUDevice.Major,
|
||||
Minor: minor,
|
||||
}
|
||||
|
||||
device := Device{
|
||||
Index: fmt.Sprintf("%d", index),
|
||||
PCIBusID: busID,
|
||||
UUID: uuid,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: []ProcPath{busID.GetProcPath()},
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromMIGDeviceHandle(handle nvml.Device) (Device, error) {
|
||||
parent, ret := handle.GetDeviceHandleFromMigDeviceHandle()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting parent device handle: %v", ret.Error())
|
||||
}
|
||||
|
||||
gpu, ret := parent.GetMinorNumber()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting GPU minor number: %v", ret.Error())
|
||||
}
|
||||
|
||||
parentDevice, err := d.deviceFromFullDeviceHandle(parent)
|
||||
if err != nil {
|
||||
return Device{}, fmt.Errorf("error getting parent device: %v", err)
|
||||
}
|
||||
|
||||
index, ret := handle.GetIndex()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device index: %v", ret.Error())
|
||||
}
|
||||
|
||||
uuid, ret := handle.GetUUID()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device UUID: %v", ret.Error())
|
||||
}
|
||||
|
||||
capDeviceNodes := []DeviceNode{}
|
||||
procPaths, err := getProcPathsForMigDevice(gpu, handle)
|
||||
if err != nil {
|
||||
return Device{}, fmt.Errorf("error getting proc paths for MIG device: %v", err)
|
||||
}
|
||||
|
||||
for _, p := range procPaths {
|
||||
capDeviceNode, ok := d.migCaps[p]
|
||||
if !ok {
|
||||
return Device{}, fmt.Errorf("could not determine cap device path for %v", p)
|
||||
}
|
||||
capDeviceNodes = append(capDeviceNodes, capDeviceNode)
|
||||
}
|
||||
|
||||
device := Device{
|
||||
Index: fmt.Sprintf("%s:%d", parentDevice.Index, index),
|
||||
UUID: uuid,
|
||||
DeviceNodes: append(parentDevice.DeviceNodes, capDeviceNodes...),
|
||||
ProcPaths: append(parentDevice.ProcPaths, procPaths...),
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) getControlDevices() ([]Device, error) {
|
||||
devices := []struct {
|
||||
name string
|
||||
path string
|
||||
minor int
|
||||
}{
|
||||
// TODO: Where is the best place to find these device Minors programatically?
|
||||
{nvidiaGPUDeviceName, "/dev/nvidia-modeset", 254},
|
||||
{nvidiaGPUDeviceName, "/dev/nvidiactl", 255},
|
||||
{nvidiaUVMDeviceName, "/dev/nvidia-uvm", 0},
|
||||
{nvidiaUVMDeviceName, "/dev/nvidia-uvm-tools", 1},
|
||||
}
|
||||
|
||||
var controlDevices []Device
|
||||
for _, info := range devices {
|
||||
device, exists := d.nvidiaDevices.Get(info.name)
|
||||
if !exists {
|
||||
d.logger.Warnf("device name %v not defined; skipping control devices %v", info.name, info.path)
|
||||
continue
|
||||
}
|
||||
|
||||
deviceNode := DeviceNode{
|
||||
Path: DevicePath(info.path),
|
||||
Major: device.Major,
|
||||
Minor: info.minor,
|
||||
}
|
||||
|
||||
controlDevices = append(controlDevices, Device{
|
||||
UUID: ControlDeviceUUID,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: []ProcPath{},
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return controlDevices, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) getMigControlDevices(numGpus int) ([]Device, error) {
|
||||
targets := map[string]ProcPath{
|
||||
MIGConfigDeviceUUID: ProcPath("/proc/driver/nvidia/capabilities/mig/config"),
|
||||
MIGMonitorDeviceUUID: ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"),
|
||||
}
|
||||
|
||||
var devices []Device
|
||||
for id, procPath := range targets {
|
||||
deviceNode, exists := d.migCaps[procPath]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("device node for '%v' is undefined", procPath)
|
||||
}
|
||||
|
||||
var procPaths []ProcPath
|
||||
for gpu := 0; gpu <= numGpus; gpu++ {
|
||||
procPaths = append(procPaths, ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig", gpu)))
|
||||
}
|
||||
procPaths = append(procPaths, procPath)
|
||||
|
||||
devices = append(devices, Device{
|
||||
UUID: id,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: procPaths,
|
||||
})
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func getProcPathsForMigDevice(gpu int, handle nvml.Device) ([]ProcPath, error) {
|
||||
gi, ret := handle.GetGPUInstanceId()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU instance ID: %v", ret.Error())
|
||||
}
|
||||
|
||||
ci, ret := handle.GetComputeInstanceId()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting comput instance ID: %v", ret.Error())
|
||||
}
|
||||
|
||||
procPaths := []ProcPath{
|
||||
ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig/gi%d/access", gpu, gi)),
|
||||
ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci)),
|
||||
}
|
||||
|
||||
return procPaths, nil
|
||||
}
|
||||
|
||||
func getMIGHandlesForDevice(handle nvml.Device) ([]nvml.Device, error) {
|
||||
currentMigMode, _, ret := handle.GetMigMode()
|
||||
if ret.Value() == nvml.ERROR_NOT_SUPPORTED {
|
||||
return nil, nil
|
||||
}
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting MIG mode for device: %v", ret.Error())
|
||||
}
|
||||
if currentMigMode == nvml.DEVICE_MIG_DISABLE {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
maxMigDeviceCount, ret := handle.GetMaxMigDeviceCount()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting number of MIG devices: %v", ret.Error())
|
||||
}
|
||||
|
||||
var migHandles []nvml.Device
|
||||
for mi := 0; mi < maxMigDeviceCount; mi++ {
|
||||
migHandle, ret := handle.GetMigDeviceHandleByIndex(mi)
|
||||
if ret.Value() == nvml.ERROR_NOT_FOUND {
|
||||
continue
|
||||
}
|
||||
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting MIG device %v: %v", mi, ret.Error())
|
||||
}
|
||||
|
||||
migHandles = append(migHandles, migHandle)
|
||||
}
|
||||
|
||||
return migHandles, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) tryShutdownNVML() {
|
||||
ret := d.nvml.Shutdown()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
d.logger.Warnf("Could not shut down NVML: %v", ret.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// NewPCIBusID provides a utility function that returns the string representation
|
||||
// of the bus ID.
|
||||
func NewPCIBusID(p nvml.PciInfo) PCIBusID {
|
||||
var bytes []byte
|
||||
for _, b := range p.BusId {
|
||||
if byte(b) == '\x00' {
|
||||
break
|
||||
}
|
||||
bytes = append(bytes, byte(b))
|
||||
}
|
||||
return PCIBusID(string(bytes))
|
||||
}
|
||||
|
||||
// GetProcPath returns the path in /proc associated with the PCI bus ID
|
||||
func (p PCIBusID) GetProcPath() ProcPath {
|
||||
id := strings.ToLower(p.String())
|
||||
|
||||
if strings.HasPrefix(id, "0000") {
|
||||
id = id[4:]
|
||||
}
|
||||
return ProcPath(filepath.Join("/proc/driver/nvidia/gpus", id))
|
||||
}
|
||||
|
||||
func (p PCIBusID) String() string {
|
||||
return string(p)
|
||||
}
|
||||
44
internal/discover/discover_nvml_mig.go
Normal file
44
internal/discover/discover_nvml_mig.go
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
)
|
||||
|
||||
// getMigCaps returns a mapping of MIG capability path to device nodes
|
||||
func getMigCaps(capDeviceMajor int) (map[ProcPath]DeviceNode, error) {
|
||||
migCaps, err := nvcaps.LoadMigMinors()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading MIG minors: %v", err)
|
||||
}
|
||||
return getMigCapsFromMigMinors(migCaps, capDeviceMajor), nil
|
||||
}
|
||||
|
||||
func getMigCapsFromMigMinors(migCaps map[nvcaps.MigCap]nvcaps.MigMinor, capDeviceMajor int) map[ProcPath]DeviceNode {
|
||||
capsDevicePaths := make(map[ProcPath]DeviceNode)
|
||||
for cap, minor := range migCaps {
|
||||
capsDevicePaths[ProcPath(cap.ProcPath())] = DeviceNode{
|
||||
Path: DevicePath(minor.DevicePath()),
|
||||
Major: capDeviceMajor,
|
||||
Minor: int(minor),
|
||||
}
|
||||
}
|
||||
return capsDevicePaths
|
||||
}
|
||||
41
internal/discover/discover_nvml_mig_test.go
Normal file
41
internal/discover/discover_nvml_mig_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetMigCaps(t *testing.T) {
|
||||
migMinors := map[nvcaps.MigCap]nvcaps.MigMinor{
|
||||
"config": 1,
|
||||
"monitor": 2,
|
||||
"gpu0/gi0/access": 3,
|
||||
"gpu0/gi0/ci0/access": 4,
|
||||
}
|
||||
|
||||
migCapMajor := 999
|
||||
migCaps := getMigCapsFromMigMinors(migMinors, migCapMajor)
|
||||
|
||||
require.Len(t, migCaps, len(migMinors))
|
||||
for _, c := range migCaps {
|
||||
require.Equal(t, migCapMajor, c.Major)
|
||||
}
|
||||
}
|
||||
219
internal/discover/discover_nvml_test.go
Normal file
219
internal/discover/discover_nvml_test.go
Normal file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaGPUDeviceMajorDefault = 195
|
||||
nvidiaCapsDeviceMajorDefault = 235
|
||||
)
|
||||
|
||||
func newTestDiscover() nvmlDiscover {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
nvml := nvml.NewMockNVMLServer(nvml.NewMockA100Device(0))
|
||||
|
||||
return nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml,
|
||||
nvidiaDevices: proc.NewMockNvidiaDevices(
|
||||
proc.Device{Name: nvidiaGPUDeviceName, Major: nvidiaGPUDeviceMajorDefault},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func newTestDiscoverWithMIG() nvmlDiscover {
|
||||
device := &nvml.MockA100Device{
|
||||
Index: 0,
|
||||
MigMode: nvml.DEVICE_MIG_ENABLE,
|
||||
GpuInstances: make(map[*nvml.MockA100GpuInstance]struct{}),
|
||||
GpuInstanceCounter: 0,
|
||||
}
|
||||
// Create a single gi and ci on the device
|
||||
gpuInstanceProfileInfo := &nvml.GpuInstanceProfileInfo{
|
||||
Id: nvml.GPU_INSTANCE_PROFILE_7_SLICE,
|
||||
}
|
||||
gi, _ := device.CreateGpuInstance(gpuInstanceProfileInfo)
|
||||
|
||||
computeInstanceProfileInfo := &nvml.ComputeInstanceProfileInfo{
|
||||
Id: nvml.COMPUTE_INSTANCE_PROFILE_1_SLICE,
|
||||
}
|
||||
_, _ = gi.CreateComputeInstance(computeInstanceProfileInfo)
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
return nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml.NewMockNVMLServer(device),
|
||||
migCaps: map[ProcPath]DeviceNode{
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap3"),
|
||||
Minor: 3,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap4"),
|
||||
Minor: 4,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/mig/config"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap1"),
|
||||
Minor: 1,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap2"),
|
||||
Minor: 2,
|
||||
Major: 235,
|
||||
},
|
||||
},
|
||||
nvidiaDevices: proc.NewMockNvidiaDevices(
|
||||
proc.Device{Name: nvidiaGPUDeviceName, Major: nvidiaGPUDeviceMajorDefault},
|
||||
proc.Device{Name: nvidiaCapsDeviceName, Major: nvidiaCapsDeviceMajorDefault},
|
||||
),
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestDiscoverNvmlDevices(t *testing.T) {
|
||||
d := newTestDiscover()
|
||||
devices, err := d.Devices()
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, devices, 3)
|
||||
|
||||
device := devices[0]
|
||||
|
||||
require.Equal(t, "0", device.Index)
|
||||
require.Equal(t, "GPU-0", device.UUID)
|
||||
require.Equal(t, "0000FFFF:FF:FF.F", device.PCIBusID.String())
|
||||
|
||||
expectedDeviceNodes := []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia0"),
|
||||
Minor: 0,
|
||||
Major: nvidiaGPUDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, device.DeviceNodes)
|
||||
|
||||
expectedProcPaths := []ProcPath{ProcPath("/proc/driver/nvidia/gpus/ffff:ff:ff.f")}
|
||||
require.Equal(t, expectedProcPaths, device.ProcPaths)
|
||||
}
|
||||
|
||||
func TestDiscoverNvmlMigDevices(t *testing.T) {
|
||||
d := newTestDiscoverWithMIG()
|
||||
|
||||
devices, err := d.Devices()
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, devices, 5)
|
||||
|
||||
mig := devices[0]
|
||||
|
||||
require.Equal(t, "0:0", mig.Index)
|
||||
require.Equal(t, "MIG-0", mig.UUID)
|
||||
require.Empty(t, mig.PCIBusID)
|
||||
|
||||
expectedDeviceNodes := []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia0"),
|
||||
Minor: 0,
|
||||
Major: nvidiaGPUDeviceMajorDefault,
|
||||
},
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap3"),
|
||||
Minor: 3,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap4"),
|
||||
Minor: 4,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, mig.DeviceNodes)
|
||||
|
||||
expectedProcPaths := []ProcPath{
|
||||
ProcPath("/proc/driver/nvidia/gpus/ffff:ff:ff.f"),
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"),
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"),
|
||||
}
|
||||
require.Equal(t, expectedProcPaths, mig.ProcPaths)
|
||||
|
||||
var config *Device
|
||||
var monitor *Device
|
||||
for i, d := range devices {
|
||||
if d.UUID == "CONFIG" {
|
||||
config = &devices[i]
|
||||
}
|
||||
if d.UUID == "MONITOR" {
|
||||
monitor = &devices[i]
|
||||
}
|
||||
}
|
||||
|
||||
require.NotNil(t, config)
|
||||
require.NotNil(t, monitor)
|
||||
|
||||
require.Equal(t, "CONFIG", config.UUID)
|
||||
|
||||
expectedDeviceNodes = []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap1"),
|
||||
Minor: 1,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, config.DeviceNodes)
|
||||
|
||||
require.Contains(t, config.ProcPaths, ProcPath("/proc/driver/nvidia/capabilities/mig/config"))
|
||||
require.Len(t, config.ProcPaths, 2)
|
||||
|
||||
require.Equal(t, "MONITOR", monitor.UUID)
|
||||
|
||||
expectedDeviceNodes = []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap2"),
|
||||
Minor: 2,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, monitor.DeviceNodes)
|
||||
|
||||
require.Contains(t, monitor.ProcPaths, ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"))
|
||||
require.Len(t, monitor.ProcPaths, 2)
|
||||
|
||||
}
|
||||
|
||||
func TestPCIBusID(t *testing.T) {
|
||||
testCases := map[string]ProcPath{
|
||||
"0000FFFF:FF:FF.F": "/proc/driver/nvidia/gpus/ffff:ff:ff.f",
|
||||
"FFFFFFFF:FF:FF.F": "/proc/driver/nvidia/gpus/ffffffff:ff:ff.f",
|
||||
}
|
||||
|
||||
for busID, procPath := range testCases {
|
||||
p := PCIBusID(busID)
|
||||
require.Equal(t, busID, p.String())
|
||||
require.Equal(t, procPath, p.GetProcPath())
|
||||
}
|
||||
}
|
||||
71
internal/discover/hooks.go
Normal file
71
internal/discover/hooks.go
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type hooks struct {
|
||||
None
|
||||
logger *log.Logger
|
||||
}
|
||||
|
||||
var _ Discover = (*hooks)(nil)
|
||||
|
||||
// NewHooks creates a discoverer for linux containers
|
||||
func NewHooks() Discover {
|
||||
return NewHooksWithLogger(log.StandardLogger())
|
||||
}
|
||||
|
||||
// NewHooksWithLogger creates a discoverer as with NewHooks with the specified logger
|
||||
func NewHooksWithLogger(logger *log.Logger) Discover {
|
||||
h := hooks{
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
return &h
|
||||
}
|
||||
|
||||
func (h hooks) Hooks() ([]Hook, error) {
|
||||
var hooks []Hook
|
||||
|
||||
hooks = append(hooks, newLdconfigHook())
|
||||
|
||||
return hooks, nil
|
||||
}
|
||||
|
||||
func newLdconfigHook() Hook {
|
||||
const rootPattern = "@Root.Path@"
|
||||
|
||||
h := Hook{
|
||||
Path: "/sbin/ldconfig",
|
||||
Args: []string{
|
||||
// TODO: Testing seems to indicate that this is -v flag is required
|
||||
"-v",
|
||||
"-r", rootPattern,
|
||||
},
|
||||
// TODO: CreateContainer hooks were only added to a later OCI spec version
|
||||
// We will have to find a way to deal with OCI versions before 1.0.2
|
||||
HookName: "create-container",
|
||||
Labels: map[string]string{
|
||||
"min-oci-version": "1.0.2",
|
||||
},
|
||||
}
|
||||
|
||||
return h
|
||||
}
|
||||
52
internal/discover/ipc.go
Normal file
52
internal/discover/ipc.go
Normal file
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewIPCMounts creates a discoverer for IPC sockets
|
||||
func NewIPCMounts(root string) Discover {
|
||||
return NewIPCMountsWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewIPCMountsWithLogger creates a discovered as with NewIPCMounts with the
|
||||
// specified logger.
|
||||
func NewIPCMountsWithLogger(logger *log.Logger, root string) Discover {
|
||||
d := mounts{
|
||||
logger: logger,
|
||||
lookup: lookup.NewFileLocatorWithLogger(logger, root),
|
||||
required: requiredIPCs,
|
||||
}
|
||||
|
||||
return &d
|
||||
}
|
||||
|
||||
var requiredIPCs = map[string][]string{
|
||||
"nvidia-persistenced": {
|
||||
"/var/run/nvidia-persistenced/socket",
|
||||
},
|
||||
"nvidia-fabricmanager": {
|
||||
"/var/run/nvidia-fabricmanager/socket",
|
||||
},
|
||||
// TODO: This can be controlled by the NV_MPS_PIPE_DIR envvar
|
||||
"nvidia-mps": {
|
||||
"/tmp/nvidia-mps",
|
||||
},
|
||||
}
|
||||
56
internal/discover/ipc_test.go
Normal file
56
internal/discover/ipc_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestIPCDiscover(t *testing.T) {
|
||||
|
||||
ipcLookup := map[string]string{
|
||||
"/var/run/nvidia-persistenced/socket": "/var/run/nvidia-persistenced/socket",
|
||||
"/var/run/nvidia-fabricmanager/socket": "fm-socket",
|
||||
}
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
d := NewIPCMountsWithLogger(logger, "")
|
||||
|
||||
// Override lookup for testing
|
||||
mockLocator := NewLocatorMockFromMap(ipcLookup)
|
||||
d.(*mounts).lookup = mockLocator
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, []Mount{
|
||||
{Path: "/var/run/nvidia-persistenced/socket", Labels: map[string]string{}},
|
||||
{Path: "fm-socket", Labels: map[string]string{}}}, mounts)
|
||||
|
||||
require.Len(t, mockLocator.LocateCalls(), 3)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
@@ -1,126 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/container-orchestrated-devices/container-device-interface/pkg/cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewLDCacheUpdateHook creates a discoverer that updates the ldcache for the specified mounts. A logger can also be specified
|
||||
func NewLDCacheUpdateHook(logger *logrus.Logger, mounts Discover, cfg *Config) (Discover, error) {
|
||||
d := ldconfig{
|
||||
logger: logger,
|
||||
mountsFrom: mounts,
|
||||
lookup: lookup.NewExecutableLocator(logger, cfg.Root),
|
||||
nvidiaCTKExecutablePath: cfg.NVIDIAContainerToolkitCLIExecutablePath,
|
||||
}
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
const (
|
||||
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
|
||||
)
|
||||
|
||||
type ldconfig struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
mountsFrom Discover
|
||||
lookup lookup.Locator
|
||||
nvidiaCTKExecutablePath string
|
||||
}
|
||||
|
||||
// Hooks checks the required mounts for libraries and returns a hook to update the LDcache for the discovered paths.
|
||||
func (d ldconfig) Hooks() ([]Hook, error) {
|
||||
mounts, err := d.mountsFrom.Mounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
|
||||
}
|
||||
|
||||
libDirs := getLibDirs(mounts)
|
||||
|
||||
hookPath := nvidiaCTKDefaultFilePath
|
||||
targets, err := d.lookup.Locate(d.nvidiaCTKExecutablePath)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Failed to locate %v: %v", d.nvidiaCTKExecutablePath, err)
|
||||
} else if len(targets) == 0 {
|
||||
d.logger.Warnf("%v not found", d.nvidiaCTKExecutablePath)
|
||||
} else {
|
||||
d.logger.Debugf("Found %v candidates: %v", d.nvidiaCTKExecutablePath, targets)
|
||||
hookPath = targets[0]
|
||||
}
|
||||
d.logger.Debugf("Using NVIDIA Container Toolkit CLI path %v", hookPath)
|
||||
|
||||
args := []string{hookPath, "hook", "update-ldcache"}
|
||||
for _, f := range libDirs {
|
||||
args = append(args, "--folder", f)
|
||||
}
|
||||
h := Hook{
|
||||
Lifecycle: cdi.CreateContainerHook,
|
||||
Path: hookPath,
|
||||
Args: args,
|
||||
}
|
||||
|
||||
return []Hook{h}, nil
|
||||
}
|
||||
|
||||
// getLibDirs extracts the library dirs from the specified mounts
|
||||
func getLibDirs(mounts []Mount) []string {
|
||||
var paths []string
|
||||
checked := make(map[string]bool)
|
||||
|
||||
for _, m := range mounts {
|
||||
dir := filepath.Dir(m.Path)
|
||||
if dir == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
_, exists := checked[dir]
|
||||
if exists {
|
||||
continue
|
||||
}
|
||||
checked[dir] = isLibName(filepath.Base(m.Path))
|
||||
|
||||
if checked[dir] {
|
||||
paths = append(paths, dir)
|
||||
}
|
||||
}
|
||||
|
||||
sort.Strings(paths)
|
||||
|
||||
return paths
|
||||
}
|
||||
|
||||
// isLibName checks if the specified filename is a library (i.e. ends in `.so*`)
|
||||
func isLibName(filename string) bool {
|
||||
parts := strings.Split(filename, ".")
|
||||
|
||||
for _, p := range parts {
|
||||
if p == "so" {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
100
internal/discover/libraries.go
Normal file
100
internal/discover/libraries.go
Normal file
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewLibraries constructs discoverer for libraries
|
||||
func NewLibraries(root string) (Discover, error) {
|
||||
return NewLibrariesWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewLibrariesWithLogger constructs discoverer for libraries with the specified logger
|
||||
func NewLibrariesWithLogger(logger *log.Logger, root string) (Discover, error) {
|
||||
lookup, err := lookup.NewLibraryLocatorWithLogger(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing locator: %v", err)
|
||||
}
|
||||
|
||||
d := mounts{
|
||||
logger: logger,
|
||||
lookup: lookup,
|
||||
required: requiredLibraries,
|
||||
}
|
||||
return &d, nil
|
||||
}
|
||||
|
||||
// requiredLibraries defines a set of libraries and their labels
|
||||
var requiredLibraries = map[string][]string{
|
||||
"utility": {
|
||||
"libnvidia-ml.so", /* Management library */
|
||||
"libnvidia-cfg.so", /* GPU configuration */
|
||||
},
|
||||
"compute": {
|
||||
"libcuda.so", /* CUDA driver library */
|
||||
"libnvidia-opencl.so", /* NVIDIA OpenCL ICD */
|
||||
"libnvidia-ptxjitcompiler.so", /* PTX-SASS JIT compiler (used by libcuda) */
|
||||
"libnvidia-fatbinaryloader.so", /* fatbin loader (used by libcuda) */
|
||||
"libnvidia-allocator.so", /* NVIDIA allocator runtime library */
|
||||
"libnvidia-compiler.so", /* NVVM-PTX compiler for OpenCL (used by libnvidia-opencl) */
|
||||
},
|
||||
"video": {
|
||||
"libvdpau_nvidia.so", /* NVIDIA VDPAU ICD */
|
||||
"libnvidia-encode.so", /* Video encoder */
|
||||
"libnvidia-opticalflow.so", /* NVIDIA Opticalflow library */
|
||||
"libnvcuvid.so", /* Video decoder */
|
||||
},
|
||||
"graphics": {
|
||||
//"libnvidia-egl-wayland.so", /* EGL wayland platform extension (used by libEGL_nvidia) */
|
||||
"libnvidia-eglcore.so", /* EGL core (used by libGLES*[_nvidia] and libEGL_nvidia) */
|
||||
"libnvidia-glcore.so", /* OpenGL core (used by libGL or libGLX_nvidia) */
|
||||
"libnvidia-tls.so", /* Thread local storage (used by libGL or libGLX_nvidia) */
|
||||
"libnvidia-glsi.so", /* OpenGL system interaction (used by libEGL_nvidia) */
|
||||
"libnvidia-fbc.so", /* Framebuffer capture */
|
||||
"libnvidia-ifr.so", /* OpenGL framebuffer capture */
|
||||
"libnvidia-rtcore.so", /* Optix */
|
||||
"libnvoptix.so", /* Optix */
|
||||
},
|
||||
"glvnd": {
|
||||
//"libGLX.so", /* GLX ICD loader */
|
||||
//"libOpenGL.so", /* OpenGL ICD loader */
|
||||
//"libGLdispatch.so", /* OpenGL dispatch (used by libOpenGL, libEGL and libGLES*) */
|
||||
"libGLX_nvidia.so", /* OpenGL/GLX ICD */
|
||||
"libEGL_nvidia.so", /* EGL ICD */
|
||||
"libGLESv2_nvidia.so", /* OpenGL ES v2 ICD */
|
||||
"libGLESv1_CM_nvidia.so", /* OpenGL ES v1 common profile ICD */
|
||||
"libnvidia-glvkspirv.so", /* SPIR-V Lib for Vulkan */
|
||||
"libnvidia-cbl.so", /* VK_NV_ray_tracing */
|
||||
},
|
||||
"compat32": {
|
||||
"libGL.so", /* OpenGL/GLX legacy _or_ compatibility wrapper (GLVND) */
|
||||
"libEGL.so", /* EGL legacy _or_ ICD loader (GLVND) */
|
||||
"libGLESv1_CM.so", /* OpenGL ES v1 common profile legacy _or_ ICD loader (GLVND) */
|
||||
"libGLESv2.so", /* OpenGL ES v2 legacy _or_ ICD loader (GLVND) */
|
||||
},
|
||||
"ngx": {
|
||||
"libnvidia-ngx.so", /* NGX library */
|
||||
},
|
||||
"dxcore": {
|
||||
"libdxcore.so", /* Core library for dxcore support */
|
||||
},
|
||||
}
|
||||
56
internal/discover/libraries_test.go
Normal file
56
internal/discover/libraries_test.go
Normal file
@@ -0,0 +1,56 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestLibraries(t *testing.T) {
|
||||
|
||||
libraryLookup := map[string]string{
|
||||
"libcuda.so": "/lib/libcuda.so.999.99",
|
||||
"libversion.so": "/lib/libversion.so.111.11",
|
||||
"libother.so": "/lib/libother.so.999.99",
|
||||
}
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
d, err := NewLibrariesWithLogger(logger, "")
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
// Override lookup for testing
|
||||
mockLocator := NewLocatorMockFromMap(libraryLookup)
|
||||
d.(*mounts).lookup = mockLocator
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
|
||||
require.ElementsMatch(t, []Mount{{Path: "/lib/libcuda.so.999.99", Labels: map[string]string{}}}, mounts)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
@@ -18,68 +18,108 @@ package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"sync"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
capabilityLabel = "capability"
|
||||
versionLabel = "version"
|
||||
)
|
||||
|
||||
// mounts is a generic discoverer for Mounts. It is customized by specifying the
|
||||
// required entities as a list and a Locator that is used to find the target mounts
|
||||
// based on the entry in the list.
|
||||
// required entities as a key-value pair as well as a Locator that is used to
|
||||
// identify the mounts that are to be included.
|
||||
type mounts struct {
|
||||
None
|
||||
logger *logrus.Logger
|
||||
logger *log.Logger
|
||||
lookup lookup.Locator
|
||||
required []string
|
||||
sync.Mutex
|
||||
cache []Mount
|
||||
required map[string][]string
|
||||
}
|
||||
|
||||
var _ Discover = (*mounts)(nil)
|
||||
|
||||
func (d *mounts) Mounts() ([]Mount, error) {
|
||||
func (d mounts) Mounts() ([]Mount, error) {
|
||||
mounts, err := d.uniqueMounts()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering mounts: %v", err)
|
||||
}
|
||||
|
||||
return mounts.Slice(), nil
|
||||
}
|
||||
|
||||
func (d mounts) uniqueMounts() (mountsByPath, error) {
|
||||
if d.lookup == nil {
|
||||
return nil, fmt.Errorf("no lookup defined")
|
||||
}
|
||||
|
||||
if d.cache != nil {
|
||||
d.logger.Debugf("returning cached mounts")
|
||||
return d.cache, nil
|
||||
}
|
||||
mounts := make(mountsByPath)
|
||||
|
||||
d.Lock()
|
||||
defer d.Unlock()
|
||||
|
||||
paths := make(map[string]bool)
|
||||
|
||||
for _, candidate := range d.required {
|
||||
d.logger.Debugf("Locating %v", candidate)
|
||||
located, err := d.lookup.Locate(candidate)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Could not locate %v: %v", candidate, err)
|
||||
continue
|
||||
}
|
||||
if len(located) == 0 {
|
||||
d.logger.Warnf("Missing %v", candidate)
|
||||
continue
|
||||
}
|
||||
d.logger.Debugf("Located %v as %v", candidate, located)
|
||||
for _, p := range located {
|
||||
paths[p] = true
|
||||
for id, keys := range d.required {
|
||||
for _, key := range keys {
|
||||
d.logger.Debugf("Locating %v [%v]", key, id)
|
||||
located, err := d.lookup.Locate(key)
|
||||
if err != nil {
|
||||
d.logger.Warnf("Could not locate %v [%v]: %v", key, id, err)
|
||||
continue
|
||||
}
|
||||
d.logger.Infof("Located %v [%v]: %v", key, id, located)
|
||||
for _, p := range located {
|
||||
// TODO: We need to add labels
|
||||
mount := newMount(p)
|
||||
mounts.Put(mount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var mounts []Mount
|
||||
for path := range paths {
|
||||
d.logger.Infof("Selecting %v", path)
|
||||
mount := Mount{
|
||||
Path: path,
|
||||
}
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
|
||||
d.cache = mounts
|
||||
|
||||
return mounts, nil
|
||||
}
|
||||
|
||||
type mountsByPath map[string]Mount
|
||||
|
||||
func (m mountsByPath) Slice() []Mount {
|
||||
var mounts []Mount
|
||||
for _, mount := range m {
|
||||
mounts = append(mounts, mount)
|
||||
}
|
||||
|
||||
return mounts
|
||||
}
|
||||
|
||||
func (m *mountsByPath) Put(value Mount) {
|
||||
key := value.Path
|
||||
mount, exists := (*m)[key]
|
||||
if !exists {
|
||||
(*m)[key] = value
|
||||
return
|
||||
}
|
||||
|
||||
for k, v := range value.Labels {
|
||||
mount.Labels[k] = v
|
||||
}
|
||||
(*m)[key] = mount
|
||||
}
|
||||
|
||||
// NewMountForCapability creates a mount with the specified capability label
|
||||
func NewMountForCapability(path string, capability string) Mount {
|
||||
return newMount(path, capabilityLabel, capability)
|
||||
}
|
||||
|
||||
// NewMountForVersion creates a mount with the specified version label
|
||||
func NewMountForVersion(path string, version string) Mount {
|
||||
return newMount(path, versionLabel, version)
|
||||
}
|
||||
|
||||
func newMount(path string, labels ...string) Mount {
|
||||
l := make(map[string]string)
|
||||
|
||||
for i := 0; i < len(labels)-1; i += 2 {
|
||||
l[labels[i]] = labels[i+1]
|
||||
}
|
||||
|
||||
return Mount{
|
||||
Path: path,
|
||||
Labels: l,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -22,144 +22,32 @@ import (
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/stretchr/testify/require"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
)
|
||||
|
||||
func TestMountsReturnsEmptyDevices(t *testing.T) {
|
||||
func TestMountsReturnsErrorForNoLookup(t *testing.T) {
|
||||
d := mounts{}
|
||||
devices, err := d.Devices()
|
||||
mounts, err := d.Mounts()
|
||||
|
||||
require.Error(t, err)
|
||||
require.Len(t, mounts, 0)
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
|
||||
func TestMounts(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
expectedError error
|
||||
expectedMounts []Mount
|
||||
input *mounts
|
||||
}{
|
||||
{
|
||||
description: "nill lookup returns error",
|
||||
expectedError: fmt.Errorf("no lookup defined"),
|
||||
input: &mounts{},
|
||||
},
|
||||
{
|
||||
description: "empty required returns no mounts",
|
||||
expectedError: nil,
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "required returns located",
|
||||
expectedError: nil,
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "located"}},
|
||||
},
|
||||
{
|
||||
description: "mounts removes located duplicates",
|
||||
expectedError: nil,
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required0", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "located"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips located errors",
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
if s == "error" {
|
||||
return nil, fmt.Errorf(s)
|
||||
}
|
||||
return []string{s}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required0", "error", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "required0"}, {Path: "required1"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips unlocated",
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
if s == "empty" {
|
||||
return nil, nil
|
||||
}
|
||||
return []string{s}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required0", "empty", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{{Path: "required0"}, {Path: "required1"}},
|
||||
},
|
||||
{
|
||||
description: "mounts skips unlocated",
|
||||
input: &mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(s string) ([]string, error) {
|
||||
if s == "multiple" {
|
||||
return []string{"multiple0", "multiple1"}, nil
|
||||
}
|
||||
return []string{s}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required0", "multiple", "required1"},
|
||||
},
|
||||
expectedMounts: []Mount{
|
||||
{Path: "required0"},
|
||||
{Path: "multiple0"},
|
||||
{Path: "multiple1"},
|
||||
{Path: "required1"},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
logHook.Reset()
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tc.input.logger = logger
|
||||
mounts, err := tc.input.Mounts()
|
||||
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
func NewLocatorMockFromMap(lookupMap map[string]string) *lookup.LocatorMock {
|
||||
return &lookup.LocatorMock{
|
||||
LocateFunc: func(key string) ([]string, error) {
|
||||
value, exists := lookupMap[key]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("key %v not found", key)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedMounts, mounts)
|
||||
|
||||
// We check that the mock is called for each element of required
|
||||
if tc.input.lookup != nil {
|
||||
mock := tc.input.lookup.(*lookup.LocatorMock)
|
||||
require.Len(t, mock.LocateCalls(), len(tc.input.required))
|
||||
var args []string
|
||||
for _, c := range mock.LocateCalls() {
|
||||
args = append(args, c.S)
|
||||
}
|
||||
require.EqualValues(t, args, tc.input.required)
|
||||
}
|
||||
})
|
||||
return []string{value}, nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
@@ -25,7 +25,15 @@ import (
|
||||
func TestNone(t *testing.T) {
|
||||
d := None{}
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, mounts)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
|
||||
71
internal/discover/nvml_server.go
Normal file
71
internal/discover/nvml_server.go
Normal file
@@ -0,0 +1,71 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type nvmlServer struct {
|
||||
logger *log.Logger
|
||||
composite
|
||||
}
|
||||
|
||||
var _ Discover = (*nvmlServer)(nil)
|
||||
|
||||
// NewNVMLServer constructs a discoverer for server systems using NVML to discover devices
|
||||
func NewNVMLServer(root string) (Discover, error) {
|
||||
return NewNVMLServerWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewNVMLServerWithLogger constructs a discoverer for server systems using NVML to discover devices with
|
||||
// the specified logger
|
||||
func NewNVMLServerWithLogger(logger *log.Logger, root string) (Discover, error) {
|
||||
return createNVMLServer(logger, nvml.New(), root)
|
||||
}
|
||||
|
||||
func createNVMLServer(logger *log.Logger, nvml nvml.Interface, root string) (Discover, error) {
|
||||
d := nvmlServer{
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
devices, err := NewNVMLDiscoverWithLogger(logger, nvml)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing NVML device discoverer: %v", err)
|
||||
}
|
||||
|
||||
libraries, err := NewLibrariesWithLogger(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing library discoverer: %v", err)
|
||||
}
|
||||
|
||||
d.add(
|
||||
// Device discovery
|
||||
devices,
|
||||
// Mounts discovery
|
||||
libraries,
|
||||
NewBinaryMountsWithLogger(logger, root),
|
||||
NewIPCMountsWithLogger(logger, root),
|
||||
// Hook discovery
|
||||
NewHooksWithLogger(logger),
|
||||
)
|
||||
|
||||
return &d, nil
|
||||
}
|
||||
66
internal/discover/nvml_server_test.go
Normal file
66
internal/discover/nvml_server_test.go
Normal file
@@ -0,0 +1,66 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const (
|
||||
testMajor = 999
|
||||
)
|
||||
|
||||
func TestNVMLServerConstructor(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
nvml := nvml.NewMockNVMLOnLunaServer()
|
||||
|
||||
d, err := createNVMLServer(logger, nvml, "")
|
||||
require.NoError(t, err)
|
||||
|
||||
instance := d.(*nvmlServer)
|
||||
require.Len(t, instance.discoverers, 1+3+1)
|
||||
|
||||
// We need to override the nvidiaDevices member of the nvmlDiscovery
|
||||
// TODO: Use a mock instead, or allow for injection into a constructor
|
||||
instance.discoverers[0].(*nvmlDiscover).nvidiaDevices = &mockNvidiaDevices{}
|
||||
|
||||
devices, err := d.Devices()
|
||||
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, devices)
|
||||
|
||||
_, err = d.Mounts()
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
type mockNvidiaDevices struct{}
|
||||
|
||||
var _ proc.NvidiaDevices = (*mockNvidiaDevices)(nil)
|
||||
|
||||
func (d mockNvidiaDevices) Get(name string) (proc.Device, bool) {
|
||||
return proc.Device{Name: name, Major: testMajor}, true
|
||||
}
|
||||
|
||||
func (d mockNvidiaDevices) Exists(string) bool {
|
||||
return false
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user