mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
27 Commits
v1.12.0-rc
...
experiment
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
8551c71a8a | ||
|
|
7ee81e91de | ||
|
|
5a6860adbf | ||
|
|
ce2b0dfdf0 | ||
|
|
6f0129bdf2 | ||
|
|
ffd98424d8 | ||
|
|
cf192169a8 | ||
|
|
eeabd2b94d | ||
|
|
d81329da1f | ||
|
|
3b84f527cc | ||
|
|
8da6e42d88 | ||
|
|
1a755d471a | ||
|
|
c05f9dffc5 | ||
|
|
7c5b913e09 | ||
|
|
c0d2d41f23 | ||
|
|
08cf87eb21 | ||
|
|
d3a9f512c4 | ||
|
|
73baa74ea8 | ||
|
|
39aa24690c | ||
|
|
dc6b7c703b | ||
|
|
b044b56c6e | ||
|
|
46cdf8c41a | ||
|
|
22ee5eb64f | ||
|
|
a7f3398a68 | ||
|
|
72fe259745 | ||
|
|
3b27d91026 | ||
|
|
d3997eceb2 |
106
.common-ci.yml
106
.common-ci.yml
@@ -12,15 +12,14 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
default:
|
||||
image: docker
|
||||
image: docker:stable
|
||||
services:
|
||||
- name: docker:dind
|
||||
- name: docker:stable-dind
|
||||
command: ["--experimental"]
|
||||
|
||||
variables:
|
||||
GIT_SUBMODULE_STRATEGY: recursive
|
||||
BUILDIMAGE: "${CI_REGISTRY_IMAGE}/build:${CI_COMMIT_SHORT_SHA}"
|
||||
BUILD_MULTI_ARCH_IMAGES: "true"
|
||||
|
||||
stages:
|
||||
- image
|
||||
@@ -33,81 +32,45 @@ stages:
|
||||
- test
|
||||
- scan
|
||||
- release
|
||||
|
||||
.main-or-manual:
|
||||
rules:
|
||||
- if: $CI_COMMIT_BRANCH == "main"
|
||||
- if: $CI_COMMIT_TAG && $CI_COMMIT_TAG != ""
|
||||
- if: $CI_PIPELINE_SOURCE == "schedule"
|
||||
when: manual
|
||||
- build-all
|
||||
|
||||
# Define the distribution targets
|
||||
.dist-amazonlinux2:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: amazonlinux2
|
||||
|
||||
.dist-centos7:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: centos7
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
CVE_UPDATES: "nss"
|
||||
|
||||
.dist-centos8:
|
||||
variables:
|
||||
DIST: centos8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
|
||||
.dist-debian10:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: debian10
|
||||
|
||||
.dist-debian9:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: debian9
|
||||
|
||||
.dist-fedora35:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: fedora35
|
||||
|
||||
.dist-opensuse-leap15.1:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: opensuse-leap15.1
|
||||
|
||||
.dist-ubi8:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubi8
|
||||
CVE_UPDATES: "cyrus-sasl-lib"
|
||||
|
||||
.dist-ubuntu16.04:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubuntu16.04
|
||||
|
||||
.dist-ubuntu18.04:
|
||||
variables:
|
||||
DIST: ubuntu18.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
|
||||
.dist-ubuntu20.04:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
DIST: ubuntu20.04
|
||||
CVE_UPDATES: "libsasl2-2 libsasl2-modules-db"
|
||||
|
||||
.dist-packaging:
|
||||
variables:
|
||||
@@ -127,8 +90,6 @@ stages:
|
||||
ARCH: arm64
|
||||
|
||||
.arch-ppc64le:
|
||||
rules:
|
||||
- !reference [.main-or-manual, rules]
|
||||
variables:
|
||||
ARCH: ppc64le
|
||||
|
||||
@@ -136,15 +97,6 @@ stages:
|
||||
variables:
|
||||
ARCH: x86_64
|
||||
|
||||
# Define the platform targets
|
||||
.platform-amd64:
|
||||
variables:
|
||||
PLATFORM: linux/amd64
|
||||
|
||||
.platform-arm64:
|
||||
variables:
|
||||
PLATFORM: linux/arm64
|
||||
|
||||
# Define test helpers
|
||||
.integration:
|
||||
stage: test
|
||||
@@ -166,30 +118,20 @@ test-packaging:
|
||||
needs:
|
||||
- image-packaging
|
||||
|
||||
# Download the regctl binary for use in the release steps
|
||||
.regctl-setup:
|
||||
before_script:
|
||||
- export REGCTL_VERSION=v0.3.10
|
||||
- apk add --no-cache curl
|
||||
- mkdir -p bin
|
||||
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
|
||||
- chmod a+x bin/regctl
|
||||
- export PATH=$(pwd)/bin:${PATH}
|
||||
|
||||
# .release forms the base of the deployment jobs which push images to the CI registry.
|
||||
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
|
||||
# target os.
|
||||
.release:
|
||||
stage: release
|
||||
stage:
|
||||
release
|
||||
variables:
|
||||
# Define the source image for the release
|
||||
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
# OUT_IMAGE_VERSION is overridden for external releases
|
||||
OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
stage: release
|
||||
before_script:
|
||||
- !reference [.regctl-setup, before_script]
|
||||
|
||||
# We ensure that the OUT_IMAGE_VERSION is set
|
||||
- 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1'
|
||||
|
||||
@@ -197,16 +139,16 @@ test-packaging:
|
||||
# need to tag the image.
|
||||
# Note: a leading 'v' is stripped from the version if present
|
||||
- apk add --no-cache make bash
|
||||
script:
|
||||
# Log in to the "output" registry, tag the image and push the image
|
||||
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||
- regctl registry login "${CI_REGISTRY}" -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}"
|
||||
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || echo "Logging in to output registry ${OUT_REGISTRY}"'
|
||||
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"'
|
||||
|
||||
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
|
||||
# Target
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
|
||||
script:
|
||||
- docker tag "${IMAGE_NAME}:${VERSION}-${DIST}" "${OUT_IMAGE_NAME}:${OUT_IMAGE_VERSION}-${DIST}"
|
||||
# Log in to the "output" registry, tag the image and push the image
|
||||
- 'echo "Logging in to output registry ${OUT_REGISTRY}"'
|
||||
- docker logout
|
||||
- docker login -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" "${OUT_REGISTRY}"
|
||||
- make IMAGE_NAME=${OUT_IMAGE_NAME} VERSION=${OUT_IMAGE_VERSION} -f build/container/Makefile push-${DIST}
|
||||
|
||||
# Define a staging release step that pushes an image to an internal "staging" repository
|
||||
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
|
||||
@@ -221,7 +163,7 @@ test-packaging:
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/staging/container-toolkit"
|
||||
|
||||
# Define an external release step that pushes an image to an external repository.
|
||||
# This includes a devlopment image off main.
|
||||
# This includes a devlopment image off master.
|
||||
.release:external:
|
||||
extends:
|
||||
- .release
|
||||
@@ -241,6 +183,13 @@ release:staging-centos7:
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
release:staging-centos8:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- image-centos8
|
||||
|
||||
release:staging-ubi8:
|
||||
extends:
|
||||
- .release:staging
|
||||
@@ -258,13 +207,6 @@ release:staging-ubuntu18.04:
|
||||
- test-crio-ubuntu18.04
|
||||
- test-docker-ubuntu18.04
|
||||
|
||||
release:staging-ubuntu20.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
release:staging-packaging:
|
||||
extends:
|
||||
- .release:staging
|
||||
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -1,11 +1,8 @@
|
||||
dist
|
||||
*.swp
|
||||
*.swo
|
||||
/coverage.out*
|
||||
/coverage.out
|
||||
/test/output/
|
||||
/nvidia-container-runtime
|
||||
/nvidia-container-runtime-hook
|
||||
/nvidia-container-toolkit
|
||||
/nvidia-ctk
|
||||
/shared-*
|
||||
/release-*
|
||||
147
.gitlab-ci.yml
147
.gitlab-ci.yml
@@ -94,9 +94,8 @@ unit-tests:
|
||||
- .multi-arch-build
|
||||
- .package-artifacts
|
||||
stage: package-build
|
||||
timeout: 3h
|
||||
script:
|
||||
- ./scripts/build-packages.sh ${DIST}-${ARCH}
|
||||
- ./scripts/release.sh ${DIST}-${ARCH}
|
||||
|
||||
artifacts:
|
||||
name: ${ARTIFACTS_NAME}
|
||||
@@ -158,18 +157,6 @@ package-debian9-amd64:
|
||||
- .dist-debian9
|
||||
- .arch-amd64
|
||||
|
||||
package-fedora35-aarch64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-aarch64
|
||||
|
||||
package-fedora35-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
- .dist-fedora35
|
||||
- .arch-x86_64
|
||||
|
||||
package-opensuse-leap15.1-x86_64:
|
||||
extends:
|
||||
- .package-build
|
||||
@@ -206,33 +193,19 @@ package-ubuntu18.04-ppc64le:
|
||||
- .dist-ubuntu18.04
|
||||
- .arch-ppc64le
|
||||
|
||||
.buildx-setup:
|
||||
before_script:
|
||||
- export BUILDX_VERSION=v0.6.3
|
||||
- apk add --no-cache curl
|
||||
- mkdir -p ~/.docker/cli-plugins
|
||||
- curl -sSLo ~/.docker/cli-plugins/docker-buildx "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64"
|
||||
- chmod a+x ~/.docker/cli-plugins/docker-buildx
|
||||
|
||||
- docker buildx create --use --platform=linux/amd64,linux/arm64
|
||||
|
||||
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes'
|
||||
|
||||
# Define the image build targets
|
||||
.image-build:
|
||||
stage: image-build
|
||||
variables:
|
||||
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||
PUSH_ON_BUILD: "true"
|
||||
before_script:
|
||||
- !reference [.buildx-setup, before_script]
|
||||
|
||||
- apk add --no-cache bash make
|
||||
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
script:
|
||||
- make -f build/container/Makefile build-${DIST}
|
||||
- make -f build/container/Makefile push-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
@@ -243,33 +216,31 @@ image-centos7:
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-x86_64
|
||||
- package-centos8-ppc64le
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-ubi8
|
||||
needs:
|
||||
# Note: The ubi8 image uses the centos8 packages
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-x86_64
|
||||
- package-centos8-ppc64le
|
||||
# Note: The ubi8 image currently uses the centos7 packages
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-ubuntu18.04
|
||||
needs:
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- job: package-ubuntu18.04-ppc64le
|
||||
optional: true
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
- .image-build
|
||||
- .package-artifacts
|
||||
- .dist-ubuntu20.04
|
||||
needs:
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
@@ -282,36 +253,21 @@ image-packaging:
|
||||
- .package-artifacts
|
||||
- .dist-packaging
|
||||
needs:
|
||||
- job: package-centos8-aarch64
|
||||
- job: package-centos8-x86_64
|
||||
- job: package-ubuntu18.04-amd64
|
||||
- job: package-ubuntu18.04-arm64
|
||||
- job: package-amazonlinux2-aarch64
|
||||
optional: true
|
||||
- job: package-amazonlinux2-x86_64
|
||||
optional: true
|
||||
- job: package-centos7-ppc64le
|
||||
optional: true
|
||||
- job: package-centos7-x86_64
|
||||
optional: true
|
||||
- job: package-centos8-ppc64le
|
||||
optional: true
|
||||
- job: package-debian10-amd64
|
||||
optional: true
|
||||
- job: package-debian9-amd64
|
||||
optional: true
|
||||
- job: package-fedora35-aarch64
|
||||
optional: true
|
||||
- job: package-fedora35-x86_64
|
||||
optional: true
|
||||
- job: package-opensuse-leap15.1-x86_64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-amd64
|
||||
optional: true
|
||||
- job: package-ubuntu16.04-ppc64le
|
||||
optional: true
|
||||
- job: package-ubuntu18.04-ppc64le
|
||||
optional: true
|
||||
- package-amazonlinux2-aarch64
|
||||
- package-amazonlinux2-x86_64
|
||||
- package-centos7-ppc64le
|
||||
- package-centos7-x86_64
|
||||
- package-centos8-aarch64
|
||||
- package-centos8-ppc64le
|
||||
- package-centos8-x86_64
|
||||
- package-debian10-amd64
|
||||
- package-debian9-amd64
|
||||
- package-opensuse-leap15.1-x86_64
|
||||
- package-ubuntu16.04-amd64
|
||||
- package-ubuntu16.04-ppc64le
|
||||
- package-ubuntu18.04-amd64
|
||||
- package-ubuntu18.04-arm64
|
||||
- package-ubuntu18.04-ppc64le
|
||||
|
||||
# Define publish test helpers
|
||||
.test:toolkit:
|
||||
@@ -371,3 +327,46 @@ test-docker-ubuntu18.04:
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
# build-all jobs build packages for every OS / ARCH combination we support.
|
||||
#
|
||||
# They are run under two conditions:
|
||||
# 1) Automatically whenever a new tag is pushed to the repo (e.g. v1.1.0)
|
||||
# 2) Manually by a reviewer just before merging a MR.
|
||||
.build-all-for-arch:
|
||||
variables:
|
||||
# Setting DIST=docker invokes the docker- release targets
|
||||
DIST: docker
|
||||
extends:
|
||||
- .package-build
|
||||
stage: build-all
|
||||
timeout: 2h 30m
|
||||
rules:
|
||||
- if: $CI_COMMIT_TAG
|
||||
when: always
|
||||
|
||||
# The full set of build-all jobs organized to
|
||||
# have builds for each ARCH run in parallel.
|
||||
build-all-amd64:
|
||||
extends:
|
||||
- .build-all-for-arch
|
||||
- .arch-amd64
|
||||
|
||||
build-all-x86_64:
|
||||
extends:
|
||||
- .build-all-for-arch
|
||||
- .arch-x86_64
|
||||
|
||||
build-all-ppc64le:
|
||||
extends:
|
||||
- .build-all-for-arch
|
||||
- .arch-ppc64le
|
||||
|
||||
build-all-arm64:
|
||||
extends:
|
||||
- .build-all-for-arch
|
||||
- .arch-arm64
|
||||
|
||||
build-all-aarch64:
|
||||
extends:
|
||||
- .build-all-for-arch
|
||||
- .arch-aarch64
|
||||
|
||||
3
.gitmodules
vendored
3
.gitmodules
vendored
@@ -1,12 +1,9 @@
|
||||
[submodule "third_party/libnvidia-container"]
|
||||
path = third_party/libnvidia-container
|
||||
url = https://gitlab.com/nvidia/container-toolkit/libnvidia-container.git
|
||||
branch = main
|
||||
[submodule "third_party/nvidia-container-runtime"]
|
||||
path = third_party/nvidia-container-runtime
|
||||
url = https://gitlab.com/nvidia/container-toolkit/container-runtime.git
|
||||
branch = main
|
||||
[submodule "third_party/nvidia-docker"]
|
||||
path = third_party/nvidia-docker
|
||||
url = https://gitlab.com/nvidia/container-toolkit/nvidia-docker.git
|
||||
branch = main
|
||||
|
||||
141
.nvidia-ci.yml
141
.nvidia-ci.yml
@@ -27,8 +27,8 @@ default:
|
||||
variables:
|
||||
DOCKER_DRIVER: overlay2
|
||||
DOCKER_TLS_CERTDIR: "/certs"
|
||||
# Release "devel"-tagged images off the main branch
|
||||
RELEASE_DEVEL_BRANCH: "main"
|
||||
# Release "devel"-tagged images off the master branch
|
||||
RELEASE_DEVEL_BRANCH: "master"
|
||||
DEVEL_RELEASE_IMAGE_VERSION: "devel"
|
||||
# On the multi-arch builder we don't need the qemu setup.
|
||||
SKIP_QEMU_SETUP: "1"
|
||||
@@ -46,11 +46,9 @@ variables:
|
||||
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||
OUT_REGISTRY: "${CI_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||
PUSH_MULTIPLE_TAGS: "false"
|
||||
# We delay the job start to allow the public pipeline to generate the required images.
|
||||
rules:
|
||||
- when: delayed
|
||||
start_in: 30 minutes
|
||||
when: delayed
|
||||
start_in: 30 minutes
|
||||
timeout: 30 minutes
|
||||
retry:
|
||||
max: 2
|
||||
@@ -58,39 +56,39 @@ variables:
|
||||
- job_execution_timeout
|
||||
- stuck_or_timeout_failure
|
||||
before_script:
|
||||
- !reference [.regctl-setup, before_script]
|
||||
- apk add --no-cache make bash
|
||||
- >
|
||||
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
|
||||
docker pull ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
|
||||
script:
|
||||
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
|
||||
- make -f build/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
|
||||
- docker pull ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}
|
||||
- docker tag ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} ${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}
|
||||
- docker login -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" "${OUT_REGISTRY}"
|
||||
- docker push ${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST}
|
||||
|
||||
image-centos7:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .image-pull
|
||||
- .dist-centos7
|
||||
|
||||
image-centos8:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-centos8
|
||||
|
||||
image-ubi8:
|
||||
extends:
|
||||
- .dist-ubi8
|
||||
- .image-pull
|
||||
- .dist-ubi8
|
||||
|
||||
image-ubuntu18.04:
|
||||
extends:
|
||||
- .image-pull
|
||||
- .dist-ubuntu18.04
|
||||
- .image-pull
|
||||
|
||||
image-ubuntu20.04:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
- .image-pull
|
||||
|
||||
# The DIST=packaging target creates an image containing all built packages
|
||||
image-packaging:
|
||||
extends:
|
||||
- .dist-packaging
|
||||
- .image-pull
|
||||
- .dist-packaging
|
||||
|
||||
# We skip the integration tests for the internal CI:
|
||||
.integration:
|
||||
@@ -108,13 +106,13 @@ image-packaging:
|
||||
variables:
|
||||
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
|
||||
IMAGE_ARCHIVE: "container-toolkit.tar"
|
||||
rules:
|
||||
- if: $SKIP_SCANS != "yes"
|
||||
- when: manual
|
||||
except:
|
||||
variables:
|
||||
- $SKIP_SCANS && $SKIP_SCANS == "yes"
|
||||
before_script:
|
||||
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||
# TODO: We should specify the architecture here and scan all architectures
|
||||
- docker pull --platform="${PLATFORM}" "${IMAGE}"
|
||||
- docker pull "${IMAGE}"
|
||||
- docker save "${IMAGE}" -o "${IMAGE_ARCHIVE}"
|
||||
- AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
|
||||
- >
|
||||
@@ -133,65 +131,34 @@ image-packaging:
|
||||
- policy_evaluation.json
|
||||
|
||||
# Define the scan targets
|
||||
scan-centos7-amd64:
|
||||
scan-centos7:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
- .dist-centos7
|
||||
needs:
|
||||
- image-centos7
|
||||
|
||||
scan-centos7-arm64:
|
||||
scan-centos8:
|
||||
extends:
|
||||
- .dist-centos7
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
- .dist-centos8
|
||||
needs:
|
||||
- image-centos7
|
||||
- scan-centos7-amd64
|
||||
- image-centos8
|
||||
|
||||
scan-ubuntu18.04-amd64:
|
||||
scan-ubuntu18.04:
|
||||
extends:
|
||||
- .scan
|
||||
- .dist-ubuntu18.04
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu18.04
|
||||
|
||||
scan-ubuntu20.04-amd64:
|
||||
scan-ubi8:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
|
||||
scan-ubuntu20.04-arm64:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubuntu20.04
|
||||
- scan-ubuntu20.04-amd64
|
||||
|
||||
scan-ubi8-amd64:
|
||||
extends:
|
||||
- .dist-ubi8
|
||||
- .platform-amd64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubi8
|
||||
|
||||
scan-ubi8-arm64:
|
||||
extends:
|
||||
- .dist-ubi8
|
||||
- .platform-arm64
|
||||
- .scan
|
||||
needs:
|
||||
- image-ubi8
|
||||
- scan-ubi8-amd64
|
||||
|
||||
# Define external release helpers
|
||||
.release:ngc:
|
||||
extends:
|
||||
@@ -202,6 +169,15 @@ scan-ubi8-arm64:
|
||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||
|
||||
.release:dockerhub:
|
||||
extends:
|
||||
- .release:external
|
||||
variables:
|
||||
OUT_REGISTRY_USER: "${REGISTRY_USER}"
|
||||
OUT_REGISTRY_TOKEN: "${REGISTRY_TOKEN}"
|
||||
OUT_REGISTRY: "${DOCKERHUB_REGISTRY}"
|
||||
OUT_IMAGE_NAME: "${REGISTRY_IMAGE}"
|
||||
|
||||
release:staging-ubuntu18.04:
|
||||
extends:
|
||||
- .release:staging
|
||||
@@ -213,20 +189,41 @@ release:staging-ubuntu18.04:
|
||||
# Release to NGC
|
||||
release:ngc-centos7:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-centos7
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubuntu18.04:
|
||||
release:ngc-centos8:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-centos8
|
||||
|
||||
release:ngc-ubuntu18:
|
||||
extends:
|
||||
- .release:ngc
|
||||
- .dist-ubuntu18.04
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubuntu20.04:
|
||||
extends:
|
||||
- .dist-ubuntu20.04
|
||||
- .release:ngc
|
||||
|
||||
release:ngc-ubi8:
|
||||
extends:
|
||||
- .dist-ubi8
|
||||
- .release:ngc
|
||||
- .dist-ubi8
|
||||
|
||||
# Release to Dockerhub
|
||||
release:dockerhub-centos7:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-centos7
|
||||
|
||||
release:dockerhub-centos8:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-centos8
|
||||
|
||||
release:dockerhub-ubuntu18:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-ubuntu18.04
|
||||
|
||||
release:dockerhub-ubi8:
|
||||
extends:
|
||||
- .release:dockerhub
|
||||
- .dist-ubi8
|
||||
|
||||
202
CHANGELOG.md
202
CHANGELOG.md
@@ -1,202 +0,0 @@
|
||||
# NVIDIA Container Toolkit Changelog
|
||||
|
||||
## v1.12.0-rc.1
|
||||
|
||||
* Add support for multiple Docker Swarm resources
|
||||
* Improve injection of Vulkan configurations and libraries
|
||||
* Add `nvidia-ctk info generate-cdi` command to generated CDI specification for available devices
|
||||
* [libnvidia-container] Include NVVM compiler library in compute libs
|
||||
## v1.11.0
|
||||
|
||||
* Promote v1.11.0-rc.3 to v1.11.0
|
||||
|
||||
## v1.11.0-rc.3
|
||||
|
||||
* Build fedora35 packages
|
||||
* Introduce an `nvidia-container-toolkit-base` package for better dependency management
|
||||
* Fix removal of `nvidia-container-runtime-hook` on RPM-based systems
|
||||
* Inject platform files into container on Tegra-based systems
|
||||
* [toolkit container] Update CUDA base images to 11.7.1
|
||||
* [libnvidia-container] Preload libgcc_s.so.1 on arm64 systems
|
||||
|
||||
## v1.11.0-rc.2
|
||||
|
||||
* Allow `accept-nvidia-visible-devices-*` config options to be set by toolkit container
|
||||
* [libnvidia-container] Fix bug where LDCache was not updated when the `--no-pivot-root` option was specified
|
||||
|
||||
## v1.11.0-rc.1
|
||||
|
||||
* Add discovery of GPUDirect Storage (`nvidia-fs*`) devices if the `NVIDIA_GDS` environment variable of the container is set to `enabled`
|
||||
* Add discovery of MOFED Infiniband devices if the `NVIDIA_MOFED` environment variable of the container is set to `enabled`
|
||||
* Fix bug in CSV mode where libraries listed as `sym` entries in mount specification are not added to the LDCache.
|
||||
* Rename `nvidia-container-toolkit` executable to `nvidia-container-runtime-hook` and create `nvidia-container-toolkit` as a symlink to `nvidia-container-runtime-hook` instead.
|
||||
* Add `nvidia-ctk runtime configure` command to configure the Docker config file (e.g. `/etc/docker/daemon.json`) for use with the NVIDIA Container Runtime.
|
||||
|
||||
## v1.10.0
|
||||
|
||||
* Promote v1.10.0-rc.3 to v1.10.0
|
||||
|
||||
## v1.10.0-rc.3
|
||||
|
||||
* Use default config instead of raising an error if config file cannot be found
|
||||
* Ignore NVIDIA_REQUIRE_JETPACK* environment variables for requirement checks
|
||||
* Fix bug in detection of Tegra systems where `/sys/devices/soc0/family` is ignored
|
||||
* Fix bug where links to devices were detected as devices
|
||||
* [libnvida-container] Fix bug introduced when adding libcudadebugger.so to list of libraries
|
||||
|
||||
## v1.10.0-rc.2
|
||||
|
||||
* Add support for NVIDIA_REQUIRE_* checks for cuda version and arch to csv mode
|
||||
* Switch to debug logging to reduce log verbosity
|
||||
* Support logging to logs requested in command line
|
||||
* Fix bug when launching containers with relative root path (e.g. using containerd)
|
||||
* Allow low-level runtime path to be set explicitly as nvidia-container-runtime.runtimes option
|
||||
* Fix failure to locate low-level runtime if PATH envvar is unset
|
||||
* Replace experimental option for NVIDIA Container Runtime with nvidia-container-runtime.mode = csv option
|
||||
* Use csv as default mode on Tegra systems without NVML
|
||||
* Add --version flag to all CLIs
|
||||
* [libnvidia-container] Bump libtirpc to 1.3.2
|
||||
* [libnvidia-container] Fix bug when running host ldconfig using glibc compiled with a non-standard prefix
|
||||
* [libnvidia-container] Add libcudadebugger.so to list of compute libraries
|
||||
|
||||
## v1.10.0-rc.1
|
||||
|
||||
* Include nvidia-ctk CLI in installed binaries
|
||||
* Add experimental option to NVIDIA Container Runtime
|
||||
|
||||
## v1.9.0
|
||||
|
||||
* [libnvidia-container] Add additional check for Tegra in /sys/.../family file in CLI
|
||||
* [libnvidia-container] Update jetpack-specific CLI option to only load Base CSV files by default
|
||||
* [libnvidia-container] Fix bug (from 1.8.0) when mounting GSP firmware into containers without /lib to /usr/lib symlinks
|
||||
* [libnvidia-container] Update nvml.h to CUDA 11.6.1 nvML_DEV 11.6.55
|
||||
* [libnvidia-container] Update switch statement to include new brands from latest nvml.h
|
||||
* [libnvidia-container] Process all --require flags on Jetson platforms
|
||||
* [libnvidia-container] Fix long-standing issue with running ldconfig on Debian systems
|
||||
|
||||
## v1.8.1
|
||||
|
||||
* [libnvidia-container] Fix bug in determining cgroup root when running in nested containers
|
||||
* [libnvidia-container] Fix permission issue when determining cgroup version
|
||||
|
||||
## v1.8.0
|
||||
|
||||
* Promote 1.8.0-rc.2-1 to 1.8.0
|
||||
|
||||
## v1.8.0-rc.2
|
||||
|
||||
* Remove support for building amazonlinux1 packages
|
||||
|
||||
## v1.8.0-rc.1
|
||||
|
||||
* [libnvidia-container] Add support for cgroupv2
|
||||
* Release toolkit-container images from nvidia-container-toolkit repository
|
||||
|
||||
## v1.7.0
|
||||
|
||||
* Promote 1.7.0-rc.1-1 to 1.7.0
|
||||
* Bump Golang version to 1.16.4
|
||||
|
||||
## v1.7.0-rc.1
|
||||
|
||||
* Specify containerd runtime type as string in config tools to remove dependency on containerd package
|
||||
* Add supported-driver-capabilities config option to allow for a subset of all driver capabilities to be specified
|
||||
|
||||
## v1.6.0
|
||||
|
||||
* Promote 1.6.0-rc.3-1 to 1.6.0
|
||||
* Fix unnecessary logging to stderr instead of configured nvidia-container-runtime log file
|
||||
|
||||
## v1.6.0-rc.3
|
||||
|
||||
* Add supported-driver-capabilities config option to the nvidia-container-toolkit
|
||||
* Move OCI and command line checks for runtime to internal oci package
|
||||
|
||||
## v1.6.0-rc.2
|
||||
|
||||
* Use relative path to OCI specification file (config.json) if bundle path is not specified as an argument to the nvidia-container-runtime
|
||||
|
||||
## v1.6.0-rc.1
|
||||
|
||||
* Add AARCH64 package for Amazon Linux 2
|
||||
* Include nvidia-container-runtime into nvidia-container-toolkit package
|
||||
|
||||
## v1.5.1
|
||||
|
||||
* Fix bug where Docker Swarm device selection is ignored if NVIDIA_VISIBLE_DEVICES is also set
|
||||
* Improve unit testing by using require package and adding coverage reports
|
||||
* Remove unneeded go dependencies by running go mod tidy
|
||||
* Move contents of pkg directory to cmd for CLI tools
|
||||
* Ensure make binary target explicitly sets GOOS
|
||||
|
||||
## v1.5.0
|
||||
|
||||
* Add dependence on libnvidia-container-tools >= 1.4.0
|
||||
* Add golang check targets to Makefile
|
||||
* Add Jenkinsfile definition for build targets
|
||||
* Move docker.mk to docker folder
|
||||
|
||||
## v1.4.2
|
||||
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.3
|
||||
|
||||
## v1.4.1
|
||||
|
||||
* Ignore NVIDIA_VISIBLE_DEVICES for containers with insufficent privileges
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.2
|
||||
|
||||
## v1.4.0
|
||||
|
||||
* Add 'compute' capability to list of defaults
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.1
|
||||
|
||||
## v1.3.0
|
||||
|
||||
* Promote 1.3.0-rc.2-1 to 1.3.0
|
||||
* Add dependence on libnvidia-container-tools >= 1.3.0
|
||||
|
||||
## v1.3.0-rc.2
|
||||
|
||||
* 2c180947 Add more tests for new semantics with device list from volume mounts
|
||||
* 7c003857 Refactor accepting device lists from volume mounts as a boolean
|
||||
|
||||
## v1.3.0-rc.1
|
||||
|
||||
* b50d86c1 Update build system to accept a TAG variable for things like rc.x
|
||||
* fe65573b Add common CI tests for things like golint, gofmt, unit tests, etc.
|
||||
* da6fbb34 Revert "Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*"
|
||||
* a7fb3330 Flip build-all targets to run automatically on merge requests
|
||||
* 8b248b66 Rename github.com/NVIDIA/container-toolkit to nvidia-container-toolkit
|
||||
* da36874e Add new config options to pull device list from mounted files instead of ENVVAR
|
||||
|
||||
## v1.2.1
|
||||
|
||||
* 4e6e0ed4 Add 'ngx' to list of*all* driver capabilities
|
||||
* 2f4af743 List config.toml as a config file in the RPM SPEC
|
||||
|
||||
## v1.2.0
|
||||
|
||||
* 8e0aab46 Fix repo listed in changelog for debian distributions
|
||||
* 320bb6e4 Update dependence on libnvidia-container to 1.2.0
|
||||
* 6cfc8097 Update package license to match source license
|
||||
* e7dc3cbb Fix debian copyright file
|
||||
* d3aee3e0 Add the 'ngx' driver capability
|
||||
|
||||
## v1.1.2
|
||||
|
||||
* c32237f3 Add support for parsing Linux Capabilities for older OCI specs
|
||||
|
||||
## v1.1.1
|
||||
|
||||
* d202aded Update dependence to libnvidia-container 1.1.1
|
||||
|
||||
## v1.1.0
|
||||
|
||||
* 4e4de762 Update build system to support multi-arch builds
|
||||
* fcc1d116 Add support for MIG (Multi-Instance GPUs)
|
||||
* d4ff0416 Add ability to merge envars of the form NVIDIA_VISIBLE_DEVICES_*
|
||||
* 60f165ad Add no-pivot option to toolkit
|
||||
|
||||
## v1.0.5
|
||||
|
||||
* Initial release. Replaces older package nvidia-container-runtime-hook. (Closes: #XXXXXX)
|
||||
@@ -13,7 +13,7 @@ The `nvidia-container-toolkit` resides in this repo directly.
|
||||
|
||||
In oder to build the packages, the following command is executed
|
||||
```sh
|
||||
./scripts/build-packages.sh TARGET
|
||||
./scripts/build-all-components.sh TARGET
|
||||
```
|
||||
where `TARGET` is a make target that is valid for each of the sub-components.
|
||||
|
||||
@@ -21,8 +21,6 @@ These include:
|
||||
* `ubuntu18.04-amd64`
|
||||
* `centos8-x86_64`
|
||||
|
||||
If no `TARGET` is specified, all valid release targets are built.
|
||||
|
||||
The packages are generated in the `dist` folder.
|
||||
|
||||
## Testing local changes
|
||||
@@ -39,23 +37,9 @@ The [test/release](./test/release/) folder contains documentation on how the ins
|
||||
|
||||
## Releasing
|
||||
|
||||
In order to release packages required for a release, a utility script
|
||||
[`scripts/release-packages.sh`](./scripts/release-packages.sh) is provided.
|
||||
This script can be executed as follows:
|
||||
|
||||
```bash
|
||||
GPG_LOCAL_USER="GPG_USER" \
|
||||
MASTER_KEY_PATH=/path/to/gpg-master.key \
|
||||
SUB_KEY_PATH=/path/to/gpg-subkey.key \
|
||||
./scripts/release-packages.sh REPO PACKAGE_REPO_ROOT [REFERENCE]
|
||||
A utility script [`scripts/release.sh`](./scripts/release.sh) is provided to build
|
||||
packages required for release. If run without arguments, all supported distribution-architecture combinations are built. A specific distribution-architecture pair can also be provided
|
||||
```sh
|
||||
./scripts/release.sh ubuntu18.04-amd64
|
||||
```
|
||||
|
||||
Where `REPO` is one of `stable` or `experimental`, `PACKAGE_REPO_ROOT` is the local path to the `libnvidia-container` repository checked out to the `gh-pages` branch, and `REFERENCE` is the git SHA that is to be released. If reference is not specified `HEAD` is assumed.
|
||||
|
||||
This scripts performs the following basic functions:
|
||||
* Pulls the package image defined by the `REFERENCE` git SHA from the staging registry,
|
||||
* Copies the required packages to the package repository at `PACKAGE_REPO_ROOT/REPO`,
|
||||
* Signs the packages using the specified GPG keys
|
||||
|
||||
While the last two are performed, commits are added to the package repository. These can be pushed to the relevant repository.
|
||||
|
||||
where the `amd64` builds for `ubuntu18.04` are provided as an example.
|
||||
|
||||
34
Makefile
34
Makefile
@@ -38,21 +38,16 @@ EXAMPLE_TARGETS := $(patsubst %,example-%, $(EXAMPLES))
|
||||
CMDS := $(patsubst ./cmd/%/,%,$(sort $(dir $(wildcard ./cmd/*/))))
|
||||
CMD_TARGETS := $(patsubst %,cmd-%, $(CMDS))
|
||||
|
||||
$(info CMD_TARGETS=$(CMD_TARGETS))
|
||||
|
||||
CHECK_TARGETS := assert-fmt vet lint ineffassign misspell
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate licenses $(CHECK_TARGETS)
|
||||
MAKE_TARGETS := binaries build check fmt lint-internal test examples cmds coverage generate $(CHECK_TARGETS)
|
||||
|
||||
TARGETS := $(MAKE_TARGETS) $(EXAMPLE_TARGETS) $(CMD_TARGETS)
|
||||
|
||||
DOCKER_TARGETS := $(patsubst %,docker-%, $(TARGETS))
|
||||
.PHONY: $(TARGETS) $(DOCKER_TARGETS)
|
||||
|
||||
ifeq ($(VERSION),)
|
||||
CLI_VERSION = $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
else
|
||||
CLI_VERSION = $(VERSION)
|
||||
endif
|
||||
CLI_VERSION_PACKAGE = github.com/NVIDIA/nvidia-container-toolkit/internal/info
|
||||
|
||||
GOOS ?= linux
|
||||
|
||||
binaries: cmds
|
||||
@@ -61,7 +56,7 @@ cmd-%: COMMAND_BUILD_OPTIONS = -o $(PREFIX)/$(*)
|
||||
endif
|
||||
cmds: $(CMD_TARGETS)
|
||||
$(CMD_TARGETS): cmd-%:
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w -X $(CLI_VERSION_PACKAGE).gitCommit=$(GIT_COMMIT) -X $(CLI_VERSION_PACKAGE).version=$(CLI_VERSION)" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
GOOS=$(GOOS) go build -ldflags "-s -w" $(COMMAND_BUILD_OPTIONS) $(MODULE)/cmd/$(*)
|
||||
|
||||
build:
|
||||
GOOS=$(GOOS) go build ./...
|
||||
@@ -95,7 +90,11 @@ ineffassign:
|
||||
|
||||
lint:
|
||||
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
|
||||
go list -f '{{.Dir}}' $(MODULE)/... | xargs golint -set_exit_status
|
||||
go list -f '{{.Dir}}' $(MODULE)/... | grep -v /internal/ | xargs golint -set_exit_status
|
||||
|
||||
lint-internal:
|
||||
# We use `go list -f '{{.Dir}}' $(MODULE)/...` to skip the `vendor` folder.
|
||||
go list -f '{{.Dir}}' $(MODULE)/internal/... | xargs golint -set_exit_status
|
||||
|
||||
misspell:
|
||||
misspell $(MODULE)/...
|
||||
@@ -103,9 +102,6 @@ misspell:
|
||||
vet:
|
||||
go vet $(MODULE)/...
|
||||
|
||||
licenses:
|
||||
go-licenses csv $(MODULE)/...
|
||||
|
||||
COVERAGE_FILE := coverage.out
|
||||
test: build cmds
|
||||
go test -v -coverprofile=$(COVERAGE_FILE) $(MODULE)/...
|
||||
@@ -146,15 +142,3 @@ $(DOCKER_TARGETS): docker-%: .build-image
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE) \
|
||||
make $(*)
|
||||
|
||||
# Start an interactive shell using the development image.
|
||||
PHONY: .shell
|
||||
.shell:
|
||||
$(DOCKER) run \
|
||||
--rm \
|
||||
-ti \
|
||||
-e GOCACHE=/tmp/.cache \
|
||||
-v $(PWD):$(PWD) \
|
||||
-w $(PWD) \
|
||||
--user $$(id -u):$$(id -g) \
|
||||
$(BUILDIMAGE)
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
# NVIDIA Container Toolkit
|
||||
|
||||
[](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/main/LICENSE)
|
||||
[](https://raw.githubusercontent.com/NVIDIA/nvidia-container-toolkit/master/LICENSE)
|
||||
[](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/overview.html)
|
||||
[](https://nvidia.github.io/libnvidia-container)
|
||||
|
||||
|
||||
@@ -42,16 +42,6 @@ RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" .
|
||||
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ARG BASE_DIST
|
||||
# See https://www.centos.org/centos-linux-eol/
|
||||
# and https://stackoverflow.com/a/70930049 for move to vault.centos.org
|
||||
# and https://serverfault.com/questions/1093922/failing-to-run-yum-update-in-centos-8 for move to vault.epel.cloud
|
||||
RUN [[ "${BASE_DIST}" != "centos8" ]] || \
|
||||
( \
|
||||
sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-Linux-* && \
|
||||
sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.epel.cloud|g' /etc/yum.repos.d/CentOS-Linux-* \
|
||||
)
|
||||
|
||||
ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||
ENV NVIDIA_VISIBLE_DEVICES=all
|
||||
ENV NVIDIA_DRIVER_CAPABILITIES=utility
|
||||
@@ -63,13 +53,11 @@ COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
ARG PACKAGE_VERSION
|
||||
ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
|
||||
yum localinstall -y \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-1.*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*-${PACKAGE_VERSION}*.rpm
|
||||
ARG PACKAGE_ARCH
|
||||
RUN yum localinstall -y \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-${PACKAGE_VERSION}*.rpm \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit-${PACKAGE_VERSION}*.rpm
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
@@ -85,7 +73,7 @@ LABEL release="N/A"
|
||||
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
||||
LABEL description="See summary"
|
||||
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
|
||||
# Install / upgrade packages here that are required to resolve CVEs
|
||||
ARG CVE_UPDATES
|
||||
|
||||
@@ -26,4 +26,4 @@ COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
|
||||
@@ -40,15 +40,11 @@ COPY . .
|
||||
RUN GOPATH=/artifacts go install -ldflags="-s -w -X 'main.Version=${VERSION}'" ./tools/...
|
||||
|
||||
|
||||
FROM nvcr.io/nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
|
||||
RUN rm -f /etc/apt/sources.list.d/cuda.list
|
||||
FROM nvidia/cuda:${CUDA_VERSION}-base-${BASE_DIST}
|
||||
|
||||
ARG DEBIAN_FRONTEND=noninteractive
|
||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||
libcap2 \
|
||||
curl \
|
||||
&& \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
|
||||
@@ -63,21 +59,11 @@ COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
|
||||
WORKDIR /artifacts/packages
|
||||
|
||||
ARG PACKAGE_VERSION
|
||||
ARG TARGETARCH
|
||||
ENV PACKAGE_ARCH ${TARGETARCH}
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_REPO="https://nvidia.github.io/libnvidia-container"
|
||||
ARG LIBNVIDIA_CONTAINER0_VERSION
|
||||
RUN if [ "${PACKAGE_ARCH}" = "arm64" ]; then \
|
||||
curl -L ${LIBNVIDIA_CONTAINER_REPO}/${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb \
|
||||
--output ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb && \
|
||||
dpkg -i ${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container0_${LIBNVIDIA_CONTAINER0_VERSION}_${PACKAGE_ARCH}.deb; \
|
||||
fi
|
||||
|
||||
ARG PACKAGE_ARCH
|
||||
RUN dpkg -i \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*_${PACKAGE_VERSION}*.deb
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_${PACKAGE_VERSION}*.deb \
|
||||
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit_${PACKAGE_VERSION}*.deb
|
||||
|
||||
WORKDIR /work
|
||||
|
||||
@@ -93,13 +79,6 @@ LABEL release="N/A"
|
||||
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
||||
LABEL description="See summary"
|
||||
|
||||
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||
|
||||
# Install / upgrade packages here that are required to resolve CVEs
|
||||
ARG CVE_UPDATES
|
||||
RUN if [ -n "${CVE_UPDATES}" ]; then \
|
||||
apt-get update && apt-get upgrade -y ${CVE_UPDATES} && \
|
||||
rm -rf /var/lib/apt/lists/*; \
|
||||
fi
|
||||
COPY ./LICENSE /licenses/LICENSE
|
||||
|
||||
ENTRYPOINT ["/work/nvidia-toolkit"]
|
||||
|
||||
@@ -12,14 +12,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
BUILD_MULTI_ARCH_IMAGES ?= false
|
||||
DOCKER ?= docker
|
||||
|
||||
BUILDX =
|
||||
ifeq ($(BUILD_MULTI_ARCH_IMAGES),true)
|
||||
BUILDX = buildx
|
||||
endif
|
||||
|
||||
DOCKER ?= docker
|
||||
MKDIR ?= mkdir
|
||||
DIST_DIR ?= $(CURDIR)/dist
|
||||
|
||||
@@ -32,47 +25,36 @@ IMAGE_NAME := $(REGISTRY)/container-toolkit
|
||||
endif
|
||||
|
||||
VERSION ?= $(LIB_VERSION)$(if $(LIB_TAG),-$(LIB_TAG))
|
||||
IMAGE_VERSION := $(VERSION)
|
||||
|
||||
IMAGE_TAG ?= $(VERSION)-$(DIST)
|
||||
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
|
||||
|
||||
OUT_IMAGE_NAME ?= $(IMAGE_NAME)
|
||||
OUT_IMAGE_VERSION ?= $(IMAGE_VERSION)
|
||||
OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
|
||||
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
|
||||
|
||||
##### Public rules #####
|
||||
DEFAULT_PUSH_TARGET := ubuntu20.04
|
||||
DISTRIBUTIONS := ubuntu20.04 ubuntu18.04 ubi8 centos7
|
||||
DEFAULT_PUSH_TARGET := ubuntu18.04
|
||||
TARGETS := ubuntu20.04 ubuntu18.04 ubi8 centos7 centos8
|
||||
|
||||
META_TARGETS := packaging
|
||||
|
||||
BUILD_TARGETS := $(patsubst %,build-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
PUSH_TARGETS := $(patsubst %,push-%,$(DISTRIBUTIONS) $(META_TARGETS))
|
||||
TEST_TARGETS := $(patsubst %,test-%, $(DISTRIBUTIONS))
|
||||
BUILD_TARGETS := $(patsubst %,build-%,$(TARGETS) $(META_TARGETS))
|
||||
PUSH_TARGETS := $(patsubst %,push-%,$(TARGETS) $(META_TARGETS))
|
||||
TEST_TARGETS := $(patsubst %,test-%, $(TARGETS))
|
||||
|
||||
.PHONY: $(DISTRIBUTIONS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||
.PHONY: $(TARGETS) $(PUSH_TARGETS) $(BUILD_TARGETS) $(TEST_TARGETS)
|
||||
|
||||
ifneq ($(BUILD_MULTI_ARCH_IMAGES),true)
|
||||
include $(CURDIR)/build/container/native-only.mk
|
||||
else
|
||||
include $(CURDIR)/build/container/multi-arch.mk
|
||||
endif
|
||||
push-%: DIST = $(*)
|
||||
$(PUSH_TARGETS): push-%:
|
||||
$(DOCKER) push "$(IMAGE_NAME):$(IMAGE_TAG)"
|
||||
|
||||
# For the default push target we also push a short tag equal to the version.
|
||||
# We skip this for the development release
|
||||
DEVEL_RELEASE_IMAGE_VERSION ?= devel
|
||||
PUSH_MULTIPLE_TAGS ?= true
|
||||
ifeq ($(strip $(OUT_IMAGE_VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
|
||||
PUSH_MULTIPLE_TAGS = false
|
||||
endif
|
||||
ifeq ($(PUSH_MULTIPLE_TAGS),true)
|
||||
ifneq ($(strip $(VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
|
||||
push-$(DEFAULT_PUSH_TARGET): push-short
|
||||
endif
|
||||
push-short:
|
||||
$(DOCKER) tag "$(IMAGE_NAME):$(VERSION)-$(DEFAULT_PUSH_TARGET)" "$(IMAGE_NAME):$(VERSION)"
|
||||
$(DOCKER) push "$(IMAGE_NAME):$(VERSION)"
|
||||
|
||||
push-%: DIST = $(*)
|
||||
push-short: DIST = $(DEFAULT_PUSH_TARGET)
|
||||
|
||||
build-%: DIST = $(*)
|
||||
build-%: DOCKERFILE = $(CURDIR)/build/container/Dockerfile.$(DOCKERFILE_SUFFIX)
|
||||
@@ -82,17 +64,16 @@ ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
||||
# Use a generic build target to build the relevant images
|
||||
$(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
DOCKER_BUILDKIT=1 \
|
||||
$(DOCKER) $(BUILDX) build --pull \
|
||||
$(DOCKER_BUILD_OPTIONS) \
|
||||
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
|
||||
$(DOCKER) build --pull \
|
||||
--platform=linux/amd64 \
|
||||
--tag $(IMAGE) \
|
||||
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
|
||||
--build-arg BASE_DIST="$(BASE_DIST)" \
|
||||
--build-arg CUDA_VERSION="$(CUDA_VERSION)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER0_VERSION="$(LIBNVIDIA_CONTAINER0_DEPENDENCY)" \
|
||||
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
|
||||
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
|
||||
--build-arg PACKAGE_ARCH="$(PACKAGE_ARCH)" \
|
||||
--build-arg VERSION="$(VERSION)" \
|
||||
--build-arg CVE_UPDATES="$(CVE_UPDATES)" \
|
||||
-f $(DOCKERFILE) \
|
||||
@@ -101,19 +82,22 @@ $(BUILD_TARGETS): build-%: $(ARTIFACTS_ROOT)
|
||||
|
||||
build-ubuntu%: BASE_DIST = $(*)
|
||||
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
|
||||
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
|
||||
build-ubuntu%: PACKAGE_ARCH := amd64
|
||||
build-ubuntu%: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-ubuntu%: PACKAGE_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
build-ubuntu%: LIBNVIDIA_CONTAINER0_DEPENDENCY=$(LIBNVIDIA_CONTAINER0_VERSION)
|
||||
|
||||
# TODO: Update this to use the centos8 packages
|
||||
build-ubi8: BASE_DIST := ubi8
|
||||
build-ubi8: DOCKERFILE_SUFFIX := centos
|
||||
build-ubi8: PACKAGE_DIST = centos8
|
||||
build-ubi8: PACKAGE_ARCH := x86_64
|
||||
build-ubi8: PACKAGE_DIST = centos7
|
||||
build-ubi8: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-centos7: BASE_DIST = $(*)
|
||||
build-centos7: DOCKERFILE_SUFFIX := centos
|
||||
build-centos7: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos7: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
build-centos%: BASE_DIST = $(*)
|
||||
build-centos%: DOCKERFILE_SUFFIX := centos
|
||||
build-centos%: PACKAGE_ARCH := x86_64
|
||||
build-centos%: PACKAGE_DIST = $(BASE_DIST)
|
||||
build-centos%: PACKAGE_VERSION := $(LIB_VERSION)-$(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
build-packaging: BASE_DIST := ubuntu20.04
|
||||
build-packaging: DOCKERFILE_SUFFIX := packaging
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
PUSH_ON_BUILD ?= false
|
||||
DOCKER_BUILD_OPTIONS = --output=type=image,push=$(PUSH_ON_BUILD)
|
||||
DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64,linux/arm64
|
||||
|
||||
REGCTL ?= regctl
|
||||
$(PUSH_TARGETS): push-%:
|
||||
$(REGCTL) \
|
||||
image copy \
|
||||
$(IMAGE) $(OUT_IMAGE)
|
||||
|
||||
push-short:
|
||||
$(REGCTL) \
|
||||
image copy \
|
||||
$(IMAGE) $(OUT_IMAGE_NAME):$(OUT_IMAGE_VERSION)
|
||||
|
||||
# We only have x86_64 packages for centos7
|
||||
build-centos7: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate amd64 image for ubuntu18.04
|
||||
build-ubuntu18.04: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
|
||||
# We only generate a single image for packaging targets
|
||||
build-packaging: DOCKER_BUILD_PLATFORM_OPTIONS = --platform=linux/amd64
|
||||
170
cmd/nvidia-container-runtime.experimental/README.md
Normal file
170
cmd/nvidia-container-runtime.experimental/README.md
Normal file
@@ -0,0 +1,170 @@
|
||||
# The Experimental NVIDIA Container Runtime
|
||||
|
||||
## Introduction
|
||||
|
||||
The experimental NVIDIA Container Runtime is a proof-of-concept runtime that
|
||||
approaches the problem of making GPUs (or other NVIDIA devices) available in
|
||||
containerized environments in a different manner to the existing
|
||||
[NVIDIA Container Runtime](../nvidia-container-runtime). Wherease the current
|
||||
runtime relies on the [NVIDIA Container Library](https://github.com/NVIDIA/libnvidia-container)
|
||||
to perform the modifications to a container, the experiemental runtime aims to
|
||||
express the required modifications in terms of changes to a container's [OCI
|
||||
runtime specification](https://github.com/opencontainers/runtime-spec). This
|
||||
also aligns with open initiatives such as the [Container Device Interface (CDI)](https://github.com/container-orchestrated-devices/container-device-interface).
|
||||
|
||||
## Known Limitations
|
||||
|
||||
* The path of NVIDIA CUDA libraries / binaries injected into the container currently match that of the host system. This means that on an Ubuntu-based host systems these would be at `/usr/lib/x86_64-linux-gnu`
|
||||
even if the container distribution would normally expect these at another location (e.g. `/usr/lib64`)
|
||||
* Tools such as `nvidia-smi` may create additional device nodes in the container when run. This is
|
||||
prevented in the "classic" runtime (and the NVIDIA Container Library) by modifying
|
||||
the `/proc/driver/nvidia/params` file in the container.
|
||||
* Other `NVIDIA_*` environment variables (e.g. `NVIDIA_DRIVER_CAPABILITIES`) are
|
||||
not considered to filter mounted libraries or binaries. This is equivalent to
|
||||
always using `NVIDIA_DRIVER_CAPABILITIES=all`.
|
||||
|
||||
## Building / Installing
|
||||
|
||||
The experimental NVIDIA Container Runtime is a self-contained golang binary and
|
||||
can thus be built from source, or installed directly using `go install`.
|
||||
|
||||
### From source
|
||||
|
||||
After cloning the `nvidia-container-toolkit` repository from [GitLab](https://gitlab.com/nvidia/container-toolkit/container-toolkit)
|
||||
or from the read-only mirror on [GitHub](https://github.com/NVIDIA/nvidia-container-toolkit)
|
||||
running the following make command in the repository root:
|
||||
```bash
|
||||
make cmd-nvidia-container-runtime.experimental
|
||||
```
|
||||
will create an executable file `nvidia-container-runtime.experimental` in the
|
||||
root.
|
||||
|
||||
A dockerized target:
|
||||
```bash
|
||||
make docker-cmd-nvidia-container-runtime.experimental
|
||||
```
|
||||
will also create the executable file `nvidia-container-runtime.experimental`
|
||||
without requiring the setup of a development environment (with the exception)
|
||||
of having `make` and `docker` installed.
|
||||
|
||||
### Go install
|
||||
|
||||
The experimental NVIDIA Container Runtime can also be `go installed` by running
|
||||
the following command:
|
||||
|
||||
```bash
|
||||
go install github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental@experimental
|
||||
```
|
||||
which will build and install the `nvidia-container-runtime.experimental`
|
||||
executable in the `${GOPATH}/bin` folder.
|
||||
|
||||
## Using the Runtime
|
||||
|
||||
The experimental NVIDIA Container Runtime is intended as a drop-in replacement
|
||||
for the "classic" NVIDIA Container Runtime. As such it is used in the same
|
||||
way (with the exception of the known limitiations noted above).
|
||||
|
||||
In general terms, to use the experimental NVIDIA Container Runtime to launch a
|
||||
container with GPU support, it should be inserted as a shim for the desired
|
||||
low-level OCI-compliant runtime (e.g. `runc` or `crun`). How this is achieved
|
||||
depends on how containers are being launched.
|
||||
|
||||
### Docker
|
||||
In the case of `docker` for example, the runtime must be registered with the
|
||||
Docker daemon. This can be done by modifying the `/etc/docker/daemon.json` file
|
||||
to contain the following:
|
||||
```json
|
||||
{
|
||||
"runtimes": {
|
||||
"nvidia-experimental": {
|
||||
"path": "nvidia-container-runtime.experimental",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
},
|
||||
}
|
||||
```
|
||||
This can then be invoked from docker by including the `--runtime=nvidia-experimental`
|
||||
option when executing a `docker run` command.
|
||||
|
||||
### Runc
|
||||
|
||||
If `runc` is being used to run a container directly substituting the `runc`
|
||||
command for `nvidia-container-runtime.experimental` should be sufficient as
|
||||
the latter will `exec` to `runc` once the required (in-place) modifications have
|
||||
been made to the container's OCI spec (`config.json` file).
|
||||
|
||||
## Configuration
|
||||
|
||||
### Runtime Path
|
||||
The experimental NVIDIA Container Runtime allows for the path to the low-level
|
||||
runtime to be specified. This is done by setting the following option in the
|
||||
`/etc/nvidia-container-runtime/config.toml` file or setting the
|
||||
`NVIDIA_CONTAINER_RUNTIME_PATH` environment variable.
|
||||
|
||||
```toml
|
||||
[nvidia-container-runtime.experimental]
|
||||
|
||||
runtime-path = "/path/to/low-level-runtime"
|
||||
```
|
||||
This path can be set to the path for `runc` or `crun` on a system and if it is
|
||||
a relative path, the `PATH` is searched for a matching executable.
|
||||
|
||||
### Device Selection
|
||||
In order to select a specific device, the experimental NVIDIA Container Runtime
|
||||
mimics the behaviour of the "classic" runtime. That is to say that the values of
|
||||
certain [environment variables](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#environment-variables-oci-spec)
|
||||
**in the container's OCI specification** control the behaviour of the runtime.
|
||||
|
||||
#### `NVIDIA_VISIBLE_DEVICES`
|
||||
This variable controls which GPUs will be made accessible inside the container.
|
||||
|
||||
##### Possible values
|
||||
* `0,1,2`, `GPU-fef8089b` …: a comma-separated list of GPU UUID(s) or index(es).
|
||||
* `all`: all GPUs will be accessible, this is the default value in our container images.
|
||||
* `none`: no GPU will be accessible, but driver capabilities will be enabled.
|
||||
* `void` or *empty* or *unset*: `nvidia-container-runtime` will have the same behavior as `runc`.
|
||||
|
||||
**Note**: When running on a MIG capable device, the following values will also be available:
|
||||
* `0:0,0:1,1:0`, `MIG-GPU-fef8089b/0/1` …: a comma-separated list of MIG Device UUID(s) or index(es).
|
||||
|
||||
Where the MIG device indices have the form `<GPU Device Index>:<MIG Device Index>` as seen in the example output:
|
||||
```
|
||||
$ nvidia-smi -L
|
||||
GPU 0: Graphics Device (UUID: GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5)
|
||||
MIG Device 0: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/0)
|
||||
MIG Device 1: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/1/1)
|
||||
MIG Device 2: (UUID: MIG-GPU-b8ea3855-276c-c9cb-b366-c6fa655957c5/11/0)
|
||||
```
|
||||
|
||||
#### `NVIDIA_MIG_CONFIG_DEVICES`
|
||||
This variable controls which of the visible GPUs can have their MIG
|
||||
configuration managed from within the container. This includes enabling and
|
||||
disabling MIG mode, creating and destroying GPU Instances and Compute
|
||||
Instances, etc.
|
||||
|
||||
##### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG configurations managed.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/config` file on the host.
|
||||
|
||||
#### `NVIDIA_MIG_MONITOR_DEVICES`
|
||||
This variable controls which of the visible GPUs can have aggregate information
|
||||
about all of their MIG devices monitored from within the container. This
|
||||
includes inspecting the aggregate memory usage, listing the aggregate running
|
||||
processes, etc.
|
||||
|
||||
##### Possible values
|
||||
* `all`: Allow all MIG-capable GPUs in the visible device list to have their
|
||||
MIG devices monitored.
|
||||
|
||||
**Note**:
|
||||
* This feature is only available on MIG capable devices (e.g. the A100).
|
||||
* To use this feature, the container must be started with `CAP_SYS_ADMIN` privileges.
|
||||
* When not running as `root`, the container user must have read access to the
|
||||
`/proc/driver/nvidia/capabilities/mig/monitor` file on the host.
|
||||
|
||||
@@ -0,0 +1,52 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// list represents a list of config updaters that are applied in order, with later
|
||||
// configs having preference.
|
||||
type list struct {
|
||||
logger *log.Logger
|
||||
configs []configUpdater
|
||||
}
|
||||
|
||||
var _ configUpdater = (*list)(nil)
|
||||
|
||||
func newListWithLogger(logger *log.Logger, ci ...configUpdater) configUpdater {
|
||||
c := list{
|
||||
logger: logger,
|
||||
configs: ci,
|
||||
}
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c list) Update(cfg *Config) error {
|
||||
for i, u := range c.configs {
|
||||
c.logger.Debugf("Applying config %v: %v", i, u)
|
||||
err := u.Update(cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error applying config %v: %v", i, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,100 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestComposite(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
// Empty list
|
||||
c := newListWithLogger(logger)
|
||||
|
||||
cfg := &Config{}
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{}, cfg)
|
||||
|
||||
// Add a single mock config
|
||||
mockConfigUpdater := &configUpdaterMock{
|
||||
UpdateFunc: func(cfg *Config) error {
|
||||
cfg.DebugFilePath = "updated"
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
c = newListWithLogger(logger, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 1)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// define an error config
|
||||
errorConfigUpdater := &configUpdaterMock{
|
||||
UpdateFunc: func(cfg *Config) error {
|
||||
return fmt.Errorf("mock error")
|
||||
},
|
||||
}
|
||||
|
||||
c = newListWithLogger(logger, errorConfigUpdater, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.Error(t, err)
|
||||
require.EqualValues(t, &Config{}, cfg)
|
||||
|
||||
require.Len(t, errorConfigUpdater.UpdateCalls(), 1)
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 0)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
errorConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// Change the order of the config and error
|
||||
c = newListWithLogger(logger, mockConfigUpdater, errorConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.Error(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, errorConfigUpdater.UpdateCalls(), 1)
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 1)
|
||||
|
||||
// Reset the calls
|
||||
mockConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
errorConfigUpdater.calls = struct{ Update []struct{ Config *Config } }{}
|
||||
|
||||
// Call the mock twice
|
||||
c = newListWithLogger(logger, mockConfigUpdater, mockConfigUpdater)
|
||||
cfg = &Config{}
|
||||
err = c.Update(cfg)
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &Config{DebugFilePath: "updated"}, cfg)
|
||||
|
||||
require.Len(t, mockConfigUpdater.UpdateCalls(), 2)
|
||||
}
|
||||
63
cmd/nvidia-container-runtime.experimental/config/config.go
Normal file
63
cmd/nvidia-container-runtime.experimental/config/config.go
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// Config defines the configuration options for the NVIDIA Container Runtime
|
||||
type Config struct {
|
||||
// Root defines the root of the file system to be used for locating mounts
|
||||
Root string
|
||||
// DebugFilePath defines a log file to print debug output to
|
||||
DebugFilePath string
|
||||
// RuntimePath defines the path to an OCI compliant runtime
|
||||
RuntimePath string
|
||||
// LogLevel defines the logging level for the application
|
||||
LogLevel string
|
||||
}
|
||||
|
||||
//go:generate moq -stub -out config_mock.go . configUpdater
|
||||
|
||||
// configUpdate represents an interface for applying updates to a config.
|
||||
type configUpdater interface {
|
||||
Update(*Config) error
|
||||
}
|
||||
|
||||
// GetConfig returns a config struct with the values resolved. The values are defined in order of
|
||||
// priority:
|
||||
// 1. From the associated environment variables
|
||||
// 2. From the loaded config file
|
||||
// 3. From the default values defined in the `defaultConfig` function
|
||||
func GetConfig(logger *log.Logger) (*Config, error) {
|
||||
cfg := &Config{}
|
||||
|
||||
configs := newListWithLogger(logger,
|
||||
newDefaultConfig(),
|
||||
newDefaultConfigFileWithLogger(logger),
|
||||
newConfigFromEnvironment(),
|
||||
)
|
||||
|
||||
err := configs.Update(cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting config: %v", err)
|
||||
}
|
||||
return cfg, nil
|
||||
}
|
||||
@@ -0,0 +1,76 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that configUpdaterMock does implement configUpdater.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ configUpdater = &configUpdaterMock{}
|
||||
|
||||
// configUpdaterMock is a mock implementation of configUpdater.
|
||||
//
|
||||
// func TestSomethingThatUsesconfigUpdater(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked configUpdater
|
||||
// mockedconfigUpdater := &configUpdaterMock{
|
||||
// UpdateFunc: func(config *Config) error {
|
||||
// panic("mock out the Update method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedconfigUpdater in code that requires configUpdater
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type configUpdaterMock struct {
|
||||
// UpdateFunc mocks the Update method.
|
||||
UpdateFunc func(config *Config) error
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Update holds details about calls to the Update method.
|
||||
Update []struct {
|
||||
// Config is the config argument value.
|
||||
Config *Config
|
||||
}
|
||||
}
|
||||
lockUpdate sync.RWMutex
|
||||
}
|
||||
|
||||
// Update calls UpdateFunc.
|
||||
func (mock *configUpdaterMock) Update(config *Config) error {
|
||||
callInfo := struct {
|
||||
Config *Config
|
||||
}{
|
||||
Config: config,
|
||||
}
|
||||
mock.lockUpdate.Lock()
|
||||
mock.calls.Update = append(mock.calls.Update, callInfo)
|
||||
mock.lockUpdate.Unlock()
|
||||
if mock.UpdateFunc == nil {
|
||||
var (
|
||||
errOut error
|
||||
)
|
||||
return errOut
|
||||
}
|
||||
return mock.UpdateFunc(config)
|
||||
}
|
||||
|
||||
// UpdateCalls gets all the calls that were made to Update.
|
||||
// Check the length with:
|
||||
// len(mockedconfigUpdater.UpdateCalls())
|
||||
func (mock *configUpdaterMock) UpdateCalls() []struct {
|
||||
Config *Config
|
||||
} {
|
||||
var calls []struct {
|
||||
Config *Config
|
||||
}
|
||||
mock.lockUpdate.RLock()
|
||||
calls = mock.calls.Update
|
||||
mock.lockUpdate.RUnlock()
|
||||
return calls
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
testDir := path.Join(wd, "test")
|
||||
filename := path.Join(testDir, configFileRelativePath)
|
||||
|
||||
previousConfig, present := os.LookupEnv(configOverride)
|
||||
os.Setenv(configOverride, testDir)
|
||||
defer func() {
|
||||
if present {
|
||||
os.Setenv(configOverride, previousConfig)
|
||||
} else {
|
||||
os.Unsetenv(configOverride)
|
||||
}
|
||||
}()
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
cfg, err := GetConfig(logger)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/nvidia-container-toolkit.log", cfg.DebugFilePath)
|
||||
}
|
||||
42
test/release/docker/fedora35/install_repo.sh → cmd/nvidia-container-runtime.experimental/config/default.go
Executable file → Normal file
42
test/release/docker/fedora35/install_repo.sh → cmd/nvidia-container-runtime.experimental/config/default.go
Executable file → Normal file
@@ -1,5 +1,4 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@@ -13,13 +12,36 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
# This script is used to build the packages for the components of the NVIDIA
|
||||
# Container Stack. These include the nvidia-container-toolkit in this repository
|
||||
# as well as the components included in the third_party folder.
|
||||
# All required packages are generated in the specified dist folder.
|
||||
package config
|
||||
|
||||
test_repo=$1
|
||||
echo "Setting up TEST repo: ${test_repo}"
|
||||
sed -i -e "s#nvidia\.github\.io/libnvidia-container#${test_repo}/libnvidia-container#g" /etc/yum.repos.d/nvidia-container-toolkit.repo
|
||||
yum-config-manager --enable libnvidia-container-experimental
|
||||
import (
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
type defaultConfig struct{}
|
||||
|
||||
var _ configUpdater = (*defaultConfig)(nil)
|
||||
|
||||
func newDefaultConfig() configUpdater {
|
||||
c := defaultConfig{}
|
||||
return &c
|
||||
}
|
||||
|
||||
// Update defines the the default values for the config options
|
||||
func (c defaultConfig) Update(cfg *Config) error {
|
||||
*cfg = Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: log.InfoLevel.String(),
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func getDefaultConfig() *Config {
|
||||
cfg := &Config{}
|
||||
|
||||
defaultConfig{}.Update(cfg)
|
||||
|
||||
return cfg
|
||||
}
|
||||
@@ -0,0 +1,37 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestDefaultConfigUpdate(t *testing.T) {
|
||||
cfg := &Config{}
|
||||
|
||||
require.Empty(t, cfg.DebugFilePath)
|
||||
|
||||
c := defaultConfig{}
|
||||
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/dev/null", cfg.DebugFilePath)
|
||||
require.Equal(t, "", cfg.Root)
|
||||
}
|
||||
@@ -0,0 +1,61 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
)
|
||||
|
||||
const (
|
||||
debugFilePathEnvvarName = "NVIDIA_CONTAINER_RUNTIME_DEBUG"
|
||||
runtimePathEnvvarName = "NVIDIA_CONTAINER_RUNTIME_PATH"
|
||||
rootEnvvarName = "NVIDIA_CONTAINER_RUNTIME_ROOT"
|
||||
logLevelEnvvarName = "NVIDIA_CONTAINER_RUNTIME_LOG_LEVEL"
|
||||
)
|
||||
|
||||
type envConfig struct{}
|
||||
|
||||
func newConfigFromEnvironment() configUpdater {
|
||||
c := envConfig{}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c envConfig) Update(cfg *Config) error {
|
||||
debugFilePathEnvvar, exists := os.LookupEnv(debugFilePathEnvvarName)
|
||||
if exists && strings.TrimSpace(debugFilePathEnvvar) != "" {
|
||||
cfg.DebugFilePath = debugFilePathEnvvar
|
||||
}
|
||||
|
||||
runtimePathEnvvar, exists := os.LookupEnv(runtimePathEnvvarName)
|
||||
if exists && strings.TrimSpace(runtimePathEnvvar) != "" {
|
||||
cfg.RuntimePath = runtimePathEnvvar
|
||||
}
|
||||
|
||||
rootEnvvar, exists := os.LookupEnv(rootEnvvarName)
|
||||
if exists && strings.TrimSpace(rootEnvvar) != "" {
|
||||
cfg.Root = rootEnvvar
|
||||
}
|
||||
|
||||
logLevelEnvvar, exists := os.LookupEnv(logLevelEnvvarName)
|
||||
if exists && strings.TrimSpace(logLevelEnvvar) != "" {
|
||||
cfg.LogLevel = logLevelEnvvar
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -12,13 +12,18 @@
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
*/
|
||||
|
||||
package constraints
|
||||
package config
|
||||
|
||||
//go:generate moq -stub -out constraint_mock.go . Constraint
|
||||
// Constraint represents a constraint that is to be evaluated
|
||||
type Constraint interface {
|
||||
String() string
|
||||
Assert() error
|
||||
// noop implements a configUpdater that does not update a config.
|
||||
type noop struct{}
|
||||
|
||||
func newNoopConfigUpdater() configUpdater {
|
||||
c := noop{}
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c noop) Update(cfg *Config) error {
|
||||
return nil
|
||||
}
|
||||
@@ -0,0 +1,36 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNoopDoesNotModifyConfig(t *testing.T) {
|
||||
cfg := &Config{
|
||||
DebugFilePath: "test-path",
|
||||
}
|
||||
|
||||
c := newNoopConfigUpdater()
|
||||
|
||||
err := c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "test-path", cfg.DebugFilePath)
|
||||
}
|
||||
158
cmd/nvidia-container-runtime.experimental/config/toml.go
Normal file
158
cmd/nvidia-container-runtime.experimental/config/toml.go
Normal file
@@ -0,0 +1,158 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
configFileRelativePath = "nvidia-container-runtime/config.toml"
|
||||
configOverride = "XDG_CONFIG_HOME"
|
||||
defaultConfigRoot = "/etc"
|
||||
|
||||
nvidiaContainerCliSection = "nvidia-container-cli"
|
||||
nvidiaContainerRuntimeConfigSection = "nvidia-container-runtime"
|
||||
nvidiaContainerRuntimeExperimentalConfigSection = "nvidia-container-runtime.experimental"
|
||||
)
|
||||
|
||||
type tomlConfig struct {
|
||||
logger *log.Logger
|
||||
path string
|
||||
sections []tomlSection
|
||||
}
|
||||
|
||||
type tomlSection struct {
|
||||
section string
|
||||
keys map[string]struct{}
|
||||
}
|
||||
|
||||
func newDefaultConfigFileWithLogger(logger *log.Logger) configUpdater {
|
||||
configDir := defaultConfigRoot
|
||||
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
||||
configDir = XDGConfigDir
|
||||
}
|
||||
configFilePath := filepath.Join(configDir, configFileRelativePath)
|
||||
|
||||
return newConfigFromFileWithLogger(logger, configFilePath)
|
||||
}
|
||||
|
||||
func newConfigFromFileWithLogger(logger *log.Logger, filepath string) configUpdater {
|
||||
if _, err := os.Stat(filepath); os.IsNotExist(err) {
|
||||
logger.Warnf("The config file '%v' does not exist", filepath)
|
||||
return newNoopConfigUpdater()
|
||||
}
|
||||
|
||||
sections := []tomlSection{
|
||||
{
|
||||
section: nvidiaContainerRuntimeConfigSection,
|
||||
},
|
||||
{
|
||||
section: nvidiaContainerRuntimeExperimentalConfigSection,
|
||||
},
|
||||
{
|
||||
section: nvidiaContainerCliSection,
|
||||
keys: map[string]struct{}{
|
||||
"root": {},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
path: filepath,
|
||||
sections: sections,
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (c tomlConfig) Update(cfg *Config) error {
|
||||
configFile, err := os.Open(c.path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening config file %v: %v", c.path, err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
|
||||
return c.updateFromReader(cfg, configFile)
|
||||
}
|
||||
|
||||
func (c tomlConfig) updateFromReader(cfg *Config, reader io.Reader) error {
|
||||
toml, err := toml.LoadReader(reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading TOML contents: %v", err)
|
||||
}
|
||||
|
||||
for _, section := range c.sections {
|
||||
if v, ok := section.GetStringFrom(toml, "debug"); ok {
|
||||
cfg.DebugFilePath = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "runtime-path"); ok {
|
||||
cfg.RuntimePath = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "root"); ok {
|
||||
cfg.Root = v
|
||||
}
|
||||
|
||||
if v, ok := section.GetStringFrom(toml, "log-level"); ok {
|
||||
cfg.LogLevel = v
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c tomlSection) GetStringFrom(toml *toml.Tree, key string) (string, bool) {
|
||||
value := c.GetFrom(toml, key)
|
||||
if value != nil {
|
||||
if v, ok := value.(string); ok {
|
||||
return v, ok
|
||||
}
|
||||
}
|
||||
return "", false
|
||||
}
|
||||
|
||||
func (c tomlSection) GetFrom(toml *toml.Tree, key string) interface{} {
|
||||
if !c.validKey(key) {
|
||||
return nil
|
||||
}
|
||||
return toml.Get(c.configKey(key))
|
||||
}
|
||||
|
||||
func (c tomlSection) validKey(key string) bool {
|
||||
if c.keys == nil {
|
||||
return true
|
||||
}
|
||||
|
||||
_, exists := c.keys[key]
|
||||
|
||||
return exists
|
||||
}
|
||||
|
||||
func (c tomlSection) configKey(key string) string {
|
||||
if c.section == "" {
|
||||
return key
|
||||
}
|
||||
return c.section + "." + key
|
||||
}
|
||||
259
cmd/nvidia-container-runtime.experimental/config/toml_test.go
Normal file
259
cmd/nvidia-container-runtime.experimental/config/toml_test.go
Normal file
@@ -0,0 +1,259 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"testing/iotest"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestUpdateFromReader(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
readerError bool
|
||||
lines []string
|
||||
expected *Config
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
description: "reader error returns error",
|
||||
readerError: true,
|
||||
expectedError: true,
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
description: "empty config returns defaults",
|
||||
lines: []string{},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "debug output is set",
|
||||
lines: []string{"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "debug output is set in section",
|
||||
lines: []string{"[nvidia-container-runtime]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "blank debug is set",
|
||||
lines: []string{"nvidia-container-runtime.debug=\"\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-string debug is ignored",
|
||||
lines: []string{"nvidia-container-runtime.debug=2"},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "log-level is set",
|
||||
lines: []string{"nvidia-container-runtime.log-level=\"trace\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "trace",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
sections: []tomlSection{
|
||||
{section: nvidiaContainerRuntimeConfigSection},
|
||||
},
|
||||
}
|
||||
var reader io.Reader
|
||||
if tc.readerError {
|
||||
reader = iotest.ErrReader(fmt.Errorf("error"))
|
||||
} else {
|
||||
reader = strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
}
|
||||
|
||||
err := c.updateFromReader(cfg, reader)
|
||||
|
||||
if tc.expectedError {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.EqualValues(t, tc.expected, cfg)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateFromReaderExperimental(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
readerError bool
|
||||
lines []string
|
||||
expected *Config
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"[nvidia-container-runtime]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=\"\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=2"},
|
||||
expected: getDefaultConfig(),
|
||||
},
|
||||
{
|
||||
lines: []string{"nvidia-container-runtime.experimental.debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{"[nvidia-container-runtime.experimental]", "debug=\"nvidia-container-toolkit.log\""},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
{
|
||||
lines: []string{
|
||||
"nvidia-container-runtime.debug=\"nvidia-container-toolkit.log\"",
|
||||
"nvidia-container-runtime.experimental.debug=\"nvidia-container-exp-toolkit.log\"",
|
||||
},
|
||||
expected: &Config{
|
||||
DebugFilePath: "nvidia-container-exp-toolkit.log",
|
||||
LogLevel: "info",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
c := tomlConfig{
|
||||
logger: logger,
|
||||
sections: []tomlSection{
|
||||
{section: nvidiaContainerRuntimeExperimentalConfigSection},
|
||||
},
|
||||
}
|
||||
var reader io.Reader
|
||||
if tc.readerError {
|
||||
reader = iotest.ErrReader(fmt.Errorf("error"))
|
||||
} else {
|
||||
reader = strings.NewReader(strings.Join(tc.lines, "\n"))
|
||||
}
|
||||
|
||||
err := c.updateFromReader(cfg, reader)
|
||||
|
||||
if tc.expectedError {
|
||||
require.Error(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.NoError(t, err, "%d: %v", i, tc)
|
||||
}
|
||||
require.EqualValues(t, tc.expected, cfg, "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
|
||||
func TestConfigFromFile(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
lines := []string{
|
||||
"[nvidia-container-cli]",
|
||||
"root = \"/run/nvidia/driver\"",
|
||||
"[nvidia-container-runtime]",
|
||||
"#debug = \"/nvidia-container-toolkit.log\"",
|
||||
"",
|
||||
"[nvidia-container-runtime.experimental]",
|
||||
"debug = \"/nvidia-container-toolkit.experimental.log\"",
|
||||
}
|
||||
|
||||
contents := []byte(strings.Join(lines, "\n"))
|
||||
testDir := path.Join(wd, "test")
|
||||
filename := path.Join(testDir, configFileRelativePath)
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
c := newConfigFromFileWithLogger(logger, filename)
|
||||
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
err = c.Update(cfg)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, "/nvidia-container-toolkit.experimental.log", cfg.DebugFilePath)
|
||||
|
||||
require.Equal(t, "/run/nvidia/driver", cfg.Root)
|
||||
}
|
||||
|
||||
func TestConfigFromNonexistentFileReturnsNoop(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
c := newConfigFromFileWithLogger(logger, "/does/not/exist")
|
||||
|
||||
n, ok := c.(*noop)
|
||||
|
||||
require.True(t, ok)
|
||||
require.NotNil(t, n)
|
||||
}
|
||||
|
||||
func TestGetConfigKey(t *testing.T) {
|
||||
require.Equal(t, "key", tomlSection{}.configKey("key"))
|
||||
require.Equal(t, "section.key", tomlSection{section: "section"}.configKey("key"))
|
||||
}
|
||||
144
cmd/nvidia-container-runtime.experimental/main.go
Normal file
144
cmd/nvidia-container-runtime.experimental/main.go
Normal file
@@ -0,0 +1,144 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ensure"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/filter"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modify"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
log "github.com/sirupsen/logrus"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-container-runtime.experimental/config"
|
||||
)
|
||||
|
||||
const (
|
||||
visibleDevicesEnvvar = "NVIDIA_VISIBLE_DEVICES"
|
||||
visibleDevicesVoid = "void"
|
||||
)
|
||||
|
||||
var logger = log.New()
|
||||
|
||||
func main() {
|
||||
cfg, err := config.GetConfig(logger)
|
||||
if err != nil {
|
||||
logger.Errorf("Error loading config: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if cfg.DebugFilePath != "" && cfg.DebugFilePath != "/dev/null" {
|
||||
logFile, err := os.OpenFile(cfg.DebugFilePath, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
logger.Errorf("Error opening debug log file: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
defer logFile.Close()
|
||||
|
||||
logger.SetOutput(logFile)
|
||||
}
|
||||
|
||||
logLevel, err := log.ParseLevel(cfg.LogLevel)
|
||||
if err == nil {
|
||||
logger.SetLevel(logLevel)
|
||||
} else {
|
||||
logger.Warnf("Invalid log-level '%v'; using '%v'", cfg.LogLevel, logger.Level.String())
|
||||
}
|
||||
|
||||
logger.Infof("Starting nvidia-container-runtime: %v", os.Args)
|
||||
logger.Debugf("Using config=%+v", cfg)
|
||||
|
||||
if err := run(cfg, os.Args); err != nil {
|
||||
logger.Errorf("Error running runtime: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
func run(cfg *config.Config, args []string) error {
|
||||
logger.Debugf("running with args=%v", args)
|
||||
|
||||
// We create a low-level runtime
|
||||
lowLevelRuntime, err := createLowLevelRuntime(logger, cfg)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing low-level runtime: %v", err)
|
||||
}
|
||||
|
||||
if !oci.HasCreateSubcommand(args) {
|
||||
logger.Infof("No modification of OCI specification required")
|
||||
logger.Infof("Forwarding command to runtime")
|
||||
return lowLevelRuntime.Exec(args)
|
||||
}
|
||||
|
||||
// We create the OCI spec that is to be modified
|
||||
ociSpec, bundleDir, err := oci.NewSpecFromArgs(args)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing OCI spec: %v", err)
|
||||
}
|
||||
|
||||
err = ociSpec.Load()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading OCI specification: %v", err)
|
||||
}
|
||||
|
||||
visibleDevices, exists := ociSpec.LookupEnv(visibleDevicesEnvvar)
|
||||
if !exists || visibleDevices == "" || visibleDevices == visibleDevicesVoid {
|
||||
logger.Infof("Using low-level runtime: %v=%v (exists=%v)", visibleDevicesEnvvar, visibleDevices, exists)
|
||||
return lowLevelRuntime.Exec(os.Args)
|
||||
}
|
||||
|
||||
// We create the modifier that will be applied by the Modifying Runtime Wrapper
|
||||
modifier, err := createModifier(cfg.Root, bundleDir, visibleDevices, ociSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error constructing modifer: %v", err)
|
||||
}
|
||||
|
||||
// We construct the Modifying runtime
|
||||
r := runtime.NewModifyingRuntimeWrapperWithLogger(logger, lowLevelRuntime, ociSpec, modifier)
|
||||
|
||||
return r.Exec(os.Args)
|
||||
}
|
||||
|
||||
func createLowLevelRuntime(logger *log.Logger, cfg *config.Config) (oci.Runtime, error) {
|
||||
if cfg.RuntimePath == "" {
|
||||
return oci.NewLowLevelRuntimeWithLogger(logger, "docker-runc", "runc")
|
||||
}
|
||||
logger.Infof("Creating runtime with path %v", cfg.RuntimePath)
|
||||
return oci.NewRuntimeForPathWithLogger(logger, cfg.RuntimePath)
|
||||
}
|
||||
|
||||
func createModifier(root string, bundleDir string, visibleDevices string, env filter.EnvLookup) (modify.Modifier, error) {
|
||||
// We set up the modifier using discovery
|
||||
discovered, err := discover.NewNVMLServerWithLogger(logger, root)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error discovering devices: %v", err)
|
||||
}
|
||||
|
||||
// We apply a filter to the discovered devices
|
||||
selected := filter.NewSelectDevicesFromWithLogger(logger, discovered, visibleDevices, env)
|
||||
|
||||
// We ensure that the selected devices are available
|
||||
available := ensure.NewEnsureDevicesWithLogger(logger, selected, root)
|
||||
|
||||
// We construct the modifer for the OCI spec
|
||||
modifier := modify.NewModifierWithLoggerFor(logger, available, root, bundleDir)
|
||||
|
||||
return modifier, nil
|
||||
}
|
||||
@@ -1,87 +0,0 @@
|
||||
# The NVIDIA Container Runtime
|
||||
|
||||
The NVIDIA Container Runtime is a shim for OCI-compliant low-level runtimes such as [runc](https://github.com/opencontainers/runc). When a `create` command is detected, the incoming [OCI runtime specification](https://github.com/opencontainers/runtime-spec) is modified in place and the command is forwarded to the low-level runtime.
|
||||
|
||||
## Configuration
|
||||
|
||||
The NVIDIA Container Runtime uses file-based configuration, with the config stored in `/etc/nvidia-container-runtime/config.toml`. The `/etc` path can be overridden using the `XDG_CONFIG_HOME` environment variable with the `${XDG_CONFIG_HOME}/nvidia-container-runtime/config.toml` file used instead if this environment variable is set.
|
||||
|
||||
This config file may contain options for other components of the NVIDIA container stack and for the NVIDIA Container Runtime, the relevant config section is `nvidia-container-runtime`
|
||||
|
||||
### Logging
|
||||
|
||||
The `log-level` config option (default: `"info"`) specifies the log level to use and the `debug` option, if set, specifies a log file to which logs for the NVIDIA Container Runtime must be written.
|
||||
|
||||
In addition to this, the NVIDIA Container Runtime considers the value of `--log` and `--log-format` flags that may be passed to it by a container runtime such as docker or containerd. If the `--debug` flag is present the log-level specified in the config file is overridden as `"debug"`.
|
||||
|
||||
### Low-level Runtime Path
|
||||
|
||||
The `runtimes` config option allows for the low-level runtime to be specified. The first entry in this list that is an existing executable file is used as the low-level runtime. If the entry is not a path, the `PATH` is searched for a matching executable. If the entry is a path this is checked instead.
|
||||
|
||||
The default value for this setting is:
|
||||
```toml
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
```
|
||||
|
||||
and if, for example, `crun` is to be used instead this can be changed to:
|
||||
```toml
|
||||
runtimes = [
|
||||
"crun",
|
||||
]
|
||||
```
|
||||
|
||||
### Runtime Mode
|
||||
|
||||
The `mode` config option (default `"auto"`) controls the high-level behaviour of the runtime.
|
||||
|
||||
#### Auto Mode
|
||||
|
||||
When `mode` is set to `"auto"`, the runtime employs heuristics to determine which mode to use based on, for example, the platform where the runtime is being run.
|
||||
|
||||
#### Legacy Mode
|
||||
|
||||
When `mode` is set to `"legacy"`, the NVIDIA Container Runtime adds a [`prestart` hook](https://github.com/opencontainers/runtime-spec/blob/master/config.md#prestart) to the incomming OCI specification that invokes the NVIDIA Container Runtime Hook for all containers created. This hook checks whether NVIDIA devices are requested and ensures GPU access is configured using the `nvidia-container-cli` from the [libnvidia-container](https://github.com/NVIDIA/libnvidia-container) project.
|
||||
|
||||
#### CSV Mode
|
||||
|
||||
When `mode` is set to `"csv"`, CSV files at `/etc/nvidia-container-runtime/host-files-for-container.d` define the devices and mounts that are to be injected into a container when it is created. The search path for the files can be overridden by modifying the `nvidia-container-runtime.modes.csv.mount-spec-path` in the config as below:
|
||||
|
||||
```toml
|
||||
[nvidia-container-runtime]
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
```
|
||||
|
||||
This mode is primarily targeted at Tegra-based systems without NVML available.
|
||||
|
||||
### Notes on using the docker CLI
|
||||
|
||||
Note that only the `"legacy"` NVIDIA Container Runtime mode is directly compatible with the `--gpus` flag implemented by the `docker` CLI (assuming the NVIDIA Container Runtime is not used). The reason for this is that `docker` inserts the same NVIDIA Container Runtime Hook into the OCI runtime specification.
|
||||
|
||||
|
||||
If a different mode is explicitly set or detected, the NVIDIA Container Runtime Hook will raise the following error when `--gpus` is set:
|
||||
```
|
||||
$ docker run --rm --gpus all ubuntu:18.04
|
||||
docker: Error response from daemon: failed to create shim: OCI runtime create failed: container_linux.go:380: starting container process caused: process_linux.go:545: container init caused: Running hook #0:: error running hook: exit status 1, stdout: , stderr: Auto-detected mode as 'csv'
|
||||
invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime instead.: unknown.
|
||||
```
|
||||
Here NVIDIA Container Runtime must be used explicitly. The recommended way to do this is to specify the `--runtime=nvidia` command line argument as part of the `docker run` commmand as follows:
|
||||
```
|
||||
$ docker run --rm --gpus all --runtime=nvidia ubuntu:18.04
|
||||
```
|
||||
|
||||
Alternatively the NVIDIA Container Runtime can be set as the default runtime for docker. This can be done by modifying the `/etc/docker/daemon.json` file as follows:
|
||||
```json
|
||||
{
|
||||
"default-runtime": "nvidia",
|
||||
"runtimes": {
|
||||
"nvidia": {
|
||||
"path": "nvidia-container-runtime",
|
||||
"runtimeArgs": []
|
||||
}
|
||||
}
|
||||
}
|
||||
```
|
||||
@@ -20,220 +20,60 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/tsaikd/KDGoLib/logrusutil"
|
||||
)
|
||||
|
||||
// Logger adds a way to manage output to a log file to a logrus.Logger
|
||||
type Logger struct {
|
||||
*logrus.Logger
|
||||
previousLogger *logrus.Logger
|
||||
logFiles []*os.File
|
||||
previousOutput io.Writer
|
||||
logFile *os.File
|
||||
}
|
||||
|
||||
// NewLogger creates an empty logger
|
||||
// NewLogger constructs a Logger with a preddefined formatter
|
||||
func NewLogger() *Logger {
|
||||
return &Logger{
|
||||
Logger: logrus.New(),
|
||||
logrusLogger := logrus.New()
|
||||
|
||||
formatter := &logrusutil.ConsoleLogFormatter{
|
||||
TimestampFormat: "2006/01/02 15:04:07",
|
||||
Flag: logrusutil.Ltime,
|
||||
}
|
||||
|
||||
logger := &Logger{
|
||||
Logger: logrusLogger,
|
||||
}
|
||||
logger.SetFormatter(formatter)
|
||||
|
||||
return logger
|
||||
}
|
||||
|
||||
// UpdateLogger constructs a Logger with a preddefined formatter
|
||||
func UpdateLogger(filename string, logLevel string, argv []string) (*Logger, error) {
|
||||
configFromArgs := parseArgs(argv)
|
||||
|
||||
level, logLevelError := configFromArgs.getLevel(logLevel)
|
||||
|
||||
var logFiles []*os.File
|
||||
var argLogFileError error
|
||||
|
||||
// We don't create log files if the version argument is supplied
|
||||
if !configFromArgs.version {
|
||||
configLogFile, err := createLogFile(filename)
|
||||
if err != nil {
|
||||
return logger, fmt.Errorf("error opening debug log file: %v", err)
|
||||
}
|
||||
if configLogFile != nil {
|
||||
logFiles = append(logFiles, configLogFile)
|
||||
}
|
||||
|
||||
argLogFile, err := createLogFile(configFromArgs.file)
|
||||
if argLogFile != nil {
|
||||
logFiles = append(logFiles, argLogFile)
|
||||
}
|
||||
argLogFileError = err
|
||||
// LogToFile opens the specified file for appending and sets the logger to
|
||||
// output to the opened file. A reference to the file pointer is stored to
|
||||
// allow this to be closed.
|
||||
func (l *Logger) LogToFile(filename string) error {
|
||||
logFile, err := os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error opening debug log file: %v", err)
|
||||
}
|
||||
|
||||
l := &Logger{
|
||||
Logger: logrus.New(),
|
||||
previousLogger: logger.Logger,
|
||||
logFiles: logFiles,
|
||||
}
|
||||
l.logFile = logFile
|
||||
l.previousOutput = l.Out
|
||||
l.SetOutput(logFile)
|
||||
|
||||
l.SetLevel(level)
|
||||
if level == logrus.DebugLevel {
|
||||
logrus.SetReportCaller(true)
|
||||
// Shorten function and file names reported by the logger, by
|
||||
// trimming common "github.com/opencontainers/runc" prefix.
|
||||
// This is only done for text formatter.
|
||||
_, file, _, _ := runtime.Caller(0)
|
||||
prefix := filepath.Dir(file) + "/"
|
||||
logrus.SetFormatter(&logrus.TextFormatter{
|
||||
CallerPrettyfier: func(f *runtime.Frame) (string, string) {
|
||||
function := strings.TrimPrefix(f.Function, prefix) + "()"
|
||||
fileLine := strings.TrimPrefix(f.File, prefix) + ":" + strconv.Itoa(f.Line)
|
||||
return function, fileLine
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
if configFromArgs.format == "json" {
|
||||
l.SetFormatter(new(logrus.JSONFormatter))
|
||||
}
|
||||
|
||||
if len(logFiles) == 0 {
|
||||
l.SetOutput(io.Discard)
|
||||
} else if len(logFiles) == 1 {
|
||||
l.SetOutput(logFiles[0])
|
||||
} else if len(logFiles) > 1 {
|
||||
var writers []io.Writer
|
||||
for _, f := range logFiles {
|
||||
writers = append(writers, f)
|
||||
}
|
||||
l.SetOutput(io.MultiWriter(writers...))
|
||||
}
|
||||
|
||||
if logLevelError != nil {
|
||||
l.Warn(logLevelError)
|
||||
}
|
||||
|
||||
if argLogFileError != nil {
|
||||
l.Warnf("Failed to open log file: %v", argLogFileError)
|
||||
}
|
||||
|
||||
return l, nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// Reset closes the log file (if any) and resets the logger output to what it
|
||||
// was before UpdateLogger was called.
|
||||
func (l *Logger) Reset() error {
|
||||
defer func() {
|
||||
previous := l.previousLogger
|
||||
if previous == nil {
|
||||
previous = logrus.New()
|
||||
}
|
||||
logger = &Logger{Logger: previous}
|
||||
}()
|
||||
|
||||
var errs []error
|
||||
for _, f := range l.logFiles {
|
||||
err := f.Close()
|
||||
if err != nil {
|
||||
errs = append(errs, err)
|
||||
}
|
||||
// CloseFile closes the log file (if any) and resets the logger output to what it
|
||||
// was before LogToFile was called.
|
||||
func (l *Logger) CloseFile() error {
|
||||
if l.logFile == nil {
|
||||
return nil
|
||||
}
|
||||
logFile := l.logFile
|
||||
l.SetOutput(l.previousOutput)
|
||||
l.logFile = nil
|
||||
|
||||
var err error
|
||||
for _, e := range errs {
|
||||
if err == nil {
|
||||
err = e
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("%v; %w", e, err)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
func createLogFile(filename string) (*os.File, error) {
|
||||
if filename != "" && filename != os.DevNull {
|
||||
return os.OpenFile(filename, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
|
||||
}
|
||||
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
type loggerConfig struct {
|
||||
file string
|
||||
format string
|
||||
debug bool
|
||||
version bool
|
||||
}
|
||||
|
||||
func (c loggerConfig) getLevel(logLevel string) (logrus.Level, error) {
|
||||
if c.debug {
|
||||
return logrus.DebugLevel, nil
|
||||
}
|
||||
|
||||
if logLevel, err := logrus.ParseLevel(logLevel); err == nil {
|
||||
return logLevel, nil
|
||||
}
|
||||
|
||||
return logrus.InfoLevel, fmt.Errorf("invalid log-level '%v'", logLevel)
|
||||
}
|
||||
|
||||
// Informed by Taken from https://github.com/opencontainers/runc/blob/7fd8b57001f5bfa102e89cb434d96bf71f7c1d35/main.go#L182
|
||||
func parseArgs(args []string) loggerConfig {
|
||||
c := loggerConfig{}
|
||||
|
||||
expected := map[string]*string{
|
||||
"log-format": &c.format,
|
||||
"log": &c.file,
|
||||
}
|
||||
|
||||
found := make(map[string]bool)
|
||||
|
||||
for i := 0; i < len(args); i++ {
|
||||
if len(found) == 4 {
|
||||
break
|
||||
}
|
||||
|
||||
param := args[i]
|
||||
|
||||
parts := strings.SplitN(param, "=", 2)
|
||||
trimmed := strings.TrimLeft(parts[0], "-")
|
||||
// If this is not a flag we continue
|
||||
if parts[0] == trimmed {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the version flag
|
||||
if trimmed == "version" {
|
||||
c.version = true
|
||||
found["version"] = true
|
||||
// For the version flag we don't process any other flags
|
||||
continue
|
||||
}
|
||||
|
||||
// Check the debug flag
|
||||
if trimmed == "debug" {
|
||||
c.debug = true
|
||||
found["debug"] = true
|
||||
continue
|
||||
}
|
||||
|
||||
destination, exists := expected[trimmed]
|
||||
if !exists {
|
||||
continue
|
||||
}
|
||||
|
||||
var value string
|
||||
if len(parts) == 2 {
|
||||
value = parts[2]
|
||||
} else if i+1 < len(args) {
|
||||
value = args[i+1]
|
||||
i++
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
|
||||
*destination = value
|
||||
found[trimmed] = true
|
||||
}
|
||||
|
||||
return c
|
||||
return logFile.Close()
|
||||
}
|
||||
|
||||
@@ -3,82 +3,87 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"path"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/pelletier/go-toml"
|
||||
)
|
||||
|
||||
// version must be set by go build's -X main.version= option in the Makefile.
|
||||
var version = "unknown"
|
||||
const (
|
||||
configOverride = "XDG_CONFIG_HOME"
|
||||
configFilePath = "nvidia-container-runtime/config.toml"
|
||||
|
||||
// gitCommit will be the hash that the binary was built from
|
||||
// and will be populated by the Makefile
|
||||
var gitCommit = ""
|
||||
hookDefaultFilePath = "/usr/bin/nvidia-container-runtime-hook"
|
||||
)
|
||||
|
||||
var (
|
||||
configDir = "/etc/"
|
||||
)
|
||||
|
||||
var logger = NewLogger()
|
||||
|
||||
func main() {
|
||||
err := run(os.Args)
|
||||
if err != nil {
|
||||
logger.Errorf("%v", err)
|
||||
logger.Errorf("Error running %v: %v", os.Args, err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// run is an entry point that allows for idiomatic handling of errors
|
||||
// when calling from the main function.
|
||||
func run(argv []string) (rerr error) {
|
||||
printVersion := hasVersionFlag(argv)
|
||||
if printVersion {
|
||||
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime", info.GetVersionString(fmt.Sprintf("spec: %v", specs.Version)))
|
||||
}
|
||||
|
||||
cfg, err := config.GetConfig()
|
||||
func run(argv []string) (err error) {
|
||||
cfg, err := getConfig()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading config: %v", err)
|
||||
}
|
||||
|
||||
logger, err = UpdateLogger(
|
||||
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath,
|
||||
cfg.NVIDIAContainerRuntimeConfig.LogLevel,
|
||||
argv,
|
||||
)
|
||||
err = logger.LogToFile(cfg.debugFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to set up logger: %v", err)
|
||||
return fmt.Errorf("error opening debug log file: %v", err)
|
||||
}
|
||||
defer logger.Reset()
|
||||
defer func() {
|
||||
// We capture and log a returning error before closing the log file.
|
||||
if err != nil {
|
||||
logger.Errorf("Error running %v: %v", argv, err)
|
||||
}
|
||||
logger.CloseFile()
|
||||
}()
|
||||
|
||||
logger.Debugf("Command line arguments: %v", argv)
|
||||
runtime, err := newNVIDIAContainerRuntime(logger.Logger, cfg, argv)
|
||||
r, err := newRuntime(argv)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create NVIDIA Container Runtime: %v", err)
|
||||
return fmt.Errorf("error creating runtime: %v", err)
|
||||
}
|
||||
|
||||
if printVersion {
|
||||
fmt.Print("\n")
|
||||
}
|
||||
return runtime.Exec(argv)
|
||||
logger.Printf("Running %s\n", argv[0])
|
||||
return r.Exec(argv)
|
||||
}
|
||||
|
||||
// TODO: This should be refactored / combined with parseArgs in logger.
|
||||
func hasVersionFlag(args []string) bool {
|
||||
for i := 0; i < len(args); i++ {
|
||||
param := args[i]
|
||||
type config struct {
|
||||
debugFilePath string
|
||||
}
|
||||
|
||||
parts := strings.SplitN(param, "=", 2)
|
||||
trimmed := strings.TrimLeft(parts[0], "-")
|
||||
// If this is not a flag we continue
|
||||
if parts[0] == trimmed {
|
||||
continue
|
||||
}
|
||||
// getConfig sets up the config struct. Values are read from a toml file
|
||||
// or set via the environment.
|
||||
func getConfig() (*config, error) {
|
||||
cfg := &config{}
|
||||
|
||||
// Check the version flag
|
||||
if trimmed == "version" {
|
||||
return true
|
||||
}
|
||||
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
||||
configDir = XDGConfigDir
|
||||
}
|
||||
|
||||
return false
|
||||
configFilePath := path.Join(configDir, configFilePath)
|
||||
|
||||
tomlContent, err := os.ReadFile(configFilePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
toml, err := toml.Load(string(tomlContent))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
cfg.debugFilePath = toml.GetDefault("nvidia-container-runtime.debug", "/dev/null").(string)
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
@@ -3,15 +3,15 @@ package main
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
@@ -24,10 +24,6 @@ const (
|
||||
unmodifiedSpecFileSuffix = "test/input/test_spec.json"
|
||||
)
|
||||
|
||||
const (
|
||||
runcExecutableName = "runc"
|
||||
)
|
||||
|
||||
type testConfig struct {
|
||||
root string
|
||||
binPath string
|
||||
@@ -39,7 +35,7 @@ func TestMain(m *testing.M) {
|
||||
// TEST SETUP
|
||||
// Determine the module root and the test binary path
|
||||
var err error
|
||||
moduleRoot, err := test.GetModuleRoot()
|
||||
moduleRoot, err := getModuleRoot()
|
||||
if err != nil {
|
||||
logger.Fatalf("error in test setup: could not get module root: %v", err)
|
||||
}
|
||||
@@ -47,7 +43,7 @@ func TestMain(m *testing.M) {
|
||||
testInputPath := filepath.Join(moduleRoot, "test", "input")
|
||||
|
||||
// Set the environment variables for the test
|
||||
os.Setenv("PATH", test.PrependToPath(testBinPath, moduleRoot))
|
||||
os.Setenv("PATH", prependToPath(testBinPath, moduleRoot))
|
||||
os.Setenv("XDG_CONFIG_HOME", testInputPath)
|
||||
|
||||
// Confirm that the environment is configured correctly
|
||||
@@ -75,6 +71,31 @@ func TestMain(m *testing.M) {
|
||||
os.Exit(exitCode)
|
||||
}
|
||||
|
||||
func getModuleRoot() (string, error) {
|
||||
_, filename, _, _ := runtime.Caller(0)
|
||||
|
||||
return hasGoMod(filename)
|
||||
}
|
||||
|
||||
func hasGoMod(dir string) (string, error) {
|
||||
if dir == "" || dir == "/" {
|
||||
return "", fmt.Errorf("module root not found")
|
||||
}
|
||||
|
||||
_, err := os.Stat(filepath.Join(dir, "go.mod"))
|
||||
if err != nil {
|
||||
return hasGoMod(filepath.Dir(dir))
|
||||
}
|
||||
return dir, nil
|
||||
}
|
||||
|
||||
func prependToPath(additionalPaths ...string) string {
|
||||
paths := strings.Split(os.Getenv("PATH"), ":")
|
||||
paths = append(additionalPaths, paths...)
|
||||
|
||||
return strings.Join(paths, ":")
|
||||
}
|
||||
|
||||
// case 1) nvidia-container-runtime run --bundle
|
||||
// case 2) nvidia-container-runtime create --bundle
|
||||
// - Confirm the runtime handles bad input correctly
|
||||
@@ -84,6 +105,11 @@ func TestBadInput(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
cmdRun := exec.Command(nvidiaRuntime, "run", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdRun.Args, " "))
|
||||
output, err := cmdRun.CombinedOutput()
|
||||
require.Errorf(t, err, "runtime should return an error", "output=%v", string(output))
|
||||
|
||||
cmdCreate := exec.Command(nvidiaRuntime, "create", "--bundle")
|
||||
t.Logf("executing: %s\n", strings.Join(cmdCreate.Args, " "))
|
||||
err = cmdCreate.Run()
|
||||
@@ -167,11 +193,11 @@ func TestDuplicateHook(t *testing.T) {
|
||||
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json")
|
||||
}
|
||||
|
||||
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
|
||||
// addNVIDIAHook is a basic wrapper for nvidiaContainerRunime.addNVIDIAHook that is used for
|
||||
// testing.
|
||||
func addNVIDIAHook(spec *specs.Spec) error {
|
||||
m := modifier.NewStableRuntimeModifier(logger.Logger)
|
||||
return m.Modify(spec)
|
||||
r := nvidiaContainerRuntime{logger: logger.Logger}
|
||||
return r.addNVIDIAHook(spec)
|
||||
}
|
||||
|
||||
func (c testConfig) getRuntimeSpec() (specs.Spec, error) {
|
||||
@@ -244,3 +270,24 @@ func nvidiaHookCount(hooks *specs.Hooks) int {
|
||||
}
|
||||
return count
|
||||
}
|
||||
|
||||
func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
testDir := filepath.Join(wd, "test")
|
||||
filename := filepath.Join(testDir, configFilePath)
|
||||
|
||||
os.Setenv(configOverride, testDir)
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
cfg, err := getConfig()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, cfg.debugFilePath, "/nvidia-container-toolkit.log")
|
||||
}
|
||||
|
||||
132
cmd/nvidia-container-runtime/nvcr.go
Normal file
132
cmd/nvidia-container-runtime/nvcr.go
Normal file
@@ -0,0 +1,132 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// nvidiaContainerRuntime encapsulates the NVIDIA Container Runtime. It wraps the specified runtime, conditionally
|
||||
// modifying the specified OCI specification before invoking the runtime.
|
||||
type nvidiaContainerRuntime struct {
|
||||
logger *log.Logger
|
||||
runtime oci.Runtime
|
||||
ociSpec oci.Spec
|
||||
}
|
||||
|
||||
var _ oci.Runtime = (*nvidiaContainerRuntime)(nil)
|
||||
|
||||
// newNvidiaContainerRuntime is a constructor for a standard runtime shim.
|
||||
func newNvidiaContainerRuntimeWithLogger(logger *log.Logger, runtime oci.Runtime, ociSpec oci.Spec) (oci.Runtime, error) {
|
||||
r := nvidiaContainerRuntime{
|
||||
logger: logger,
|
||||
runtime: runtime,
|
||||
ociSpec: ociSpec,
|
||||
}
|
||||
|
||||
return &r, nil
|
||||
}
|
||||
|
||||
// Exec defines the entrypoint for the NVIDIA Container Runtime. A check is performed to see whether modifications
|
||||
// to the OCI spec are required -- and applicable modifcations applied. The supplied arguments are then
|
||||
// forwarded to the underlying runtime's Exec method.
|
||||
func (r nvidiaContainerRuntime) Exec(args []string) error {
|
||||
if r.modificationRequired(args) {
|
||||
err := r.modifyOCISpec()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error modifying OCI spec: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.Println("Forwarding command to runtime")
|
||||
return r.runtime.Exec(args)
|
||||
}
|
||||
|
||||
// modificationRequired checks the intput arguments to determine whether a modification
|
||||
// to the OCI spec is required.
|
||||
func (r nvidiaContainerRuntime) modificationRequired(args []string) bool {
|
||||
if oci.HasCreateSubcommand(args) {
|
||||
r.logger.Infof("'create' command detected; modification required")
|
||||
return true
|
||||
}
|
||||
|
||||
r.logger.Infof("No modification required")
|
||||
return false
|
||||
}
|
||||
|
||||
// modifyOCISpec loads and modifies the OCI spec specified in the nvidiaContainerRuntime
|
||||
// struct. The spec is modified in-place and written to the same file as the input after
|
||||
// modifcationas are applied.
|
||||
func (r nvidiaContainerRuntime) modifyOCISpec() error {
|
||||
err := r.ociSpec.Load()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error loading OCI specification for modification: %v", err)
|
||||
}
|
||||
|
||||
err = r.ociSpec.Modify(r.addNVIDIAHook)
|
||||
if err != nil {
|
||||
return fmt.Errorf("error injecting NVIDIA Container Runtime hook: %v", err)
|
||||
}
|
||||
|
||||
err = r.ociSpec.Flush()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error writing modified OCI specification: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// addNVIDIAHook modifies the specified OCI specification in-place, inserting a
|
||||
// prestart hook.
|
||||
func (r nvidiaContainerRuntime) addNVIDIAHook(spec *specs.Spec) error {
|
||||
path, err := exec.LookPath("nvidia-container-runtime-hook")
|
||||
if err != nil {
|
||||
path = hookDefaultFilePath
|
||||
_, err = os.Stat(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
|
||||
r.logger.Printf("prestart hook path: %s\n", path)
|
||||
|
||||
args := []string{path}
|
||||
if spec.Hooks == nil {
|
||||
spec.Hooks = &specs.Hooks{}
|
||||
} else if len(spec.Hooks.Prestart) != 0 {
|
||||
for _, hook := range spec.Hooks.Prestart {
|
||||
if !strings.Contains(hook.Path, "nvidia-container-runtime-hook") {
|
||||
continue
|
||||
}
|
||||
r.logger.Println("existing nvidia prestart hook in OCI spec file")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
spec.Hooks.Prestart = append(spec.Hooks.Prestart, specs.Hook{
|
||||
Path: path,
|
||||
Args: append(args, "prestart"),
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
203
cmd/nvidia-container-runtime/nvcr_test.go
Normal file
203
cmd/nvidia-container-runtime/nvcr_test.go
Normal file
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestAddNvidiaHook(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
shim := nvidiaContainerRuntime{
|
||||
logger: logger,
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
spec *specs.Spec
|
||||
errorPrefix string
|
||||
shouldNotAdd bool
|
||||
}{
|
||||
{
|
||||
spec: &specs.Spec{},
|
||||
},
|
||||
{
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{},
|
||||
},
|
||||
},
|
||||
{
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{{
|
||||
Path: "some-hook",
|
||||
}},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
spec: &specs.Spec{
|
||||
Hooks: &specs.Hooks{
|
||||
Prestart: []specs.Hook{{
|
||||
Path: "nvidia-container-runtime-hook",
|
||||
}},
|
||||
},
|
||||
},
|
||||
shouldNotAdd: true,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
var numPrestartHooks int
|
||||
if tc.spec.Hooks != nil {
|
||||
numPrestartHooks = len(tc.spec.Hooks.Prestart)
|
||||
}
|
||||
|
||||
err := shim.addNVIDIAHook(tc.spec)
|
||||
|
||||
if tc.errorPrefix == "" {
|
||||
require.NoErrorf(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Truef(t, strings.HasPrefix(err.Error(), tc.errorPrefix), "%d: %v", i, tc)
|
||||
|
||||
require.NotNilf(t, tc.spec.Hooks, "%d: %v", i, tc)
|
||||
require.Equalf(t, 1, nvidiaHookCount(tc.spec.Hooks), "%d: %v", i, tc)
|
||||
|
||||
if tc.shouldNotAdd {
|
||||
require.Equal(t, numPrestartHooks+1, len(tc.spec.Hooks.Poststart), "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Equal(t, numPrestartHooks+1, len(tc.spec.Hooks.Poststart), "%d: %v", i, tc)
|
||||
|
||||
nvidiaHook := tc.spec.Hooks.Poststart[len(tc.spec.Hooks.Poststart)-1]
|
||||
|
||||
// TODO: This assumes that the hook has been set up in the makefile
|
||||
expectedPath := "/usr/bin/nvidia-container-runtime-hook"
|
||||
require.Equalf(t, expectedPath, nvidiaHook.Path, "%d: %v", i, tc)
|
||||
require.Equalf(t, []string{expectedPath, "prestart"}, nvidiaHook.Args, "%d: %v", i, tc)
|
||||
require.Emptyf(t, nvidiaHook.Env, "%d: %v", i, tc)
|
||||
require.Nilf(t, nvidiaHook.Timeout, "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestNvidiaContainerRuntime(t *testing.T) {
|
||||
logger, hook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
shim nvidiaContainerRuntime
|
||||
shouldModify bool
|
||||
args []string
|
||||
modifyError error
|
||||
writeError error
|
||||
}{
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
shouldModify: false,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"create"},
|
||||
shouldModify: true,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"--bundle=create"},
|
||||
shouldModify: false,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"--bundle", "create"},
|
||||
shouldModify: false,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"create"},
|
||||
shouldModify: true,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"create"},
|
||||
modifyError: fmt.Errorf("error modifying"),
|
||||
shouldModify: true,
|
||||
},
|
||||
{
|
||||
shim: nvidiaContainerRuntime{},
|
||||
args: []string{"create"},
|
||||
writeError: fmt.Errorf("error writing"),
|
||||
shouldModify: true,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
tc.shim.logger = logger
|
||||
hook.Reset()
|
||||
|
||||
ociMock := &oci.SpecMock{
|
||||
ModifyFunc: func(specModifier oci.SpecModifier) error {
|
||||
return tc.modifyError
|
||||
},
|
||||
FlushFunc: func() error {
|
||||
return tc.writeError
|
||||
},
|
||||
}
|
||||
require.Equal(t, tc.shouldModify, tc.shim.modificationRequired(tc.args), "%d: %v", i, tc)
|
||||
|
||||
tc.shim.ociSpec = ociMock
|
||||
tc.shim.runtime = &MockShim{}
|
||||
|
||||
err := tc.shim.Exec(tc.args)
|
||||
if tc.modifyError != nil || tc.writeError != nil {
|
||||
require.Error(t, err, "%d: %v", i, tc)
|
||||
} else {
|
||||
require.NoError(t, err, "%d: %v", i, tc)
|
||||
}
|
||||
|
||||
if tc.shouldModify {
|
||||
require.Equal(t, 1, len(ociMock.ModifyCalls()), "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Equal(t, 0, len(ociMock.ModifyCalls()), "%d: %v", i, tc)
|
||||
}
|
||||
|
||||
writeExpected := tc.shouldModify && tc.modifyError == nil
|
||||
if writeExpected {
|
||||
require.Equal(t, 1, len(ociMock.FlushCalls()), "%d: %v", i, tc)
|
||||
} else {
|
||||
require.Equal(t, 0, len(ociMock.FlushCalls()), "%d: %v", i, tc)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
type MockShim struct {
|
||||
called bool
|
||||
args []string
|
||||
returnError error
|
||||
}
|
||||
|
||||
func (m *MockShim) Exec(args []string) error {
|
||||
m.called = true
|
||||
m.args = args
|
||||
return m.returnError
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -19,93 +19,56 @@ package main
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/runtime"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// newNVIDIAContainerRuntime is a factory method that constructs a runtime based on the selected configuration and specified logger
|
||||
func newNVIDIAContainerRuntime(logger *logrus.Logger, cfg *config.Config, argv []string) (oci.Runtime, error) {
|
||||
lowLevelRuntime, err := oci.NewLowLevelRuntime(logger, cfg.NVIDIAContainerRuntimeConfig.Runtimes)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing low-level runtime: %v", err)
|
||||
}
|
||||
const (
|
||||
ociSpecFileName = "config.json"
|
||||
dockerRuncExecutableName = "docker-runc"
|
||||
runcExecutableName = "runc"
|
||||
)
|
||||
|
||||
if !oci.HasCreateSubcommand(argv) {
|
||||
logger.Debugf("Skipping modifier for non-create subcommand")
|
||||
return lowLevelRuntime, nil
|
||||
}
|
||||
|
||||
ociSpec, err := oci.NewSpec(logger, argv)
|
||||
// newRuntime is a factory method that constructs a runtime based on the selected configuration.
|
||||
func newRuntime(argv []string) (oci.Runtime, error) {
|
||||
ociSpec, err := newOCISpec(argv)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing OCI specification: %v", err)
|
||||
}
|
||||
|
||||
specModifier, err := newSpecModifier(logger, cfg, ociSpec, argv)
|
||||
runc, err := newRuncRuntime()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct OCI spec modifier: %v", err)
|
||||
return nil, fmt.Errorf("error constructing runc runtime: %v", err)
|
||||
}
|
||||
|
||||
// Create the wrapping runtime with the specified modifier
|
||||
r := runtime.NewModifyingRuntimeWrapper(
|
||||
logger,
|
||||
lowLevelRuntime,
|
||||
ociSpec,
|
||||
specModifier,
|
||||
)
|
||||
r, err := newNvidiaContainerRuntimeWithLogger(logger.Logger, runc, ociSpec)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing NVIDIA Container Runtime: %v", err)
|
||||
}
|
||||
|
||||
return r, nil
|
||||
}
|
||||
|
||||
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
|
||||
func newSpecModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
modeModifier, err := newModeModifier(logger, cfg, ociSpec, argv)
|
||||
// newOCISpec constructs an OCI spec for the provided arguments
|
||||
func newOCISpec(argv []string) (oci.Spec, error) {
|
||||
bundleDir, err := oci.GetBundleDir(argv)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("error parsing command line arguments: %v", err)
|
||||
}
|
||||
logger.Infof("Using bundle directory: %v", bundleDir)
|
||||
|
||||
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ociSpecPath := oci.GetSpecFilePath(bundleDir)
|
||||
logger.Infof("Using OCI specification file path: %v", ociSpecPath)
|
||||
|
||||
gdsModifier, err := modifier.NewGDSModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ociSpec := oci.NewSpecFromFile(ociSpecPath)
|
||||
|
||||
mofedModifier, err := modifier.NewMOFEDModifier(logger, cfg, ociSpec)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return ociSpec, nil
|
||||
}
|
||||
|
||||
tegraModifier, err := modifier.NewTegraPlatformFiles(logger)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
modifiers := modifier.Merge(
|
||||
modeModifier,
|
||||
graphicsModifier,
|
||||
gdsModifier,
|
||||
mofedModifier,
|
||||
tegraModifier,
|
||||
// newRuncRuntime locates the runc binary and wraps it in a SyscallExecRuntime
|
||||
func newRuncRuntime() (oci.Runtime, error) {
|
||||
return oci.NewLowLevelRuntimeWithLogger(
|
||||
logger.Logger,
|
||||
dockerRuncExecutableName,
|
||||
runcExecutableName,
|
||||
)
|
||||
return modifiers, nil
|
||||
}
|
||||
|
||||
func newModeModifier(logger *logrus.Logger, cfg *config.Config, ociSpec oci.Spec, argv []string) (oci.SpecModifier, error) {
|
||||
switch info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode) {
|
||||
case "legacy":
|
||||
return modifier.NewStableRuntimeModifier(logger), nil
|
||||
case "csv":
|
||||
return modifier.NewCSVModifier(logger, cfg, ociSpec)
|
||||
case "cdi":
|
||||
return modifier.NewCDIModifier(logger, cfg, ociSpec)
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
||||
}
|
||||
|
||||
@@ -17,113 +17,14 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestFactoryMethod(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
func TestConstructor(t *testing.T) {
|
||||
shim, err := newRuntime([]string{})
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
cfg *config.Config
|
||||
spec *specs.Spec
|
||||
expectedError bool
|
||||
}{
|
||||
{
|
||||
description: "empty config raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
{
|
||||
description: "config with runtime raises no error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv mode is supported",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
spec: &specs.Spec{
|
||||
Process: &specs.Process{
|
||||
Env: []string{
|
||||
"NVIDIA_VISIBLE_DEVICES=all",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "non-legacy discover mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "non-legacy",
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
{
|
||||
description: "legacy discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "legacy",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "csv discover mode returns modifier",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
Mode: "csv",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "empty mode raises error",
|
||||
cfg: &config.Config{
|
||||
NVIDIAContainerRuntimeConfig: config.RuntimeConfig{
|
||||
Runtimes: []string{"runc"},
|
||||
},
|
||||
},
|
||||
expectedError: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
bundleDir := t.TempDir()
|
||||
|
||||
specFile, err := os.Create(filepath.Join(bundleDir, "config.json"))
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, json.NewEncoder(specFile).Encode(tc.spec))
|
||||
|
||||
argv := []string{"--bundle", bundleDir, "create"}
|
||||
|
||||
_, err = newNVIDIAContainerRuntime(logger, tc.cfg, argv)
|
||||
if tc.expectedError {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
require.NoError(t, err)
|
||||
require.NotNil(t, shim)
|
||||
}
|
||||
|
||||
@@ -7,12 +7,14 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
var envSwarmGPU *string
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
@@ -102,6 +104,32 @@ type HookState struct {
|
||||
BundlePath string `json:"bundlePath"`
|
||||
}
|
||||
|
||||
func parseCudaVersion(cudaVersion string) (vmaj, vmin, vpatch uint32) {
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d.%d.%d\n", &vmaj, &vmin, &vpatch); err != nil {
|
||||
vpatch = 0
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d.%d\n", &vmaj, &vmin); err != nil {
|
||||
vmin = 0
|
||||
if _, err := fmt.Sscanf(cudaVersion, "%d\n", &vmaj); err != nil {
|
||||
log.Panicln("invalid CUDA version:", cudaVersion)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func getEnvMap(e []string) (m map[string]string) {
|
||||
m = make(map[string]string)
|
||||
for _, s := range e {
|
||||
p := strings.SplitN(s, "=", 2)
|
||||
if len(p) != 2 {
|
||||
log.Panicln("environment error")
|
||||
}
|
||||
m[p[0]] = p[1]
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func loadSpec(path string) (spec *Spec) {
|
||||
f, err := os.Open(path)
|
||||
if err != nil {
|
||||
@@ -163,18 +191,49 @@ func isPrivileged(s *Spec) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func getDevicesFromEnvvar(image image.CUDA, swarmResourceEnvvars []string) *string {
|
||||
// Build a list of envvars to consider. Note that the Swarm Resource envvars have a higher precedence.
|
||||
envVars := append(swarmResourceEnvvars, envNVVisibleDevices)
|
||||
func isLegacyCUDAImage(env map[string]string) bool {
|
||||
legacyCudaVersion := env[envCUDAVersion]
|
||||
cudaRequire := env[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
devices := image.DevicesFromEnvvars(envVars...)
|
||||
if len(devices) == 0 {
|
||||
func getDevicesFromEnvvar(env map[string]string, legacyImage bool) *string {
|
||||
// Build a list of envvars to consider.
|
||||
envVars := []string{envNVVisibleDevices}
|
||||
if envSwarmGPU != nil {
|
||||
// The Swarm envvar has higher precedence.
|
||||
envVars = append([]string{*envSwarmGPU}, envVars...)
|
||||
}
|
||||
|
||||
// Grab a reference to devices from the first envvar
|
||||
// in the list that actually exists in the environment.
|
||||
var devices *string
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := env[envVar]; ok {
|
||||
devices = &devs
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if devices == nil && legacyImage {
|
||||
all := "all"
|
||||
return &all
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
||||
return nil
|
||||
}
|
||||
|
||||
devicesString := strings.Join(devices, ",")
|
||||
// Environment variable set to "none": reset to "".
|
||||
if *devices == "none" {
|
||||
empty := ""
|
||||
return &empty
|
||||
}
|
||||
|
||||
return &devicesString
|
||||
// Any other value.
|
||||
return devices
|
||||
}
|
||||
|
||||
func getDevicesFromMounts(mounts []Mount) *string {
|
||||
@@ -214,7 +273,7 @@ func getDevicesFromMounts(mounts []Mount) *string {
|
||||
return &ret
|
||||
}
|
||||
|
||||
func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *string {
|
||||
func getDevices(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool, legacyImage bool) *string {
|
||||
// If enabled, try and get the device list from volume mounts first
|
||||
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||
devices := getDevicesFromMounts(mounts)
|
||||
@@ -224,7 +283,7 @@ func getDevices(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privil
|
||||
}
|
||||
|
||||
// Fallback to reading from the environment variable if privileges are correct
|
||||
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
|
||||
devices := getDevicesFromEnvvar(env, legacyImage)
|
||||
if devices == nil {
|
||||
return nil
|
||||
}
|
||||
@@ -276,11 +335,27 @@ func getDriverCapabilities(env map[string]string, supportedDriverCapabilities Dr
|
||||
return capabilities
|
||||
}
|
||||
|
||||
func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := image.IsLegacy()
|
||||
func getRequirements(env map[string]string, legacyImage bool) []string {
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range env {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
if legacyImage {
|
||||
vmaj, vmin, _ := parseCudaVersion(env[envCUDAVersion])
|
||||
cudaRequire := fmt.Sprintf("cuda>=%d.%d", vmaj, vmin)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
}
|
||||
return requirements
|
||||
}
|
||||
|
||||
func getNvidiaConfig(hookConfig *HookConfig, env map[string]string, mounts []Mount, privileged bool) *nvidiaConfig {
|
||||
legacyImage := isLegacyCUDAImage(env)
|
||||
|
||||
var devices string
|
||||
if d := getDevices(hookConfig, image, mounts, privileged); d != nil {
|
||||
if d := getDevices(hookConfig, env, mounts, privileged, legacyImage); d != nil {
|
||||
devices = *d
|
||||
} else {
|
||||
// 'nil' devices means this is not a GPU container.
|
||||
@@ -288,7 +363,7 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
|
||||
}
|
||||
|
||||
var migConfigDevices string
|
||||
if d := getMigConfigDevices(image); d != nil {
|
||||
if d := getMigConfigDevices(env); d != nil {
|
||||
migConfigDevices = *d
|
||||
}
|
||||
if !privileged && migConfigDevices != "" {
|
||||
@@ -296,21 +371,19 @@ func getNvidiaConfig(hookConfig *HookConfig, image image.CUDA, mounts []Mount, p
|
||||
}
|
||||
|
||||
var migMonitorDevices string
|
||||
if d := getMigMonitorDevices(image); d != nil {
|
||||
if d := getMigMonitorDevices(env); d != nil {
|
||||
migMonitorDevices = *d
|
||||
}
|
||||
if !privileged && migMonitorDevices != "" {
|
||||
log.Panicln("cannot set MIG_MONITOR_DEVICES in non privileged container")
|
||||
}
|
||||
|
||||
driverCapabilities := getDriverCapabilities(image, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
driverCapabilities := getDriverCapabilities(env, hookConfig.SupportedDriverCapabilities, legacyImage).String()
|
||||
|
||||
requirements, err := image.GetRequirements()
|
||||
if err != nil {
|
||||
log.Panicln("failed to get requirements", err)
|
||||
}
|
||||
requirements := getRequirements(env, legacyImage)
|
||||
|
||||
disableRequire := image.HasDisableRequire()
|
||||
// Don't fail on invalid values.
|
||||
disableRequire, _ := strconv.ParseBool(env[envNVDisableRequire])
|
||||
|
||||
return &nvidiaConfig{
|
||||
Devices: devices,
|
||||
@@ -336,16 +409,13 @@ func getContainerConfig(hook HookConfig) (config containerConfig) {
|
||||
|
||||
s := loadSpec(path.Join(b, "config.json"))
|
||||
|
||||
image, err := image.NewCUDAImageFromEnv(s.Process.Env)
|
||||
if err != nil {
|
||||
log.Panicln(err)
|
||||
}
|
||||
|
||||
env := getEnvMap(s.Process.Env)
|
||||
privileged := isPrivileged(s)
|
||||
envSwarmGPU = hook.SwarmResource
|
||||
return containerConfig{
|
||||
Pid: h.Pid,
|
||||
Rootfs: s.Root.Path,
|
||||
Env: image,
|
||||
Nvidia: getNvidiaConfig(&hook, image, s.Mounts, privileged),
|
||||
Env: env,
|
||||
Nvidia: getNvidiaConfig(&hook, env, s.Mounts, privileged),
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
@@ -69,7 +68,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description: "Legacy image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "9.0",
|
||||
envNVVisibleDevices: "void",
|
||||
envNVVisibleDevices: "",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
@@ -226,7 +225,7 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
description: "Modern image, devices 'void', no capabilities, no requirements",
|
||||
env: map[string]string{
|
||||
envNVRequireCUDA: "cuda>=9.0",
|
||||
envNVVisibleDevices: "void",
|
||||
envNVVisibleDevices: "",
|
||||
},
|
||||
privileged: false,
|
||||
expectedConfig: nil,
|
||||
@@ -449,44 +448,6 @@ func TestGetNvidiaConfig(t *testing.T) {
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, swarmResource overrides device selection",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
s := "DOCKER_SWARM_RESOURCE"
|
||||
return &s
|
||||
}(),
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "GPU1,GPU2",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "Hook config set, comma separated swarmResource is split and overrides device selection",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: "all",
|
||||
"DOCKER_SWARM_RESOURCE": "GPU1,GPU2",
|
||||
},
|
||||
privileged: true,
|
||||
hookConfig: &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
s := "NOT_DOCKER_SWARM_RESOURCE,DOCKER_SWARM_RESOURCE"
|
||||
return &s
|
||||
}(),
|
||||
SupportedDriverCapabilities: "video,display,utility,compute",
|
||||
},
|
||||
expectedConfig: &nvidiaConfig{
|
||||
Devices: "GPU1,GPU2",
|
||||
DriverCapabilities: defaultDriverCapabilities.String(),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
@@ -710,7 +671,7 @@ func TestDeviceListSourcePriority(t *testing.T) {
|
||||
hookConfig := getDefaultHookConfig()
|
||||
hookConfig.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||
hookConfig.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged)
|
||||
devices = getDevices(&hookConfig, env, tc.mountDevices, tc.privileged, false)
|
||||
}
|
||||
|
||||
// For all other tests, just grab the devices and check the results
|
||||
@@ -727,13 +688,13 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
|
||||
gpuID := "GPU-12345"
|
||||
anotherGPUID := "GPU-67890"
|
||||
thirdGPUID := "MIG-12345"
|
||||
|
||||
var tests = []struct {
|
||||
description string
|
||||
swarmResourceEnvvars []string
|
||||
env map[string]string
|
||||
expectedDevices *string
|
||||
description string
|
||||
envSwarmGPU *string
|
||||
env map[string]string
|
||||
legacyImage bool
|
||||
expectedDevices *string
|
||||
}{
|
||||
{
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
@@ -768,15 +729,13 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "empty env returns all for legacy image",
|
||||
env: map[string]string{
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
description: "empty env returns all for legacy image",
|
||||
legacyImage: true,
|
||||
expectedDevices: &all,
|
||||
},
|
||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
||||
@@ -822,113 +781,86 @@ func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "empty env returns all for legacy image",
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &all,
|
||||
},
|
||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
||||
// enabled
|
||||
{
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
description: "empty env returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
},
|
||||
{
|
||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "void",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: "none",
|
||||
},
|
||||
expectedDevices: &empty,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: gpuID,
|
||||
},
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: gpuID,
|
||||
envCUDAVersion: "legacy",
|
||||
},
|
||||
legacyImage: true,
|
||||
expectedDevices: &gpuID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
env: map[string]string{
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
envSwarmGPU: &envDockerResourceGPUs,
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
envDockerResourceGPUs: anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
{
|
||||
description: "First available swarm resource envvar is selected and overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &thirdGPUID,
|
||||
},
|
||||
{
|
||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||
env: map[string]string{
|
||||
envNVVisibleDevices: gpuID,
|
||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||
},
|
||||
expectedDevices: &anotherGPUID,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
devices := getDevicesFromEnvvar(image.CUDA(tc.env), tc.swarmResourceEnvvars)
|
||||
envSwarmGPU = tc.envSwarmGPU
|
||||
devices := getDevicesFromEnvvar(tc.env, tc.legacyImage)
|
||||
if tc.expectedDevices == nil {
|
||||
require.Nil(t, devices, "%d: %v", i, tc)
|
||||
return
|
||||
@@ -5,10 +5,8 @@ import (
|
||||
"os"
|
||||
"path"
|
||||
"reflect"
|
||||
"strings"
|
||||
|
||||
"github.com/BurntSushi/toml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -35,7 +33,7 @@ type CLIConfig struct {
|
||||
Ldconfig *string `toml:"ldconfig"`
|
||||
}
|
||||
|
||||
// HookConfig : options for the nvidia-container-runtime-hook.
|
||||
// HookConfig : options for the nvidia-container-toolkit.
|
||||
type HookConfig struct {
|
||||
DisableRequire bool `toml:"disable-require"`
|
||||
SwarmResource *string `toml:"swarm-resource"`
|
||||
@@ -43,11 +41,10 @@ type HookConfig struct {
|
||||
AcceptDeviceListAsVolumeMounts bool `toml:"accept-nvidia-visible-devices-as-volume-mounts"`
|
||||
SupportedDriverCapabilities DriverCapabilities `toml:"supported-driver-capabilities"`
|
||||
|
||||
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
|
||||
NVIDIAContainerRuntime config.RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
NvidiaContainerCLI CLIConfig `toml:"nvidia-container-cli"`
|
||||
}
|
||||
|
||||
func getDefaultHookConfig() HookConfig {
|
||||
func getDefaultHookConfig() (config HookConfig) {
|
||||
return HookConfig{
|
||||
DisableRequire: false,
|
||||
SwarmResource: nil,
|
||||
@@ -66,7 +63,6 @@ func getDefaultHookConfig() HookConfig {
|
||||
User: nil,
|
||||
Ldconfig: nil,
|
||||
},
|
||||
NVIDIAContainerRuntime: *config.GetDefaultRuntimeConfig(),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -117,22 +113,3 @@ func (c HookConfig) getConfigOption(fieldName string) string {
|
||||
}
|
||||
return v
|
||||
}
|
||||
|
||||
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
||||
func (c *HookConfig) getSwarmResourceEnvvars() []string {
|
||||
if c.SwarmResource == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
candidates := strings.Split(*c.SwarmResource, ",")
|
||||
|
||||
var envvars []string
|
||||
for _, c := range candidates {
|
||||
trimmed := strings.TrimSpace(c)
|
||||
if len(trimmed) > 0 {
|
||||
envvars = append(envvars, trimmed)
|
||||
}
|
||||
}
|
||||
|
||||
return envvars
|
||||
}
|
||||
@@ -103,59 +103,3 @@ func TestGetHookConfig(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetSwarmResourceEnvvars(t *testing.T) {
|
||||
testCases := []struct {
|
||||
value string
|
||||
expected []string
|
||||
}{
|
||||
{
|
||||
value: "nil",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: "",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: " ",
|
||||
expected: nil,
|
||||
},
|
||||
{
|
||||
value: "single",
|
||||
expected: []string{"single"},
|
||||
},
|
||||
{
|
||||
value: "single ",
|
||||
expected: []string{"single"},
|
||||
},
|
||||
{
|
||||
value: "one,two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
{
|
||||
value: "one ,two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
{
|
||||
value: "one, two",
|
||||
expected: []string{"one", "two"},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("%d", i), func(t *testing.T) {
|
||||
c := &HookConfig{
|
||||
SwarmResource: func() *string {
|
||||
if tc.value == "nil" {
|
||||
return nil
|
||||
}
|
||||
return &tc.value
|
||||
}(),
|
||||
}
|
||||
|
||||
envvars := c.getSwarmResourceEnvvars()
|
||||
require.EqualValues(t, tc.expected, envvars)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -7,6 +7,51 @@ import (
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseCudaVersionValid(t *testing.T) {
|
||||
var tests = []struct {
|
||||
version string
|
||||
expected [3]uint32
|
||||
}{
|
||||
{"0", [3]uint32{0, 0, 0}},
|
||||
{"8", [3]uint32{8, 0, 0}},
|
||||
{"7.5", [3]uint32{7, 5, 0}},
|
||||
{"9.0.116", [3]uint32{9, 0, 116}},
|
||||
{"4294967295.4294967295.4294967295", [3]uint32{4294967295, 4294967295, 4294967295}},
|
||||
}
|
||||
for i, c := range tests {
|
||||
vmaj, vmin, vpatch := parseCudaVersion(c.version)
|
||||
|
||||
version := [3]uint32{vmaj, vmin, vpatch}
|
||||
|
||||
require.Equal(t, c.expected, version, "%d: %v", i, c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseCudaVersionInvalid(t *testing.T) {
|
||||
var tests = []string{
|
||||
"foo",
|
||||
"foo.5.10",
|
||||
"9.0.116.50",
|
||||
"9.0.116foo",
|
||||
"7.foo",
|
||||
"9.0.bar",
|
||||
"9.4294967296",
|
||||
"9.0.116.",
|
||||
"9..0",
|
||||
"9.",
|
||||
".5.10",
|
||||
"-9",
|
||||
"+9",
|
||||
"-9.1.116",
|
||||
"-9.-1.-116",
|
||||
}
|
||||
for _, c := range tests {
|
||||
require.Panics(t, func() {
|
||||
parseCudaVersion(c)
|
||||
}, "parseCudaVersion(%v)", c)
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsPrivileged(t *testing.T) {
|
||||
var tests = []struct {
|
||||
spec string
|
||||
@@ -6,21 +6,20 @@ import (
|
||||
"log"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"runtime/debug"
|
||||
"strconv"
|
||||
"strings"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
)
|
||||
|
||||
var (
|
||||
debugflag = flag.Bool("debug", false, "enable debug output")
|
||||
versionflag = flag.Bool("version", false, "enable version output")
|
||||
configflag = flag.String("config", "", "configuration file")
|
||||
debugflag = flag.Bool("debug", false, "enable debug output")
|
||||
configflag = flag.String("config", "", "configuration file")
|
||||
|
||||
defaultPATH = []string{"/usr/local/sbin", "/usr/local/bin", "/usr/sbin", "/usr/bin", "/sbin", "/bin"}
|
||||
)
|
||||
|
||||
func exit() {
|
||||
@@ -36,16 +35,28 @@ func exit() {
|
||||
os.Exit(0)
|
||||
}
|
||||
|
||||
func getPATH(config CLIConfig) string {
|
||||
dirs := filepath.SplitList(os.Getenv("PATH"))
|
||||
// directories from the hook environment have higher precedence
|
||||
dirs = append(dirs, defaultPATH...)
|
||||
|
||||
if config.Root != nil {
|
||||
rootDirs := []string{}
|
||||
for _, dir := range dirs {
|
||||
rootDirs = append(rootDirs, path.Join(*config.Root, dir))
|
||||
}
|
||||
// directories with the root prefix have higher precedence
|
||||
dirs = append(rootDirs, dirs...)
|
||||
}
|
||||
return strings.Join(dirs, ":")
|
||||
}
|
||||
|
||||
func getCLIPath(config CLIConfig) string {
|
||||
if config.Path != nil {
|
||||
return *config.Path
|
||||
}
|
||||
|
||||
var root string
|
||||
if config.Root != nil {
|
||||
root = *config.Root
|
||||
}
|
||||
if err := os.Setenv("PATH", lookup.GetPath(root)); err != nil {
|
||||
if err := os.Setenv("PATH", getPATH(config)); err != nil {
|
||||
log.Panicln("couldn't set PATH variable:", err)
|
||||
}
|
||||
|
||||
@@ -74,10 +85,6 @@ func doPrestart() {
|
||||
hook := getHookConfig()
|
||||
cli := hook.NvidiaContainerCLI
|
||||
|
||||
if info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntime.Mode) != "legacy" {
|
||||
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
|
||||
}
|
||||
|
||||
container := getContainerConfig(hook)
|
||||
nvidia := container.Nvidia
|
||||
if nvidia == nil {
|
||||
@@ -160,11 +167,6 @@ func main() {
|
||||
flag.Usage = usage
|
||||
flag.Parse()
|
||||
|
||||
if *versionflag {
|
||||
fmt.Printf("%v version %v\n", "NVIDIA Container Runtime Hook", info.GetVersionString())
|
||||
return
|
||||
}
|
||||
|
||||
args := flag.Args()
|
||||
if len(args) == 0 {
|
||||
flag.Usage()
|
||||
@@ -184,12 +186,3 @@ func main() {
|
||||
os.Exit(2)
|
||||
}
|
||||
}
|
||||
|
||||
// logInterceptor implements the info.Logger interface to allow for logging from this function.
|
||||
type logInterceptor struct{}
|
||||
|
||||
func (l *logInterceptor) Infof(format string, args ...interface{}) {
|
||||
log.Printf(format, args...)
|
||||
}
|
||||
|
||||
func (l *logInterceptor) Debugf(format string, args ...interface{}) {}
|
||||
@@ -1,17 +0,0 @@
|
||||
# NVIDIA Container Toolkit CLI
|
||||
|
||||
The NVIDIA Container Toolkit CLI `nvidia-ctk` provides a number of utilities that are useful for working with the NVIDIA Container Toolkit.
|
||||
|
||||
## Functionality
|
||||
|
||||
### Configure runtimes
|
||||
|
||||
The `runtime` command of the `nvidia-ctk` CLI provides a set of utilities to related to the configuration
|
||||
and management of supported container engines.
|
||||
|
||||
For example, running the following command:
|
||||
```bash
|
||||
nvidia-ctk runtime configure --set-as-default
|
||||
```
|
||||
will ensure that the NVIDIA Container Runtime is added as the default runtime to the default container
|
||||
engine.
|
||||
@@ -1,229 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package symlinks
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
hostRoot string
|
||||
filenames cli.StringSlice
|
||||
links cli.StringSlice
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs a hook command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the '' command
|
||||
c := cli.Command{
|
||||
Name: "create-symlinks",
|
||||
Usage: "A hook to create symlinks in the container. This can be used to proces CSV mount specs",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "host-root",
|
||||
Usage: "The root on the host filesystem to use to resolve symlinks",
|
||||
Destination: &cfg.hostRoot,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "csv-filename",
|
||||
Usage: "Specify a (CSV) filename to process",
|
||||
Destination: &cfg.filenames,
|
||||
},
|
||||
&cli.StringSliceFlag{
|
||||
Name: "link",
|
||||
Usage: "Specify a specific link to create. The link is specified as source:target",
|
||||
Destination: &cfg.links,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
csvFiles := cfg.filenames.Value()
|
||||
|
||||
chainLocator := lookup.NewSymlinkChainLocator(m.logger, cfg.hostRoot)
|
||||
|
||||
var candidates []string
|
||||
for _, file := range csvFiles {
|
||||
mountSpecs, err := csv.NewCSVFileParser(m.logger, file).Parse()
|
||||
if err != nil {
|
||||
m.logger.Debugf("Skipping CSV file %v: %v", file, err)
|
||||
continue
|
||||
}
|
||||
|
||||
for _, ms := range mountSpecs {
|
||||
if ms.Type != csv.MountSpecSym {
|
||||
continue
|
||||
}
|
||||
targets, err := chainLocator.Locate(ms.Path)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to locate symlink %v", ms.Path)
|
||||
}
|
||||
candidates = append(candidates, targets...)
|
||||
}
|
||||
}
|
||||
|
||||
created := make(map[string]bool)
|
||||
// candidates is a list of absolute paths to symlinks in a chain, or the final target of the chain.
|
||||
for _, candidate := range candidates {
|
||||
targets, err := m.Locate(candidate)
|
||||
if err != nil {
|
||||
m.logger.Debugf("Skipping invalid link: %v", err)
|
||||
continue
|
||||
} else if len(targets) != 1 {
|
||||
m.logger.Debugf("Unexepected number of targets: %v", targets)
|
||||
continue
|
||||
} else if targets[0] == candidate {
|
||||
m.logger.Debugf("%v is not a symlink", candidate)
|
||||
continue
|
||||
}
|
||||
|
||||
err = m.createLink(created, cfg.hostRoot, containerRoot, targets[0], candidate)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to create link %v: %v", []string{targets[0], candidate}, err)
|
||||
}
|
||||
}
|
||||
|
||||
links := cfg.links.Value()
|
||||
for _, l := range links {
|
||||
parts := strings.Split(l, ":")
|
||||
if len(parts) != 2 {
|
||||
m.logger.Warnf("Invalid link specification %v", l)
|
||||
continue
|
||||
}
|
||||
|
||||
err := m.createLink(created, cfg.hostRoot, containerRoot, parts[0], parts[1])
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to create link %v: %v", parts, err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
|
||||
}
|
||||
|
||||
func (m command) createLink(created map[string]bool, hostRoot string, containerRoot string, target string, link string) error {
|
||||
linkPath, err := changeRoot(hostRoot, containerRoot, link)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to resolve path for link %v relative to %v: %v", link, containerRoot, err)
|
||||
}
|
||||
if created[linkPath] {
|
||||
m.logger.Debugf("Link %v already created", linkPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
targetPath, err := changeRoot(hostRoot, "/", target)
|
||||
if err != nil {
|
||||
m.logger.Warnf("Failed to resolve path for target %v relative to %v: %v", target, "/", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Symlinking %v to %v", linkPath, targetPath)
|
||||
err = os.MkdirAll(filepath.Dir(linkPath), 0755)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory: %v", err)
|
||||
}
|
||||
err = os.Symlink(target, linkPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create symlink: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func changeRoot(current string, new string, path string) (string, error) {
|
||||
if !filepath.IsAbs(path) {
|
||||
return path, nil
|
||||
}
|
||||
|
||||
relative := path
|
||||
if current != "" {
|
||||
r, err := filepath.Rel(current, path)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
relative = r
|
||||
}
|
||||
|
||||
return filepath.Join(new, relative), nil
|
||||
}
|
||||
|
||||
// Locate returns the link target of the specified filename or an empty slice if the
|
||||
// specified filename is not a symlink.
|
||||
func (m command) Locate(filename string) ([]string, error) {
|
||||
info, err := os.Lstat(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get file info: %v", info)
|
||||
}
|
||||
if info.Mode()&os.ModeSymlink == 0 {
|
||||
m.logger.Debugf("%v is not a symlink", filename)
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
target, err := os.Readlink(filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error checking symlink: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Debugf("Resolved link: '%v' => '%v'", filename, target)
|
||||
|
||||
return []string{target}, nil
|
||||
}
|
||||
@@ -1,52 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package hook
|
||||
|
||||
import (
|
||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/create-symlinks"
|
||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook/update-ldcache"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type hookCommand struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs a hook command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := hookCommand{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m hookCommand) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "hook",
|
||||
Usage: "A collection of hooks that may be injected into an OCI spec",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{
|
||||
ldcache.NewCommand(m.logger),
|
||||
symlinks.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
}
|
||||
@@ -1,129 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package ldcache
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"syscall"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
folders cli.StringSlice
|
||||
containerSpec string
|
||||
}
|
||||
|
||||
// NewCommand constructs an update-ldcache command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build the update-ldcache command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'update-ldcache' command
|
||||
c := cli.Command{
|
||||
Name: "update-ldcache",
|
||||
Usage: "Update ldcache in a container by running ldconfig",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringSliceFlag{
|
||||
Name: "folder",
|
||||
Usage: "Specifiy a folder to add to /etc/ld.so.conf before updating the ld cache",
|
||||
Destination: &cfg.folders,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "container-spec",
|
||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
||||
Destination: &cfg.containerSpec,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load container state: %v", err)
|
||||
}
|
||||
|
||||
containerRoot, err := s.GetContainerRoot()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determined container root: %v", err)
|
||||
}
|
||||
|
||||
err = m.createConfig(containerRoot, cfg.folders.Value())
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf: %v", err)
|
||||
}
|
||||
|
||||
args := []string{"/sbin/ldconfig"}
|
||||
if containerRoot != "" {
|
||||
args = append(args, "-r", containerRoot)
|
||||
}
|
||||
|
||||
return syscall.Exec(args[0], args, nil)
|
||||
}
|
||||
|
||||
// createConfig creates (or updates) /etc/ld.so.conf.d/nvcr-<RANDOM_STRING>.conf in the container
|
||||
// to include the required paths.
|
||||
func (m command) createConfig(root string, folders []string) error {
|
||||
if len(folders) == 0 {
|
||||
m.logger.Debugf("No folders to add to /etc/ld.so.conf")
|
||||
return nil
|
||||
}
|
||||
|
||||
configFile, err := os.CreateTemp(filepath.Join(root, "/etc/ld.so.conf.d"), "nvcr-*.conf")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create config file: %v", err)
|
||||
}
|
||||
defer configFile.Close()
|
||||
|
||||
m.logger.Debugf("Adding folders %v to %v", folders, configFile.Name())
|
||||
|
||||
configured := make(map[string]bool)
|
||||
for _, folder := range folders {
|
||||
if configured[folder] {
|
||||
continue
|
||||
}
|
||||
_, err = configFile.WriteString(fmt.Sprintf("%s\n", folder))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to update ld.so.conf.d: %v", err)
|
||||
}
|
||||
configured[folder] = true
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,386 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
specs "github.com/container-orchestrated-devices/container-device-interface/specs-go"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
"sigs.k8s.io/yaml"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaCTKExecutable = "nvidia-ctk"
|
||||
nvidiaCTKDefaultFilePath = "/usr/bin/" + nvidiaCTKExecutable
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
type config struct {
|
||||
output string
|
||||
jsonMode bool
|
||||
}
|
||||
|
||||
// NewCommand constructs a generate-cdi command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build creates the CLI command
|
||||
func (m command) build() *cli.Command {
|
||||
cfg := config{}
|
||||
|
||||
// Create the 'generate-cdi' command
|
||||
c := cli.Command{
|
||||
Name: "generate-cdi",
|
||||
Usage: "Generate CDI specifications for use with CDI-enabled runtimes",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.run(c, &cfg)
|
||||
},
|
||||
}
|
||||
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "output",
|
||||
Usage: "Specify the file to output the generated CDI specification to. If this is '-' or '' the specification is output to STDOUT",
|
||||
Destination: &cfg.output,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "json",
|
||||
Usage: "Output the generated CDI spec in JSON mode instead of YAML",
|
||||
Destination: &cfg.jsonMode,
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
|
||||
func (m command) run(c *cli.Context, cfg *config) error {
|
||||
spec, err := m.generateSpec()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec: %v", err)
|
||||
}
|
||||
|
||||
var outputTo io.Writer
|
||||
if cfg.output == "" || cfg.output == "-" {
|
||||
outputTo = os.Stdout
|
||||
} else {
|
||||
outputFile, err := os.Create(cfg.output)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create output file: %v", err)
|
||||
}
|
||||
defer outputFile.Close()
|
||||
outputTo = outputFile
|
||||
}
|
||||
|
||||
if filepath.Ext(cfg.output) == ".json" {
|
||||
cfg.jsonMode = true
|
||||
} else if filepath.Ext(cfg.output) == ".yaml" || filepath.Ext(cfg.output) == ".yml" {
|
||||
cfg.jsonMode = false
|
||||
}
|
||||
|
||||
data, err := yaml.Marshal(spec)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to marshal CDI spec: %v", err)
|
||||
}
|
||||
|
||||
if cfg.jsonMode {
|
||||
data, err = yaml.YAMLToJSONStrict(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to convert CDI spec from YAML to JSON: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
_, err = outputTo.Write(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to write output: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m command) generateSpec() (*specs.Spec, error) {
|
||||
nvmllib := nvml.New()
|
||||
if r := nvmllib.Init(); r != nvml.SUCCESS {
|
||||
return nil, r
|
||||
}
|
||||
defer nvmllib.Shutdown()
|
||||
|
||||
devicelib := device.New(device.WithNvml(nvmllib))
|
||||
|
||||
spec := specs.Spec{
|
||||
Version: "0.4.0",
|
||||
Kind: "nvidia.com/gpu",
|
||||
ContainerEdits: specs.ContainerEdits{},
|
||||
}
|
||||
err := devicelib.VisitDevices(func(i int, d device.Device) error {
|
||||
isMig, err := d.IsMigEnabled()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check whether device is MIG device: %v", err)
|
||||
}
|
||||
if isMig {
|
||||
return nil
|
||||
}
|
||||
device, err := generateEditsForDevice(newGPUDevice(i, d))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec for device %v: %v", i, err)
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, device)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to generate CDI spec for GPU devices: %v", err)
|
||||
}
|
||||
|
||||
err = devicelib.VisitMigDevices(func(i int, d device.Device, j int, m device.MigDevice) error {
|
||||
device, err := generateEditsForDevice(newMigDevice(i, j, m))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to generate CDI spec for device %v: %v", i, err)
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, device)
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("falied to generate CDI spec for MIG devices: %v", err)
|
||||
}
|
||||
|
||||
// We create an "all" device with all the discovered device nodes
|
||||
var allDeviceNodes []*specs.DeviceNode
|
||||
for _, d := range spec.Devices {
|
||||
for _, dn := range d.ContainerEdits.DeviceNodes {
|
||||
allDeviceNodes = append(allDeviceNodes, dn)
|
||||
}
|
||||
}
|
||||
all := specs.Device{
|
||||
Name: "all",
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: allDeviceNodes,
|
||||
},
|
||||
}
|
||||
|
||||
spec.Devices = append(spec.Devices, all)
|
||||
spec.ContainerEdits.DeviceNodes = m.getExistingMetaDeviceNodes()
|
||||
|
||||
libraries, err := m.findLibs(nvmllib)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver libraries: %v", err)
|
||||
}
|
||||
|
||||
binaries, err := m.findBinaries()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver binaries: %v", err)
|
||||
}
|
||||
|
||||
ipcs, err := m.findIPC()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to locate driver IPC sockets: %v", err)
|
||||
}
|
||||
|
||||
spec.ContainerEdits.Mounts = generateMountsForPaths(libraries, binaries, ipcs)
|
||||
|
||||
ldcacheUpdateHook := m.generateUpdateLdCacheHook(libraries)
|
||||
|
||||
spec.ContainerEdits.Hooks = []*specs.Hook{ldcacheUpdateHook}
|
||||
|
||||
return &spec, nil
|
||||
}
|
||||
|
||||
func generateEditsForDevice(name string, d deviceInfo) (specs.Device, error) {
|
||||
deviceNodePaths, err := d.GetDeviceNodes()
|
||||
if err != nil {
|
||||
return specs.Device{}, fmt.Errorf("failed to get paths for device: %v", err)
|
||||
}
|
||||
|
||||
deviceNodes := getDeviceNodesFromPaths(deviceNodePaths)
|
||||
|
||||
device := specs.Device{
|
||||
Name: name,
|
||||
ContainerEdits: specs.ContainerEdits{
|
||||
DeviceNodes: deviceNodes,
|
||||
},
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (m command) getExistingMetaDeviceNodes() []*specs.DeviceNode {
|
||||
metaDeviceNodePaths := []string{
|
||||
"/dev/nvidia-modeset",
|
||||
"/dev/nvidia-uvm-tools",
|
||||
"/dev/nvidia-uvm",
|
||||
"/dev/nvidiactl",
|
||||
}
|
||||
|
||||
var existingDeviceNodePaths []string
|
||||
for _, p := range metaDeviceNodePaths {
|
||||
if _, err := os.Stat(p); err != nil {
|
||||
m.logger.Infof("Ignoring missing meta device %v", p)
|
||||
continue
|
||||
}
|
||||
existingDeviceNodePaths = append(existingDeviceNodePaths, p)
|
||||
}
|
||||
|
||||
return getDeviceNodesFromPaths(existingDeviceNodePaths)
|
||||
}
|
||||
|
||||
func getDeviceNodesFromPaths(deviceNodePaths []string) []*specs.DeviceNode {
|
||||
var deviceNodes []*specs.DeviceNode
|
||||
for _, p := range deviceNodePaths {
|
||||
deviceNode := specs.DeviceNode{
|
||||
Path: p,
|
||||
}
|
||||
deviceNodes = append(deviceNodes, &deviceNode)
|
||||
}
|
||||
|
||||
return deviceNodes
|
||||
}
|
||||
|
||||
func (m command) findLibs(nvmllib nvml.Interface) ([]string, error) {
|
||||
version, r := nvmllib.SystemGetDriverVersion()
|
||||
if r != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("failed to determine driver version: %v", r)
|
||||
}
|
||||
m.logger.Infof("Using driver version %v", version)
|
||||
|
||||
cache, err := ldcache.New(m.logger, "")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load ldcache: %v", err)
|
||||
}
|
||||
|
||||
libs32, libs64 := cache.List()
|
||||
|
||||
var libs []string
|
||||
for _, l := range libs64 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
m.logger.Infof("found 64-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
for _, l := range libs32 {
|
||||
if strings.HasSuffix(l, version) {
|
||||
m.logger.Infof("found 32-bit driver lib: %v", l)
|
||||
libs = append(libs, l)
|
||||
}
|
||||
}
|
||||
|
||||
return libs, nil
|
||||
}
|
||||
|
||||
func (m command) findBinaries() ([]string, error) {
|
||||
candidates := []string{
|
||||
"nvidia-smi", /* System management interface */
|
||||
"nvidia-debugdump", /* GPU coredump utility */
|
||||
"nvidia-persistenced", /* Persistence mode utility */
|
||||
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
||||
"nvidia-cuda-mps-server", /* Multi process service server */
|
||||
}
|
||||
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
|
||||
var binaries []string
|
||||
for _, c := range candidates {
|
||||
targets, err := locator.Locate(c)
|
||||
if err != nil {
|
||||
m.logger.Warningf("skipping %v: %v", c, err)
|
||||
continue
|
||||
}
|
||||
|
||||
binaries = append(binaries, targets[0])
|
||||
}
|
||||
return binaries, nil
|
||||
}
|
||||
|
||||
func (m command) findIPC() ([]string, error) {
|
||||
candidates := []string{
|
||||
"/var/run/nvidia-persistenced/socket",
|
||||
"/var/run/nvidia-fabricmanager/socket",
|
||||
// TODO: This can be controlled by the NV_MPS_PIPE_DIR envvar
|
||||
"/tmp/nvidia-mps",
|
||||
}
|
||||
|
||||
locator := lookup.NewFileLocator(m.logger, "")
|
||||
|
||||
var ipcs []string
|
||||
for _, c := range candidates {
|
||||
targets, err := locator.Locate(c)
|
||||
if err != nil {
|
||||
m.logger.Warningf("skipping %v: %v", c, err)
|
||||
continue
|
||||
}
|
||||
|
||||
ipcs = append(ipcs, targets[0])
|
||||
}
|
||||
return ipcs, nil
|
||||
}
|
||||
|
||||
func generateMountsForPaths(pathSets ...[]string) []*specs.Mount {
|
||||
var mounts []*specs.Mount
|
||||
for _, paths := range pathSets {
|
||||
for _, p := range paths {
|
||||
mount := specs.Mount{
|
||||
HostPath: p,
|
||||
// We may want to adjust the container path
|
||||
ContainerPath: p,
|
||||
Type: "bind",
|
||||
Options: []string{
|
||||
"ro",
|
||||
"nosuid",
|
||||
"nodev",
|
||||
"bind",
|
||||
},
|
||||
}
|
||||
mounts = append(mounts, &mount)
|
||||
}
|
||||
}
|
||||
return mounts
|
||||
}
|
||||
|
||||
func (m command) generateUpdateLdCacheHook(libraries []string) *specs.Hook {
|
||||
locator := lookup.NewExecutableLocator(m.logger, "")
|
||||
|
||||
hook := discover.CreateLDCacheUpdateHook(
|
||||
m.logger,
|
||||
locator,
|
||||
nvidiaCTKExecutable,
|
||||
nvidiaCTKDefaultFilePath,
|
||||
libraries,
|
||||
)
|
||||
return &specs.Hook{
|
||||
HookName: hook.Lifecycle,
|
||||
Path: hook.Path,
|
||||
Args: hook.Args,
|
||||
}
|
||||
}
|
||||
@@ -1,123 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY Type, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package cdi
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvlib/device"
|
||||
"gitlab.com/nvidia/cloud-native/go-nvlib/pkg/nvml"
|
||||
)
|
||||
|
||||
// nvmlDevice wraps an nvml.Device with more functions.
|
||||
type nvmlDevice struct {
|
||||
nvml.Device
|
||||
}
|
||||
|
||||
// nvmlMigDevice allows for specific functions of nvmlDevice to be overridden.
|
||||
type nvmlMigDevice nvmlDevice
|
||||
|
||||
// deviceInfo defines the information the required to construct a Device
|
||||
type deviceInfo interface {
|
||||
GetUUID() (string, error)
|
||||
GetDeviceNodes() ([]string, error)
|
||||
}
|
||||
|
||||
var _ deviceInfo = (*nvmlDevice)(nil)
|
||||
var _ deviceInfo = (*nvmlMigDevice)(nil)
|
||||
|
||||
func newGPUDevice(i int, gpu device.Device) (string, nvmlDevice) {
|
||||
return fmt.Sprintf("gpu%v", i), nvmlDevice{gpu}
|
||||
}
|
||||
|
||||
func newMigDevice(i int, j int, mig device.MigDevice) (string, nvmlMigDevice) {
|
||||
return fmt.Sprintf("mig%v:%v", i, j), nvmlMigDevice{mig}
|
||||
}
|
||||
|
||||
// GetUUID returns the UUID of the device
|
||||
func (d nvmlDevice) GetUUID() (string, error) {
|
||||
uuid, ret := d.Device.GetUUID()
|
||||
if ret != nvml.SUCCESS {
|
||||
return "", ret
|
||||
}
|
||||
return uuid, nil
|
||||
}
|
||||
|
||||
// GetUUID returns the UUID of the device
|
||||
func (d nvmlMigDevice) GetUUID() (string, error) {
|
||||
return nvmlDevice(d).GetUUID()
|
||||
}
|
||||
|
||||
// GetDeviceNodes returns the device node paths for a GPU device
|
||||
func (d nvmlDevice) GetDeviceNodes() ([]string, error) {
|
||||
minor, ret := d.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
path := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
return []string{path}, nil
|
||||
}
|
||||
|
||||
// GetDeviceNodes returns the device node paths for a MIG device
|
||||
func (d nvmlMigDevice) GetDeviceNodes() ([]string, error) {
|
||||
parent, ret := d.GetDeviceHandleFromMigDeviceHandle()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting parent device: %v", ret)
|
||||
}
|
||||
minor, ret := parent.GetMinorNumber()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU device minor number: %v", ret)
|
||||
}
|
||||
parentPath := fmt.Sprintf("/dev/nvidia%d", minor)
|
||||
|
||||
migCaps, err := nvcaps.NewMigCaps()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG capability device paths: %v", err)
|
||||
}
|
||||
|
||||
gi, ret := d.GetGpuInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
ci, ret := d.GetComputeInstanceId()
|
||||
if ret != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting Compute Instance ID: %v", ret)
|
||||
}
|
||||
|
||||
giCap := nvcaps.NewGPUInstanceCap(minor, gi)
|
||||
giCapDevicePath, err := migCaps.GetCapDevicePath(giCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get GI cap device path: %v", err)
|
||||
}
|
||||
|
||||
ciCap := nvcaps.NewComputeInstanceCap(minor, gi, ci)
|
||||
ciCapDevicePath, err := migCaps.GetCapDevicePath(ciCap)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get CI cap device path: %v", err)
|
||||
}
|
||||
|
||||
devicePaths := []string{
|
||||
parentPath,
|
||||
giCapDevicePath,
|
||||
ciCapDevicePath,
|
||||
}
|
||||
|
||||
return devicePaths, nil
|
||||
}
|
||||
@@ -1,50 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package info
|
||||
|
||||
import (
|
||||
cdi "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/info/generate-cdi"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an info command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// build
|
||||
func (m command) build() *cli.Command {
|
||||
// Create the 'hook' command
|
||||
hook := cli.Command{
|
||||
Name: "info",
|
||||
Usage: "Provide information about the system",
|
||||
}
|
||||
|
||||
hook.Subcommands = []*cli.Command{
|
||||
cdi.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &hook
|
||||
}
|
||||
@@ -1,86 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/hook"
|
||||
infoCLI "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/info"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
cli "github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
var logger = log.New()
|
||||
|
||||
// config defines the options that can be set for the CLI through config files,
|
||||
// environment variables, or command line flags
|
||||
type config struct {
|
||||
// Debug indicates whether the CLI is started in "debug" mode
|
||||
Debug bool
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Create a config struct to hold the parsed environment variables or command line flags
|
||||
config := config{}
|
||||
|
||||
// Create the top-level CLI
|
||||
c := cli.NewApp()
|
||||
c.Name = "NVIDIA Container Toolkit CLI"
|
||||
c.UseShortOptionHandling = true
|
||||
c.EnableBashCompletion = true
|
||||
c.Usage = "Tools to configure the NVIDIA Container Toolkit"
|
||||
c.Version = info.GetVersionString()
|
||||
|
||||
// Setup the flags for this command
|
||||
c.Flags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "debug",
|
||||
Aliases: []string{"d"},
|
||||
Usage: "Enable debug-level logging",
|
||||
Destination: &config.Debug,
|
||||
EnvVars: []string{"NVIDIA_CTK_DEBUG"},
|
||||
},
|
||||
}
|
||||
|
||||
// Set log-level for all subcommands
|
||||
c.Before = func(c *cli.Context) error {
|
||||
logLevel := log.InfoLevel
|
||||
if config.Debug {
|
||||
logLevel = log.DebugLevel
|
||||
}
|
||||
logger.SetLevel(logLevel)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Define the subcommands
|
||||
c.Commands = []*cli.Command{
|
||||
hook.NewCommand(logger),
|
||||
runtime.NewCommand(logger),
|
||||
infoCLI.NewCommand(logger),
|
||||
}
|
||||
|
||||
// Run the CLI
|
||||
err := c.Run(os.Args)
|
||||
if err != nil {
|
||||
log.Errorf("%v", err)
|
||||
log.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -1,154 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package configure
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/nvidia"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/docker"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
const (
|
||||
defaultRuntime = "docker"
|
||||
|
||||
defaultDockerConfigFilePath = "/etc/docker/daemon.json"
|
||||
)
|
||||
|
||||
type command struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs an configure command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := command{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
// config defines the options that can be set for the CLI through config files,
|
||||
// environment variables, or command line config
|
||||
type config struct {
|
||||
dryRun bool
|
||||
runtime string
|
||||
configFilePath string
|
||||
nvidiaOptions nvidia.Options
|
||||
}
|
||||
|
||||
func (m command) build() *cli.Command {
|
||||
// Create a config struct to hold the parsed environment variables or command line flags
|
||||
config := config{}
|
||||
|
||||
// Create the 'configure' command
|
||||
configure := cli.Command{
|
||||
Name: "configure",
|
||||
Usage: "Add a runtime to the specified container engine",
|
||||
Action: func(c *cli.Context) error {
|
||||
return m.configureWrapper(c, &config)
|
||||
},
|
||||
}
|
||||
|
||||
configure.Flags = []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "dry-run",
|
||||
Usage: "update the runtime configuration as required but don't write changes to disk",
|
||||
Destination: &config.dryRun,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime",
|
||||
Usage: "the target runtime engine. One of [docker]",
|
||||
Value: defaultRuntime,
|
||||
Destination: &config.runtime,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "config",
|
||||
Usage: "path to the config file for the target runtime",
|
||||
Destination: &config.configFilePath,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "nvidia-runtime-name",
|
||||
Usage: "specify the name of the NVIDIA runtime that will be added",
|
||||
Value: nvidia.RuntimeName,
|
||||
Destination: &config.nvidiaOptions.RuntimeName,
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "runtime-path",
|
||||
Usage: "specify the path to the NVIDIA runtime executable",
|
||||
Value: nvidia.RuntimeExecutable,
|
||||
Destination: &config.nvidiaOptions.RuntimePath,
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "set-as-default",
|
||||
Usage: "set the specified runtime as the default runtime",
|
||||
Destination: &config.nvidiaOptions.SetAsDefault,
|
||||
},
|
||||
}
|
||||
|
||||
return &configure
|
||||
}
|
||||
|
||||
func (m command) configureWrapper(c *cli.Context, config *config) error {
|
||||
switch config.runtime {
|
||||
case "docker":
|
||||
return m.configureDocker(c, config)
|
||||
}
|
||||
|
||||
return fmt.Errorf("unrecognized runtime '%v'", config.runtime)
|
||||
}
|
||||
|
||||
// configureDocker updates the docker config to enable the NVIDIA Container Runtime
|
||||
func (m command) configureDocker(c *cli.Context, config *config) error {
|
||||
configFilePath := config.configFilePath
|
||||
if configFilePath == "" {
|
||||
configFilePath = defaultDockerConfigFilePath
|
||||
}
|
||||
|
||||
cfg, err := docker.LoadConfig(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to load config: %v", err)
|
||||
}
|
||||
|
||||
defaultRuntime := config.nvidiaOptions.DefaultRuntime()
|
||||
runtimeConfig := config.nvidiaOptions.Runtime().DockerRuntimesConfig()
|
||||
err = docker.UpdateConfig(cfg, defaultRuntime, runtimeConfig)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to update config: %v", err)
|
||||
}
|
||||
|
||||
if config.dryRun {
|
||||
output, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to JSON: %v", err)
|
||||
}
|
||||
os.Stdout.WriteString(fmt.Sprintf("%s\n", output))
|
||||
return nil
|
||||
}
|
||||
err = docker.FlushConfig(cfg, configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to flush config: %v", err)
|
||||
}
|
||||
|
||||
m.logger.Infof("Wrote updated config to %v", configFilePath)
|
||||
m.logger.Infof("It is recommended that the docker daemon be restarted.")
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,75 +0,0 @@
|
||||
/*
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package nvidia
|
||||
|
||||
const (
|
||||
// RuntimeName is the default name to use in configs for the NVIDIA Container Runtime
|
||||
RuntimeName = "nvidia"
|
||||
// RuntimeExecutable is the default NVIDIA Container Runtime executable file name
|
||||
RuntimeExecutable = "nvidia-container-runtime"
|
||||
)
|
||||
|
||||
// Options specifies the options for the NVIDIA Container Runtime w.r.t a container engine such as docker.
|
||||
type Options struct {
|
||||
SetAsDefault bool
|
||||
RuntimeName string
|
||||
RuntimePath string
|
||||
}
|
||||
|
||||
// Runtime defines an NVIDIA runtime with a name and a executable
|
||||
type Runtime struct {
|
||||
Name string
|
||||
Path string
|
||||
}
|
||||
|
||||
// DefaultRuntime returns the default runtime for the configured options.
|
||||
// If the configuration is invalid or the default runtimes should not be set
|
||||
// the empty string is returned.
|
||||
func (o Options) DefaultRuntime() string {
|
||||
if !o.SetAsDefault {
|
||||
return ""
|
||||
}
|
||||
|
||||
return o.RuntimeName
|
||||
}
|
||||
|
||||
// Runtime creates a runtime struct based on the options.
|
||||
func (o Options) Runtime() Runtime {
|
||||
path := o.RuntimePath
|
||||
|
||||
if o.RuntimePath == "" {
|
||||
path = RuntimeExecutable
|
||||
}
|
||||
|
||||
r := Runtime{
|
||||
Name: o.RuntimeName,
|
||||
Path: path,
|
||||
}
|
||||
|
||||
return r
|
||||
}
|
||||
|
||||
// DockerRuntimesConfig generatest the expected docker config for the specified runtime
|
||||
func (r Runtime) DockerRuntimesConfig() map[string]interface{} {
|
||||
runtimes := make(map[string]interface{})
|
||||
runtimes[r.Name] = map[string]interface{}{
|
||||
"path": r.Path,
|
||||
"args": []string{},
|
||||
}
|
||||
|
||||
return runtimes
|
||||
}
|
||||
@@ -1,49 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package runtime
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk/runtime/configure"
|
||||
"github.com/sirupsen/logrus"
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
type runtimeCommand struct {
|
||||
logger *logrus.Logger
|
||||
}
|
||||
|
||||
// NewCommand constructs a runtime command with the specified logger
|
||||
func NewCommand(logger *logrus.Logger) *cli.Command {
|
||||
c := runtimeCommand{
|
||||
logger: logger,
|
||||
}
|
||||
return c.build()
|
||||
}
|
||||
|
||||
func (m runtimeCommand) build() *cli.Command {
|
||||
// Create the 'runtime' command
|
||||
runtime := cli.Command{
|
||||
Name: "runtime",
|
||||
Usage: "A collection of runtime-related utilities for the NVIDIA Container Toolkit",
|
||||
}
|
||||
|
||||
runtime.Subcommands = []*cli.Command{
|
||||
configure.NewCommand(m.logger),
|
||||
}
|
||||
|
||||
return &runtime
|
||||
}
|
||||
@@ -16,17 +16,3 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
18
config/config.toml.centos
Normal file
18
config/config.toml.centos
Normal file
@@ -0,0 +1,18 @@
|
||||
disable-require = false
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
#path = "/usr/bin/nvidia-container-cli"
|
||||
environment = []
|
||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||
#ldcache = "/etc/ld.so.cache"
|
||||
load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
@@ -16,17 +16,3 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
@@ -16,17 +16,3 @@ ldconfig = "@/sbin/ldconfig"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
@@ -16,17 +16,3 @@ ldconfig = "@/sbin/ldconfig.real"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
log-level = "info"
|
||||
|
||||
# Specify the runtimes to consider. This list is processed in order and the PATH
|
||||
# searched for matching executables unless the entry is an absolute path.
|
||||
runtimes = [
|
||||
"docker-runc",
|
||||
"runc",
|
||||
]
|
||||
|
||||
mode = "auto"
|
||||
|
||||
[nvidia-container-runtime.modes.csv]
|
||||
|
||||
mount-spec-path = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
|
||||
19
config/config.toml.ubuntu+jetpack
Normal file
19
config/config.toml.ubuntu+jetpack
Normal file
@@ -0,0 +1,19 @@
|
||||
disable-require = false
|
||||
supported-driver-capabilities = "compute,compat32,graphics,utility,video,display"
|
||||
#swarm-resource = "DOCKER_RESOURCE_GPU"
|
||||
#accept-nvidia-visible-devices-envvar-when-unprivileged = true
|
||||
#accept-nvidia-visible-devices-as-volume-mounts = false
|
||||
|
||||
[nvidia-container-cli]
|
||||
#root = "/run/nvidia/driver"
|
||||
#path = "/usr/bin/nvidia-container-cli"
|
||||
environment = []
|
||||
#debug = "/var/log/nvidia-container-toolkit.log"
|
||||
#ldcache = "/etc/ld.so.cache"
|
||||
load-kmods = true
|
||||
#no-cgroups = false
|
||||
#user = "root:video"
|
||||
ldconfig = "@/sbin/ldconfig.real"
|
||||
|
||||
[nvidia-container-runtime]
|
||||
#debug = "/var/log/nvidia-container-runtime.log"
|
||||
@@ -1,29 +1,14 @@
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This is the dockerfile for building packages on yum-based RPM systems.
|
||||
|
||||
ARG BASEIMAGE
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
wget \
|
||||
git \
|
||||
make \
|
||||
rpm-build && \
|
||||
rpm-build \
|
||||
wget \
|
||||
&& \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
@@ -43,12 +28,11 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ENV PKG_NAME ${PKG_NAME}
|
||||
ENV PKG_VERS ${PKG_VERS}
|
||||
ENV PKG_REV ${PKG_REV}
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
@@ -58,8 +42,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
@@ -67,6 +49,8 @@ ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
# This might not be useful on Amazon Linux, but it's simpler to keep the RHEL
|
||||
# and Amazon Linux packages identical.
|
||||
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
|
||||
|
||||
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
|
||||
@@ -75,16 +59,11 @@ COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version ${PKG_VERS}" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release ${PKG_REV}" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
67
docker/Dockerfile.centos
Normal file
67
docker/Dockerfile.centos
Normal file
@@ -0,0 +1,67 @@
|
||||
ARG BASEIMAGE
|
||||
FROM ${BASEIMAGE}
|
||||
|
||||
RUN yum install -y \
|
||||
ca-certificates \
|
||||
gcc \
|
||||
git \
|
||||
make \
|
||||
rpm-build \
|
||||
wget \
|
||||
&& \
|
||||
rm -rf /var/cache/yum/*
|
||||
|
||||
ARG GOLANG_VERSION=0.0.0
|
||||
RUN set -eux; \
|
||||
\
|
||||
arch="$(uname -m)"; \
|
||||
case "${arch##*-}" in \
|
||||
x86_64 | amd64) ARCH='amd64' ;; \
|
||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||
aarch64) ARCH='arm64' ;; \
|
||||
*) echo "unsupported architecture"; exit 1 ;; \
|
||||
esac; \
|
||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||
| tar -C /usr/local -xz
|
||||
|
||||
ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
RUN mkdir -p $DIST_DIR /dist
|
||||
|
||||
# nvidia-container-toolkit
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
ENV CONFIG_TOML_SUFFIX ${CONFIG_TOML_SUFFIX}
|
||||
COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
COPY oci-nvidia-hook $DIST_DIR/oci-nvidia-hook
|
||||
|
||||
# Hook for libpod/CRI-O: https://github.com/containers/libpod/blob/v0.8.5/pkg/hooks/docs/oci-hooks.5.md
|
||||
COPY oci-nvidia-hook.json $DIST_DIR/oci-nvidia-hook.json
|
||||
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
@@ -32,7 +32,6 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
@@ -49,8 +48,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
@@ -65,17 +62,12 @@ RUN if [ "$(lsb_release -cs)" = "jessie" ]; then \
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
RUN dch --create --package="${PKG_NAME}" \
|
||||
--newversion "${REVISION}" \
|
||||
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
|
||||
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER1_VERSION}" && \
|
||||
dch -r "" && \
|
||||
RUN sed -i "s;@VERSION@;${REVISION};" debian/changelog && \
|
||||
dch --changelog debian/changelog --append "Bump libnvidia-container dependency to ${REVISION}}" && \
|
||||
dch --changelog debian/changelog -r "" && \
|
||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||
|
||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_VERSION="${REVISION}" \
|
||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/*.deb /dist
|
||||
mv /tmp/nvidia-container-toolkit_*.deb /dist
|
||||
|
||||
@@ -14,8 +14,7 @@
|
||||
ARG GOLANG_VERSION=x.x.x
|
||||
FROM golang:${GOLANG_VERSION}
|
||||
|
||||
RUN go install golang.org/x/lint/golint@latest
|
||||
RUN go install github.com/matryer/moq@latest
|
||||
RUN go install github.com/gordonklaus/ineffassign@latest
|
||||
RUN go install github.com/client9/misspell/cmd/misspell@latest
|
||||
RUN go install github.com/google/go-licenses@latest
|
||||
RUN go get -u golang.org/x/lint/golint
|
||||
RUN go get -u github.com/matryer/moq
|
||||
RUN go get -u github.com/gordonklaus/ineffassign
|
||||
RUN go get -u github.com/client9/misspell/cmd/misspell
|
||||
@@ -25,12 +25,11 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
ENV PKG_NAME ${PKG_NAME}
|
||||
ENV PKG_VERS ${PKG_VERS}
|
||||
ENV PKG_REV ${PKG_REV}
|
||||
|
||||
ENV VERSION $PKG_VERS
|
||||
ENV RELEASE $PKG_REV
|
||||
|
||||
# output directory
|
||||
ENV DIST_DIR=/tmp/nvidia-container-toolkit-$PKG_VERS/SOURCES
|
||||
@@ -40,8 +39,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
# Hook for Project Atomic's fork of Docker: https://github.com/projectatomic/docker/tree/docker-1.13.1-rhel#add-dockerhooks-exec-custom-hooks-for-prestartpoststop-containerspatch
|
||||
@@ -57,16 +54,11 @@ COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
WORKDIR $DIST_DIR/..
|
||||
COPY packaging/rpm .
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
CMD arch=$(uname -m) && \
|
||||
rpmbuild --clean --target=$arch -bb \
|
||||
-D "_topdir $PWD" \
|
||||
-D "release_date $(date +'%a %b %d %Y')" \
|
||||
-D "git_commit ${GIT_COMMIT}" \
|
||||
-D "version ${PKG_VERS}" \
|
||||
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||
-D "release ${PKG_REV}" \
|
||||
-D "version $VERSION" \
|
||||
-D "libnvidia_container_version ${VERSION}-${RELEASE}" \
|
||||
-D "release $RELEASE" \
|
||||
SPECS/nvidia-container-toolkit.spec && \
|
||||
mv RPMS/$arch/*.rpm /dist
|
||||
|
||||
@@ -30,7 +30,6 @@ ENV GOPATH /go
|
||||
ENV PATH $GOPATH/bin:/usr/local/go/bin:$PATH
|
||||
|
||||
# packaging
|
||||
ARG PKG_NAME
|
||||
ARG PKG_VERS
|
||||
ARG PKG_REV
|
||||
|
||||
@@ -47,8 +46,6 @@ RUN mkdir -p $DIST_DIR /dist
|
||||
WORKDIR $GOPATH/src/nvidia-container-toolkit
|
||||
COPY . .
|
||||
|
||||
ARG GIT_COMMIT
|
||||
ENV GIT_COMMIT ${GIT_COMMIT}
|
||||
RUN make PREFIX=${DIST_DIR} cmds
|
||||
|
||||
ARG CONFIG_TOML_SUFFIX
|
||||
@@ -58,17 +55,12 @@ COPY config/config.toml.${CONFIG_TOML_SUFFIX} $DIST_DIR/config.toml
|
||||
WORKDIR $DIST_DIR
|
||||
COPY packaging/debian ./debian
|
||||
|
||||
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||
|
||||
RUN dch --create --package="${PKG_NAME}" \
|
||||
--newversion "${REVISION}" \
|
||||
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
|
||||
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" && \
|
||||
dch -r "" && \
|
||||
RUN sed -i "s;@VERSION@;${REVISION};" debian/changelog && \
|
||||
dch --changelog debian/changelog --append "Bump libnvidia-container dependency to ${REVISION}}" && \
|
||||
dch --changelog debian/changelog -r "" && \
|
||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||
|
||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_VERSION="${REVISION}" \
|
||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||
mv /tmp/*.deb /dist
|
||||
|
||||
@@ -14,10 +14,10 @@
|
||||
|
||||
# Supported OSs by architecture
|
||||
AMD64_TARGETS := ubuntu20.04 ubuntu18.04 ubuntu16.04 debian10 debian9
|
||||
X86_64_TARGETS := fedora35 centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
X86_64_TARGETS := centos7 centos8 rhel7 rhel8 amazonlinux2 opensuse-leap15.1
|
||||
PPC64LE_TARGETS := ubuntu18.04 ubuntu16.04 centos7 centos8 rhel7 rhel8
|
||||
ARM64_TARGETS := ubuntu20.04 ubuntu18.04
|
||||
AARCH64_TARGETS := fedora35 centos8 rhel8 amazonlinux2
|
||||
AARCH64_TARGETS := centos8 rhel8 amazonlinux2
|
||||
|
||||
# Define top-level build targets
|
||||
docker%: SHELL:=/bin/bash
|
||||
@@ -85,62 +85,34 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
||||
--%: docker-build-%
|
||||
@
|
||||
|
||||
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
|
||||
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
|
||||
|
||||
# private ubuntu target
|
||||
--ubuntu%: OS := ubuntu
|
||||
--ubuntu%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--ubuntu%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
--ubuntu%: PKG_REV := 1
|
||||
|
||||
# private debian target
|
||||
--debian%: OS := debian
|
||||
--debian%: LIB_VERSION := $(LIB_VERSION)$(if $(LIB_TAG),~$(LIB_TAG))
|
||||
--debian%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||
--debian%: PKG_REV := 1
|
||||
|
||||
# private centos target
|
||||
--centos%: OS := centos
|
||||
--centos%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--centos%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--centos%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--centos%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--centos8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private fedora target
|
||||
--fedora%: OS := fedora
|
||||
--fedora%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--fedora%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--fedora%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--fedora%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
# The fedora(35) base image has very slow performance when building aarch64 packages.
|
||||
# Since our primary concern here is glibc versions, we use the older glibc version available in centos8.
|
||||
--fedora35%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
# private amazonlinux target
|
||||
--amazonlinux%: OS := amazonlinux
|
||||
--amazonlinux%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--amazonlinux%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--amazonlinux%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--amazonlinux%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
|
||||
# private opensuse-leap target
|
||||
--opensuse-leap%: OS = opensuse-leap
|
||||
--opensuse-leap%: BASEIMAGE = opensuse/leap:$(VERSION)
|
||||
--opensuse-leap%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--opensuse-leap%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
|
||||
# private rhel target (actually built on centos)
|
||||
--rhel%: OS := centos
|
||||
--rhel%: LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)-$(if $(LIBNVIDIA_CONTAINER_TAG),0.1.$(LIBNVIDIA_CONTAINER_TAG),1)
|
||||
--rhel%: PKG_REV := $(if $(LIB_TAG),0.1.$(LIB_TAG),1)
|
||||
--rhel%: VERSION = $(patsubst rhel%-$(ARCH),%,$(TARGET_PLATFORM))
|
||||
--rhel%: ARTIFACTS_DIR = $(DIST_DIR)/rhel$(VERSION)/$(ARCH)
|
||||
--rhel%: DOCKERFILE = $(CURDIR)/docker/Dockerfile.rpm-yum
|
||||
--rhel%: CONFIG_TOML_SUFFIX := rpm-yum
|
||||
--rhel8%: BASEIMAGE = quay.io/centos/centos:stream8
|
||||
|
||||
|
||||
# We allow the CONFIG_TOML_SUFFIX to be overridden.
|
||||
CONFIG_TOML_SUFFIX ?= $(OS)
|
||||
@@ -150,20 +122,15 @@ docker-build-%:
|
||||
docker pull --platform=linux/$(ARCH) $(BASEIMAGE)
|
||||
DOCKER_BUILDKIT=1 \
|
||||
$(DOCKER) build \
|
||||
--platform=linux/$(ARCH) \
|
||||
--progress=plain \
|
||||
--build-arg BASEIMAGE="$(BASEIMAGE)" \
|
||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||
--build-arg PKG_NAME="$(LIB_NAME)" \
|
||||
--build-arg PKG_VERS="$(LIB_VERSION)" \
|
||||
--build-arg PKG_REV="$(PKG_REV)" \
|
||||
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
|
||||
--build-arg CONFIG_TOML_SUFFIX="$(CONFIG_TOML_SUFFIX)" \
|
||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||
--tag $(BUILDIMAGE) \
|
||||
--file $(DOCKERFILE) .
|
||||
$(DOCKER) run \
|
||||
--platform=linux/$(ARCH) \
|
||||
-e DISTRIB \
|
||||
-e SECTION \
|
||||
-v $(ARTIFACTS_DIR):/dist \
|
||||
|
||||
@@ -14,37 +14,43 @@
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package lookup
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewDirectoryLocator creates a Locator that can be used to find directories at the specified root. A logger
|
||||
// is also specified.
|
||||
func NewDirectoryLocator(logger *log.Logger, root string) Locator {
|
||||
l := file{
|
||||
logger: logger,
|
||||
prefixes: []string{root},
|
||||
filter: assertDirectory,
|
||||
}
|
||||
func main() {
|
||||
log.Infof("Starting device discovery with NVML")
|
||||
|
||||
return &l
|
||||
}
|
||||
|
||||
// assertDirectory checks wither the specified path is a directory.
|
||||
func assertDirectory(filename string) error {
|
||||
info, err := os.Stat(filename)
|
||||
d, err := discover.NewNVMLServer("")
|
||||
if err != nil {
|
||||
return fmt.Errorf("error getting info for %v: %v", filename, err)
|
||||
log.Errorf("Error creating NVML Server: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if !info.IsDir() {
|
||||
return fmt.Errorf("specified path '%v' is not a directory", filename)
|
||||
devices, err := d.Devices()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
return nil
|
||||
mounts, err := d.Mounts()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering mounts: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
|
||||
log.Infof("Discovered devices:")
|
||||
enc.Encode(devices)
|
||||
|
||||
log.Infof("Discovered libraries:")
|
||||
enc.Encode(mounts)
|
||||
}
|
||||
65
examples/filter/main.go
Normal file
65
examples/filter/main.go
Normal file
@@ -0,0 +1,65 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"os"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/filter"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
func main() {
|
||||
d, err := discover.NewNVMLServer("")
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
}
|
||||
|
||||
selected := filter.NewSelectDevicesFrom(d, "all", nil)
|
||||
|
||||
devices, err := selected.Devices()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering devices: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
mounts, err := selected.Mounts()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering mounts: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
hooks, err := selected.Hooks()
|
||||
if err != nil {
|
||||
log.Errorf("Error discovering hooks: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(os.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
|
||||
log.Infof("Discovered devices:")
|
||||
enc.Encode(devices)
|
||||
|
||||
log.Infof("Discovered libraries:")
|
||||
enc.Encode(mounts)
|
||||
|
||||
log.Infof("Discovered hook:")
|
||||
enc.Encode(hooks)
|
||||
}
|
||||
26
examples/print-ldcache/main.go
Normal file
26
examples/print-ldcache/main.go
Normal file
@@ -0,0 +1,26 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldcache"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
var logger = log.StandardLogger()
|
||||
|
||||
func main() {
|
||||
logger.SetLevel(log.DebugLevel)
|
||||
logger.Infof("Starting device discovery with NVML")
|
||||
|
||||
cache, err := ldcache.NewLDCacheWithLogger(logger, "/run/nvidia/driver")
|
||||
if err != nil {
|
||||
logger.Errorf("Error loading ldcache: %v", err)
|
||||
return
|
||||
}
|
||||
defer cache.Close()
|
||||
|
||||
libs32, libs64 := cache.Lookup("lib")
|
||||
|
||||
logger.Infof("32-bit: %v", libs32)
|
||||
logger.Infof("64-bit: %v", libs64)
|
||||
|
||||
}
|
||||
44
go.mod
44
go.mod
@@ -1,41 +1,17 @@
|
||||
module github.com/NVIDIA/nvidia-container-toolkit
|
||||
|
||||
go 1.17
|
||||
go 1.14
|
||||
|
||||
require (
|
||||
github.com/BurntSushi/toml v1.0.0
|
||||
github.com/NVIDIA/go-nvml v0.11.6-0.0.20220823120812-7e2082095e82
|
||||
github.com/container-orchestrated-devices/container-device-interface v0.5.2
|
||||
github.com/opencontainers/runc v1.1.4
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20211214071223-8958f93039ab
|
||||
github.com/pelletier/go-toml v1.9.4
|
||||
github.com/sirupsen/logrus v1.9.0
|
||||
github.com/BurntSushi/toml v0.3.1
|
||||
github.com/NVIDIA/go-nvml v0.11.1-0
|
||||
github.com/containers/podman/v2 v2.2.1
|
||||
github.com/opencontainers/runtime-spec v1.0.3-0.20211101234015-a3c33d663ebc
|
||||
github.com/pelletier/go-toml v1.9.3
|
||||
github.com/sirupsen/logrus v1.8.1
|
||||
github.com/stretchr/testify v1.7.0
|
||||
github.com/tsaikd/KDGoLib v0.0.0-20191001134900-7f3cf518e07d
|
||||
github.com/urfave/cli/v2 v2.3.0
|
||||
gitlab.com/nvidia/cloud-native/go-nvlib v0.0.0-20220922133427-1049a7fa76a9
|
||||
golang.org/x/mod v0.5.0
|
||||
golang.org/x/sys v0.0.0-20220927170352-d9d178bc13c6
|
||||
sigs.k8s.io/yaml v1.3.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/blang/semver v3.5.1+incompatible // indirect
|
||||
github.com/cpuguy83/go-md2man/v2 v2.0.1 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/fsnotify/fsnotify v1.5.4 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/kr/text v0.2.0 // indirect
|
||||
github.com/opencontainers/runtime-tools v0.9.1-0.20220110225228-7e2d60f1e41f // indirect
|
||||
github.com/opencontainers/selinux v1.10.1 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.0 // indirect
|
||||
github.com/russross/blackfriday/v2 v2.1.0 // indirect
|
||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 // indirect
|
||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
|
||||
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
|
||||
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
|
||||
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
|
||||
golang.org/x/mod v0.3.0
|
||||
golang.org/x/sys v0.0.0-20210426230700-d19ff857e887
|
||||
)
|
||||
|
||||
@@ -1,48 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"github.com/pelletier/go-toml"
|
||||
)
|
||||
|
||||
// ContainerCLIConfig stores the options for the nvidia-container-cli
|
||||
type ContainerCLIConfig struct {
|
||||
Root string
|
||||
}
|
||||
|
||||
// getContainerCLIConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getContainerCLIConfigFrom(toml *toml.Tree) *ContainerCLIConfig {
|
||||
cfg := getDefaultContainerCLIConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg
|
||||
}
|
||||
|
||||
cfg.Root = toml.GetDefault("nvidia-container-cli.root", cfg.Root).(string)
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
// getDefaultContainerCLIConfig defines the default values for the config
|
||||
func getDefaultContainerCLIConfig() *ContainerCLIConfig {
|
||||
c := ContainerCLIConfig{
|
||||
Root: "",
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -1,114 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
)
|
||||
|
||||
const (
|
||||
configOverride = "XDG_CONFIG_HOME"
|
||||
configFilePath = "nvidia-container-runtime/config.toml"
|
||||
)
|
||||
|
||||
var (
|
||||
// DefaultExecutableDir specifies the default path to use for executables if they cannot be located in the path.
|
||||
DefaultExecutableDir = "/usr/bin"
|
||||
|
||||
// NVIDIAContainerRuntimeHookExecutable is the executable name for the NVIDIA Container Runtime Hook
|
||||
NVIDIAContainerRuntimeHookExecutable = "nvidia-container-runtime-hook"
|
||||
// NVIDIAContainerToolkitExecutable is the executable name for the NVIDIA Container Toolkit (an alias for the NVIDIA Container Runtime Hook)
|
||||
NVIDIAContainerToolkitExecutable = "nvidia-container-toolkit"
|
||||
|
||||
configDir = "/etc/"
|
||||
)
|
||||
|
||||
// Config represents the contents of the config.toml file for the NVIDIA Container Toolkit
|
||||
// Note: This is currently duplicated by the HookConfig in cmd/nvidia-container-toolkit/hook_config.go
|
||||
type Config struct {
|
||||
NVIDIAContainerCLIConfig ContainerCLIConfig `toml:"nvidia-container-cli"`
|
||||
NVIDIACTKConfig CTKConfig `toml:"nvidia-ctk"`
|
||||
NVIDIAContainerRuntimeConfig RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
}
|
||||
|
||||
// GetConfig sets up the config struct. Values are read from a toml file
|
||||
// or set via the environment.
|
||||
func GetConfig() (*Config, error) {
|
||||
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
||||
configDir = XDGConfigDir
|
||||
}
|
||||
|
||||
configFilePath := path.Join(configDir, configFilePath)
|
||||
|
||||
tomlFile, err := os.Open(configFilePath)
|
||||
if err != nil {
|
||||
return getDefaultConfig(), nil
|
||||
}
|
||||
defer tomlFile.Close()
|
||||
|
||||
cfg, err := loadConfigFrom(tomlFile)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read config values: %v", err)
|
||||
}
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// loadRuntimeConfigFrom reads the config from the specified Reader
|
||||
func loadConfigFrom(reader io.Reader) (*Config, error) {
|
||||
toml, err := toml.LoadReader(reader)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return getConfigFrom(toml)
|
||||
}
|
||||
|
||||
// getConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getConfigFrom(toml *toml.Tree) (*Config, error) {
|
||||
cfg := getDefaultConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
cfg.NVIDIAContainerCLIConfig = *getContainerCLIConfigFrom(toml)
|
||||
cfg.NVIDIACTKConfig = *getCTKConfigFrom(toml)
|
||||
runtimeConfig, err := getRuntimeConfigFrom(toml)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load nvidia-container-runtime config: %v", err)
|
||||
}
|
||||
cfg.NVIDIAContainerRuntimeConfig = *runtimeConfig
|
||||
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// getDefaultConfig defines the default values for the config
|
||||
func getDefaultConfig() *Config {
|
||||
c := Config{
|
||||
NVIDIAContainerCLIConfig: *getDefaultContainerCLIConfig(),
|
||||
NVIDIACTKConfig: *getDefaultCTKConfig(),
|
||||
NVIDIAContainerRuntimeConfig: *GetDefaultRuntimeConfig(),
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -1,165 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||
wd, err := os.Getwd()
|
||||
require.NoError(t, err)
|
||||
|
||||
// By default debug is disabled
|
||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||
testDir := filepath.Join(wd, "test")
|
||||
filename := filepath.Join(testDir, configFilePath)
|
||||
|
||||
os.Setenv(configOverride, testDir)
|
||||
|
||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
||||
require.NoError(t, ioutil.WriteFile(filename, contents, 0766))
|
||||
|
||||
defer func() { require.NoError(t, os.RemoveAll(testDir)) }()
|
||||
|
||||
cfg, err := GetConfig()
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, cfg.NVIDIAContainerRuntimeConfig.DebugFilePath, "/nvidia-container-toolkit.log")
|
||||
}
|
||||
|
||||
func TestGetConfig(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
contents []string
|
||||
expectedError error
|
||||
expectedConfig *Config
|
||||
}{
|
||||
{
|
||||
description: "empty config is default",
|
||||
expectedConfig: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: "info",
|
||||
Runtimes: []string{"docker-runc", "runc"},
|
||||
Mode: "auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "nvidia-ctk",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "config options set inline",
|
||||
contents: []string{
|
||||
"nvidia-container-cli.root = \"/bar/baz\"",
|
||||
"nvidia-container-runtime.debug = \"/foo/bar\"",
|
||||
"nvidia-container-runtime.experimental = true",
|
||||
"nvidia-container-runtime.discover-mode = \"not-legacy\"",
|
||||
"nvidia-container-runtime.log-level = \"debug\"",
|
||||
"nvidia-container-runtime.runtimes = [\"/some/runtime\",]",
|
||||
"nvidia-container-runtime.mode = \"not-auto\"",
|
||||
"nvidia-container-runtime.modes.csv.mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"nvidia-ctk.path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
expectedConfig: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "/bar/baz",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "/foo/bar/nvidia-ctk",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "config options set in section",
|
||||
contents: []string{
|
||||
"[nvidia-container-cli]",
|
||||
"root = \"/bar/baz\"",
|
||||
"[nvidia-container-runtime]",
|
||||
"debug = \"/foo/bar\"",
|
||||
"experimental = true",
|
||||
"discover-mode = \"not-legacy\"",
|
||||
"log-level = \"debug\"",
|
||||
"runtimes = [\"/some/runtime\",]",
|
||||
"mode = \"not-auto\"",
|
||||
"[nvidia-container-runtime.modes.csv]",
|
||||
"mount-spec-path = \"/not/etc/nvidia-container-runtime/host-files-for-container.d\"",
|
||||
"[nvidia-ctk]",
|
||||
"path = \"/foo/bar/nvidia-ctk\"",
|
||||
},
|
||||
expectedConfig: &Config{
|
||||
NVIDIAContainerCLIConfig: ContainerCLIConfig{
|
||||
Root: "/bar/baz",
|
||||
},
|
||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||
DebugFilePath: "/foo/bar",
|
||||
LogLevel: "debug",
|
||||
Runtimes: []string{"/some/runtime"},
|
||||
Mode: "not-auto",
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/not/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
},
|
||||
NVIDIACTKConfig: CTKConfig{
|
||||
Path: "/foo/bar/nvidia-ctk",
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
reader := strings.NewReader(strings.Join(tc.contents, "\n"))
|
||||
|
||||
cfg, err := loadConfigFrom(reader)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
require.EqualValues(t, tc.expectedConfig, cfg)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,115 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// LoadConfig loads the docker config from disk
|
||||
func LoadConfig(configFilePath string) (map[string]interface{}, error) {
|
||||
log.Infof("Loading docker config from %v", configFilePath)
|
||||
|
||||
info, err := os.Stat(configFilePath)
|
||||
if os.IsExist(err) && info.IsDir() {
|
||||
return nil, fmt.Errorf("config file is a directory")
|
||||
}
|
||||
|
||||
cfg := make(map[string]interface{})
|
||||
|
||||
if os.IsNotExist(err) {
|
||||
log.Infof("Config file does not exist, creating new one")
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
readBytes, err := ioutil.ReadFile(configFilePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to read config: %v", err)
|
||||
}
|
||||
|
||||
reader := bytes.NewReader(readBytes)
|
||||
if err := json.NewDecoder(reader).Decode(&cfg); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
log.Infof("Successfully loaded config")
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
// UpdateConfig updates the docker config to include the nvidia runtimes
|
||||
func UpdateConfig(config map[string]interface{}, defaultRuntime string, newRuntimes map[string]interface{}) error {
|
||||
if defaultRuntime != "" {
|
||||
config["default-runtime"] = defaultRuntime
|
||||
}
|
||||
|
||||
// Read the existing runtimes
|
||||
runtimes := make(map[string]interface{})
|
||||
if _, exists := config["runtimes"]; exists {
|
||||
runtimes = config["runtimes"].(map[string]interface{})
|
||||
}
|
||||
|
||||
// Add / update the runtime definitions
|
||||
for name, rt := range newRuntimes {
|
||||
runtimes[name] = rt
|
||||
}
|
||||
|
||||
// Update the runtimes definition
|
||||
if len(runtimes) > 0 {
|
||||
config["runtimes"] = runtimes
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// FlushConfig flushes the updated/reverted config out to disk
|
||||
func FlushConfig(cfg map[string]interface{}, configFilePath string) error {
|
||||
log.Infof("Flushing docker config to %v", configFilePath)
|
||||
|
||||
output, err := json.MarshalIndent(cfg, "", " ")
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to convert to JSON: %v", err)
|
||||
}
|
||||
|
||||
switch len(output) {
|
||||
case 0:
|
||||
err := os.Remove(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to remove empty file: %v", err)
|
||||
}
|
||||
log.Infof("Config empty, removing file")
|
||||
default:
|
||||
f, err := os.Create(configFilePath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to open %v for writing: %v", configFilePath, err)
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
_, err = f.WriteString(string(output))
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to write output: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
log.Infof("Successfully flushed config")
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -1,228 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package docker
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestUpdateConfigDefaultRuntime(t *testing.T) {
|
||||
testCases := []struct {
|
||||
config map[string]interface{}
|
||||
defaultRuntime string
|
||||
runtimeName string
|
||||
expectedDefaultRuntimeName interface{}
|
||||
}{
|
||||
{
|
||||
defaultRuntime: "",
|
||||
expectedDefaultRuntimeName: nil,
|
||||
},
|
||||
{
|
||||
defaultRuntime: "NAME",
|
||||
expectedDefaultRuntimeName: "NAME",
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"default-runtime": "ALREADY_SET",
|
||||
},
|
||||
defaultRuntime: "",
|
||||
expectedDefaultRuntimeName: "ALREADY_SET",
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"default-runtime": "ALREADY_SET",
|
||||
},
|
||||
defaultRuntime: "NAME",
|
||||
expectedDefaultRuntimeName: "NAME",
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
if tc.config == nil {
|
||||
tc.config = make(map[string]interface{})
|
||||
}
|
||||
err := UpdateConfig(tc.config, tc.defaultRuntime, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
defaultRuntimeName := tc.config["default-runtime"]
|
||||
require.EqualValues(t, tc.expectedDefaultRuntimeName, defaultRuntimeName)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestUpdateConfigRuntimes(t *testing.T) {
|
||||
testCases := []struct {
|
||||
config map[string]interface{}
|
||||
runtimes map[string]interface{}
|
||||
expectedConfig map[string]interface{}
|
||||
}{
|
||||
{
|
||||
config: map[string]interface{}{},
|
||||
runtimes: map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
runtimes: map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime2": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime2",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"not-nvidia": map[string]interface{}{
|
||||
"path": "some-other-path",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
runtimes: map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"runtimes": map[string]interface{}{
|
||||
"not-nvidia": map[string]interface{}{
|
||||
"path": "some-other-path",
|
||||
"args": []string{},
|
||||
},
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
runtimes: map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
"runtimes": map[string]interface{}{
|
||||
"runtime1": map[string]interface{}{
|
||||
"path": "/test/runtime/dir/runtime1",
|
||||
"args": []string{},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
config: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
expectedConfig: map[string]interface{}{
|
||||
"exec-opts": []string{"native.cgroupdriver=systemd"},
|
||||
"log-driver": "json-file",
|
||||
"log-opts": map[string]string{
|
||||
"max-size": "100m",
|
||||
},
|
||||
"storage-driver": "overlay2",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
err := UpdateConfig(tc.config, "", tc.runtimes)
|
||||
require.NoError(t, err)
|
||||
|
||||
configContent, err := json.MarshalIndent(tc.config, "", " ")
|
||||
require.NoError(t, err)
|
||||
|
||||
expectedContent, err := json.MarshalIndent(tc.expectedConfig, "", " ")
|
||||
require.NoError(t, err)
|
||||
|
||||
require.EqualValues(t, string(expectedContent), string(configContent))
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,174 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/opencontainers/runtime-spec/specs-go"
|
||||
"golang.org/x/mod/semver"
|
||||
)
|
||||
|
||||
const (
|
||||
envCUDAVersion = "CUDA_VERSION"
|
||||
envNVRequirePrefix = "NVIDIA_REQUIRE_"
|
||||
envNVRequireCUDA = envNVRequirePrefix + "CUDA"
|
||||
envNVRequireJetpack = envNVRequirePrefix + "JETPACK"
|
||||
envNVDisableRequire = "NVIDIA_DISABLE_REQUIRE"
|
||||
)
|
||||
|
||||
// CUDA represents a CUDA image that can be used for GPU computing. This wraps
|
||||
// a map of environment variable to values that can be used to perform lookups
|
||||
// such as requirements.
|
||||
type CUDA map[string]string
|
||||
|
||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||
// The process environment is read (if present) to construc the CUDA Image.
|
||||
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
||||
if spec == nil || spec.Process == nil {
|
||||
return NewCUDAImageFromEnv(nil)
|
||||
}
|
||||
|
||||
return NewCUDAImageFromEnv(spec.Process.Env)
|
||||
}
|
||||
|
||||
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||
// is a list of strings of the form ENVAR=VALUE.
|
||||
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
||||
c := make(CUDA)
|
||||
|
||||
for _, e := range env {
|
||||
parts := strings.SplitN(e, "=", 2)
|
||||
if len(parts) != 2 {
|
||||
return nil, fmt.Errorf("invalid environment variable: %v", e)
|
||||
}
|
||||
c[parts[0]] = parts[1]
|
||||
}
|
||||
|
||||
return c, nil
|
||||
}
|
||||
|
||||
// IsLegacy returns whether the associated CUDA image is a "legacy" image. An
|
||||
// image is considered legacy if it has a CUDA_VERSION environment variable defined
|
||||
// and no NVIDIA_REQUIRE_CUDA environment variable defined.
|
||||
func (i CUDA) IsLegacy() bool {
|
||||
legacyCudaVersion := i[envCUDAVersion]
|
||||
cudaRequire := i[envNVRequireCUDA]
|
||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||
}
|
||||
|
||||
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
|
||||
// variables.
|
||||
func (i CUDA) GetRequirements() ([]string, error) {
|
||||
// TODO: We need not process this if disable require is set, but this will be done
|
||||
// in a single follow-up to ensure that the behavioural change is accurately captured.
|
||||
// if i.HasDisableRequire() {
|
||||
// return nil, nil
|
||||
// }
|
||||
|
||||
// All variables with the "NVIDIA_REQUIRE_" prefix are passed to nvidia-container-cli
|
||||
var requirements []string
|
||||
for name, value := range i {
|
||||
if strings.HasPrefix(name, envNVRequirePrefix) && !strings.HasPrefix(name, envNVRequireJetpack) {
|
||||
requirements = append(requirements, value)
|
||||
}
|
||||
}
|
||||
if i.IsLegacy() {
|
||||
v, err := i.legacyVersion()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get version: %v", err)
|
||||
}
|
||||
cudaRequire := fmt.Sprintf("cuda>=%s", v)
|
||||
requirements = append(requirements, cudaRequire)
|
||||
}
|
||||
return requirements, nil
|
||||
}
|
||||
|
||||
// HasDisableRequire checks for the value of the NVIDIA_DISABLE_REQUIRE. If set
|
||||
// to a valid (true) boolean value this can be used to disable the requirement checks
|
||||
func (i CUDA) HasDisableRequire() bool {
|
||||
if disable, exists := i[envNVDisableRequire]; exists {
|
||||
// i.logger.Debugf("NVIDIA_DISABLE_REQUIRE=%v; skipping requirement checks", disable)
|
||||
d, _ := strconv.ParseBool(disable)
|
||||
return d
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||
func (i CUDA) DevicesFromEnvvars(envVars ...string) []string {
|
||||
// Grab a reference to devices from the first envvar
|
||||
// in the list that actually exists in the environment.
|
||||
var devices *string
|
||||
for _, envVar := range envVars {
|
||||
if devs, ok := i[envVar]; ok {
|
||||
devices = &devs
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Environment variable unset with legacy image: default to "all".
|
||||
if devices == nil && i.IsLegacy() {
|
||||
return []string{"all"}
|
||||
}
|
||||
|
||||
// Environment variable unset or empty or "void": return nil
|
||||
if devices == nil || len(*devices) == 0 || *devices == "void" {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Environment variable set to "none": reset to "".
|
||||
if *devices == "none" {
|
||||
return []string{""}
|
||||
}
|
||||
|
||||
return strings.Split(*devices, ",")
|
||||
}
|
||||
|
||||
func (i CUDA) legacyVersion() (string, error) {
|
||||
majorMinor, err := parseMajorMinorVersion(i[envCUDAVersion])
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid CUDA version: %v", err)
|
||||
}
|
||||
|
||||
return majorMinor, nil
|
||||
}
|
||||
|
||||
func parseMajorMinorVersion(version string) (string, error) {
|
||||
vVersion := "v" + strings.TrimPrefix(version, "v")
|
||||
|
||||
if !semver.IsValid(vVersion) {
|
||||
return "", fmt.Errorf("invalid version string")
|
||||
}
|
||||
|
||||
majorMinor := strings.TrimPrefix(semver.MajorMinor(vVersion), "v")
|
||||
parts := strings.Split(majorMinor, ".")
|
||||
|
||||
var err error
|
||||
_, err = strconv.ParseUint(parts[0], 10, 32)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid major version")
|
||||
}
|
||||
_, err = strconv.ParseUint(parts[1], 10, 32)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("invalid minor version")
|
||||
}
|
||||
return majorMinor, nil
|
||||
}
|
||||
@@ -1,123 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package image
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestParseMajorMinorVersionValid(t *testing.T) {
|
||||
var tests = []struct {
|
||||
version string
|
||||
expected string
|
||||
}{
|
||||
{"0", "0.0"},
|
||||
{"8", "8.0"},
|
||||
{"7.5", "7.5"},
|
||||
{"9.0.116", "9.0"},
|
||||
{"4294967295.4294967295.4294967295", "4294967295.4294967295"},
|
||||
{"v11.6", "11.6"},
|
||||
}
|
||||
for _, c := range tests {
|
||||
t.Run(c.version, func(t *testing.T) {
|
||||
version, err := parseMajorMinorVersion(c.version)
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, c.expected, version)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseMajorMinorVersionInvalid(t *testing.T) {
|
||||
var tests = []string{
|
||||
"foo",
|
||||
"foo.5.10",
|
||||
"9.0.116.50",
|
||||
"9.0.116foo",
|
||||
"7.foo",
|
||||
"9.0.bar",
|
||||
"9.4294967296",
|
||||
"9.0.116.",
|
||||
"9..0",
|
||||
"9.",
|
||||
".5.10",
|
||||
"-9",
|
||||
"+9",
|
||||
"-9.1.116",
|
||||
"-9.-1.-116",
|
||||
}
|
||||
for _, c := range tests {
|
||||
t.Run(c, func(t *testing.T) {
|
||||
_, err := parseMajorMinorVersion(c)
|
||||
require.Error(t, err)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetRequirements(t *testing.T) {
|
||||
testCases := []struct {
|
||||
description string
|
||||
env []string
|
||||
requirements []string
|
||||
}{
|
||||
{
|
||||
description: "NVIDIA_REQUIRE_JETPACK is ignored",
|
||||
env: []string{"NVIDIA_REQUIRE_JETPACK=csv-mounts=all"},
|
||||
requirements: nil,
|
||||
},
|
||||
{
|
||||
description: "NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS is ignored",
|
||||
env: []string{"NVIDIA_REQUIRE_JETPACK_HOST_MOUNTS=base-only"},
|
||||
requirements: nil,
|
||||
},
|
||||
{
|
||||
description: "single requirement set",
|
||||
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6"},
|
||||
requirements: []string{"cuda>=11.6"},
|
||||
},
|
||||
{
|
||||
description: "requirements are concatenated requirement set",
|
||||
env: []string{"NVIDIA_REQUIRE_CUDA=cuda>=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
|
||||
requirements: []string{"cuda>=11.6", "brand=tesla"},
|
||||
},
|
||||
{
|
||||
description: "legacy image",
|
||||
env: []string{"CUDA_VERSION=11.6"},
|
||||
requirements: []string{"cuda>=11.6"},
|
||||
},
|
||||
{
|
||||
description: "legacy image with additional requirement",
|
||||
env: []string{"CUDA_VERSION=11.6", "NVIDIA_REQUIRE_BRAND=brand=tesla"},
|
||||
requirements: []string{"cuda>=11.6", "brand=tesla"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
image, err := NewCUDAImageFromEnv(tc.env)
|
||||
require.NoError(t, err)
|
||||
|
||||
requirements, err := image.GetRequirements()
|
||||
require.NoError(t, err)
|
||||
require.ElementsMatch(t, tc.requirements, requirements)
|
||||
|
||||
})
|
||||
|
||||
}
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/pelletier/go-toml"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
dockerRuncExecutableName = "docker-runc"
|
||||
runcExecutableName = "runc"
|
||||
|
||||
auto = "auto"
|
||||
)
|
||||
|
||||
// RuntimeConfig stores the config options for the NVIDIA Container Runtime
|
||||
type RuntimeConfig struct {
|
||||
DebugFilePath string `toml:"debug"`
|
||||
// LogLevel defines the logging level for the application
|
||||
LogLevel string `toml:"log-level"`
|
||||
// Runtimes defines the candidates for the low-level runtime
|
||||
Runtimes []string `toml:"runtimes"`
|
||||
Mode string `toml:"mode"`
|
||||
Modes modesConfig `toml:"modes"`
|
||||
}
|
||||
|
||||
// modesConfig defines (optional) per-mode configs
|
||||
type modesConfig struct {
|
||||
CSV csvModeConfig `toml:"csv"`
|
||||
CDI cdiModeConfig `toml:"cdi"`
|
||||
}
|
||||
|
||||
type cdiModeConfig struct {
|
||||
// SpecDirs allows for the default spec dirs for CDI to be overridden
|
||||
SpecDirs []string `toml:"spec-dirs"`
|
||||
}
|
||||
|
||||
type csvModeConfig struct {
|
||||
MountSpecPath string `toml:"mount-spec-path"`
|
||||
}
|
||||
|
||||
// dummy allows us to unmarshal only a RuntimeConfig from a *toml.Tree
|
||||
type dummy struct {
|
||||
Runtime RuntimeConfig `toml:"nvidia-container-runtime"`
|
||||
}
|
||||
|
||||
// getRuntimeConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getRuntimeConfigFrom(toml *toml.Tree) (*RuntimeConfig, error) {
|
||||
cfg := GetDefaultRuntimeConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg, nil
|
||||
}
|
||||
|
||||
d := dummy{
|
||||
Runtime: *cfg,
|
||||
}
|
||||
|
||||
if err := toml.Unmarshal(&d); err != nil {
|
||||
return nil, fmt.Errorf("failed to unmarshal runtime config: %v", err)
|
||||
}
|
||||
|
||||
return &d.Runtime, nil
|
||||
}
|
||||
|
||||
// GetDefaultRuntimeConfig defines the default values for the config
|
||||
func GetDefaultRuntimeConfig() *RuntimeConfig {
|
||||
c := RuntimeConfig{
|
||||
DebugFilePath: "/dev/null",
|
||||
LogLevel: logrus.InfoLevel.String(),
|
||||
Runtimes: []string{
|
||||
dockerRuncExecutableName,
|
||||
runcExecutableName,
|
||||
},
|
||||
Mode: auto,
|
||||
Modes: modesConfig{
|
||||
CSV: csvModeConfig{
|
||||
MountSpecPath: "/etc/nvidia-container-runtime/host-files-for-container.d",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -1,46 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package config
|
||||
|
||||
import "github.com/pelletier/go-toml"
|
||||
|
||||
// CTKConfig stores the config options for the NVIDIA Container Toolkit CLI (nvidia-ctk)
|
||||
type CTKConfig struct {
|
||||
Path string `toml:"path"`
|
||||
}
|
||||
|
||||
// getCTKConfigFrom reads the nvidia container runtime config from the specified toml Tree.
|
||||
func getCTKConfigFrom(toml *toml.Tree) *CTKConfig {
|
||||
cfg := getDefaultCTKConfig()
|
||||
|
||||
if toml == nil {
|
||||
return cfg
|
||||
}
|
||||
|
||||
cfg.Path = toml.GetDefault("nvidia-ctk.path", cfg.Path).(string)
|
||||
|
||||
return cfg
|
||||
}
|
||||
|
||||
// getDefaultCTKConfig defines the default values for the config
|
||||
func getDefaultCTKConfig() *CTKConfig {
|
||||
c := CTKConfig{
|
||||
Path: "nvidia-ctk",
|
||||
}
|
||||
|
||||
return &c
|
||||
}
|
||||
@@ -1,137 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package cuda
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/go-nvml/pkg/dl"
|
||||
)
|
||||
|
||||
/*
|
||||
#cgo LDFLAGS: -Wl,--unresolved-symbols=ignore-in-object-files
|
||||
|
||||
#ifdef _WIN32
|
||||
#define CUDAAPI __stdcall
|
||||
#else
|
||||
#define CUDAAPI
|
||||
#endif
|
||||
|
||||
typedef int CUdevice;
|
||||
|
||||
typedef enum CUdevice_attribute_enum {
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR = 75,
|
||||
CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR = 76
|
||||
} CUdevice_attribute;
|
||||
|
||||
typedef enum cudaError_enum {
|
||||
CUDA_SUCCESS = 0
|
||||
} CUresult;
|
||||
|
||||
CUresult CUDAAPI cuInit(unsigned int Flags);
|
||||
CUresult CUDAAPI cuDriverGetVersion(int *driverVersion);
|
||||
CUresult CUDAAPI cuDeviceGet(CUdevice *device, int ordinal);
|
||||
CUresult CUDAAPI cuDeviceGetAttribute(int *pi, CUdevice_attribute attrib, CUdevice dev);
|
||||
*/
|
||||
import "C"
|
||||
|
||||
const (
|
||||
libraryName = "libcuda.so.1"
|
||||
libraryLoadFlags = dl.RTLD_LAZY | dl.RTLD_GLOBAL
|
||||
)
|
||||
|
||||
// cuda stores a reference the cuda dynamic library
|
||||
var lib *dl.DynamicLibrary
|
||||
|
||||
// Version returns the CUDA version of the driver as a string or an error if this
|
||||
// cannot be determined.
|
||||
func Version() (string, error) {
|
||||
lib, err := load()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer lib.Close()
|
||||
|
||||
if err := lib.Lookup("cuDriverGetVersion"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
|
||||
var version C.int
|
||||
if result := C.cuDriverGetVersion(&version); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA version: result=%v", result)
|
||||
}
|
||||
|
||||
major := version / 1000
|
||||
minor := version % 100 / 10
|
||||
|
||||
return fmt.Sprintf("%d.%d", major, minor), nil
|
||||
}
|
||||
|
||||
// ComputeCapability returns the CUDA compute capability of a device with the specified index as a string
|
||||
// or an error if this cannot be determined.
|
||||
func ComputeCapability(index int) (string, error) {
|
||||
lib, err := load()
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
defer lib.Close()
|
||||
|
||||
if err := lib.Lookup("cuInit"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
if err := lib.Lookup("cuDeviceGet"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
if err := lib.Lookup("cuDeviceGetAttribute"); err != nil {
|
||||
return "", fmt.Errorf("failed to lookup symbol: %v", err)
|
||||
}
|
||||
|
||||
if result := C.cuInit(C.uint(0)); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to initialize CUDA: result=%v", result)
|
||||
}
|
||||
|
||||
var device C.CUdevice
|
||||
// NOTE: We only query the first device
|
||||
if result := C.cuDeviceGet(&device, C.int(index)); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA device %v: result=%v", 0, result)
|
||||
}
|
||||
|
||||
var major C.int
|
||||
if result := C.cuDeviceGetAttribute(&major, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA compute capability major for device %v : result=%v", 0, result)
|
||||
}
|
||||
|
||||
var minor C.int
|
||||
if result := C.cuDeviceGetAttribute(&minor, C.CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, device); result != C.CUDA_SUCCESS {
|
||||
return "", fmt.Errorf("failed to get CUDA compute capability minor for device %v: result=%v", 0, result)
|
||||
}
|
||||
|
||||
return fmt.Sprintf("%d.%d", major, minor), nil
|
||||
}
|
||||
|
||||
func load() (*dl.DynamicLibrary, error) {
|
||||
lib := dl.New(libraryName, libraryLoadFlags)
|
||||
if lib == nil {
|
||||
return nil, fmt.Errorf("error instantiating DynamicLibrary for CUDA")
|
||||
}
|
||||
err := lib.Open()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error opening DynamicLibrary for CUDA: %v", err)
|
||||
}
|
||||
|
||||
return lib, nil
|
||||
}
|
||||
51
internal/discover/binaries.go
Normal file
51
internal/discover/binaries.go
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewBinaryMounts creates a discoverer for binaries using the specified root
|
||||
func NewBinaryMounts(root string) Discover {
|
||||
return NewBinaryMountsWithLogger(log.StandardLogger(), root)
|
||||
}
|
||||
|
||||
// NewBinaryMountsWithLogger creates a Mounts discoverer as with NewBinaryMounts
|
||||
// with the specified logger
|
||||
func NewBinaryMountsWithLogger(logger *log.Logger, root string) Discover {
|
||||
d := mounts{
|
||||
logger: logger,
|
||||
lookup: lookup.NewPathLocatorWithLogger(logger, root),
|
||||
required: requiredBinaries,
|
||||
}
|
||||
return &d
|
||||
}
|
||||
|
||||
// requiredBinaries defines a set of binaries and their labels
|
||||
var requiredBinaries = map[string][]string{
|
||||
"utility": {
|
||||
"nvidia-smi", /* System management interface */
|
||||
"nvidia-debugdump", /* GPU coredump utility */
|
||||
"nvidia-persistenced", /* Persistence mode utility */
|
||||
},
|
||||
"compute": {
|
||||
"nvidia-cuda-mps-control", /* Multi process service CLI */
|
||||
"nvidia-cuda-mps-server", /* Multi process service server */
|
||||
},
|
||||
}
|
||||
73
internal/discover/binaries_test.go
Normal file
73
internal/discover/binaries_test.go
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestBinaries(t *testing.T) {
|
||||
binaryLookup := map[string]string{
|
||||
"nvidia-smi": "/usr/bin/nvidia-smi",
|
||||
"nvidia-persistenced": "/usr/bin/nvidia-persistenced",
|
||||
"nvidia-debugdump": "test-duplicates",
|
||||
"nvidia-cuda-mps-control": "test-duplicates",
|
||||
}
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
d := NewBinaryMountsWithLogger(logger, "")
|
||||
|
||||
// Override lookup for testing
|
||||
mockLocator := NewLocatorMockFromMap(binaryLookup)
|
||||
d.(*mounts).lookup = mockLocator
|
||||
|
||||
mounts, err := d.Mounts()
|
||||
require.NoError(t, err)
|
||||
|
||||
expected := []Mount{
|
||||
{
|
||||
Path: "/usr/bin/nvidia-smi",
|
||||
},
|
||||
{
|
||||
Path: "/usr/bin/nvidia-persistenced",
|
||||
},
|
||||
{
|
||||
Path: "test-duplicates",
|
||||
},
|
||||
}
|
||||
|
||||
require.Equal(t, len(expected), len(mounts))
|
||||
|
||||
devices, err := d.Devices()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, devices)
|
||||
|
||||
hooks, err := d.Hooks()
|
||||
require.NoError(t, err)
|
||||
require.Empty(t, hooks)
|
||||
}
|
||||
|
||||
func TestNewBinariesConstructor(t *testing.T) {
|
||||
b := NewBinaryMounts("").(*mounts)
|
||||
|
||||
require.NotNil(t, b.logger)
|
||||
require.NotNil(t, b.lookup)
|
||||
}
|
||||
@@ -1,62 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// charDevices is a discover for a list of character devices
|
||||
type charDevices mounts
|
||||
|
||||
var _ Discover = (*charDevices)(nil)
|
||||
|
||||
// NewCharDeviceDiscoverer creates a discoverer which locates the specified set of device nodes.
|
||||
func NewCharDeviceDiscoverer(logger *logrus.Logger, devices []string, root string) Discover {
|
||||
locator := lookup.NewCharDeviceLocator(logger, root)
|
||||
|
||||
return NewDeviceDiscoverer(logger, locator, root, devices)
|
||||
}
|
||||
|
||||
// NewDeviceDiscoverer creates a discoverer which locates the specified set of device nodes using the specified locator.
|
||||
func NewDeviceDiscoverer(logger *logrus.Logger, locator lookup.Locator, root string, devices []string) Discover {
|
||||
m := NewMounts(logger, locator, root, devices).(*mounts)
|
||||
|
||||
return (*charDevices)(m)
|
||||
}
|
||||
|
||||
// Mounts returns the discovered mounts for the charDevices.
|
||||
// Since this explicitly specifies a device list, the mounts are nil.
|
||||
func (d *charDevices) Mounts() ([]Mount, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// Devices returns the discovered devices for the charDevices.
|
||||
// Here the device nodes are first discovered as mounts and these are converted to devices.
|
||||
func (d *charDevices) Devices() ([]Device, error) {
|
||||
devicesAsMounts, err := (*mounts)(d).Mounts()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
var devices []Device
|
||||
for _, mount := range devicesAsMounts {
|
||||
devices = append(devices, Device(mount))
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
@@ -1,83 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestCharDevices(t *testing.T) {
|
||||
logger, logHook := testlog.NewNullLogger()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
input *charDevices
|
||||
expectedMounts []Mount
|
||||
expectedMountsError error
|
||||
expectedDevicesError error
|
||||
expectedDevices []Device
|
||||
}{
|
||||
{
|
||||
description: "dev mounts are empty",
|
||||
input: (*charDevices)(
|
||||
&mounts{
|
||||
lookup: &lookup.LocatorMock{
|
||||
LocateFunc: func(string) ([]string, error) {
|
||||
return []string{"located"}, nil
|
||||
},
|
||||
},
|
||||
required: []string{"required"},
|
||||
},
|
||||
),
|
||||
expectedDevices: []Device{{Path: "located", HostPath: "located"}},
|
||||
},
|
||||
{
|
||||
description: "dev devices returns error for nil lookup",
|
||||
input: &charDevices{},
|
||||
expectedDevicesError: fmt.Errorf("no lookup defined"),
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
logHook.Reset()
|
||||
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
tc.input.logger = logger
|
||||
|
||||
mounts, err := tc.input.Mounts()
|
||||
if tc.expectedMountsError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedMounts, mounts)
|
||||
|
||||
devices, err := tc.input.Devices()
|
||||
if tc.expectedDevicesError != nil {
|
||||
require.Error(t, err)
|
||||
} else {
|
||||
require.NoError(t, err)
|
||||
}
|
||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -18,26 +18,16 @@ package discover
|
||||
|
||||
import "fmt"
|
||||
|
||||
// list is a discoverer that contains a list of Discoverers. The output of the
|
||||
// Mounts functions is the concatenation of the output for each of the
|
||||
// composite is a discoverer that contains a list of Discoverers. The output of the
|
||||
// Devices, Mounts, and Hooks functions is the concatenation of the output for each of the
|
||||
// elements in the list.
|
||||
type list struct {
|
||||
type composite struct {
|
||||
discoverers []Discover
|
||||
}
|
||||
|
||||
var _ Discover = (*list)(nil)
|
||||
var _ Discover = (*composite)(nil)
|
||||
|
||||
// Merge creates a discoverer that is the composite of a list of discoveres.
|
||||
func Merge(d ...Discover) Discover {
|
||||
l := list{
|
||||
discoverers: d,
|
||||
}
|
||||
|
||||
return &l
|
||||
}
|
||||
|
||||
// Devices returns all devices from the included discoverers
|
||||
func (d list) Devices() ([]Device, error) {
|
||||
func (d composite) Devices() ([]Device, error) {
|
||||
var allDevices []Device
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
@@ -51,8 +41,7 @@ func (d list) Devices() ([]Device, error) {
|
||||
return allDevices, nil
|
||||
}
|
||||
|
||||
// Mounts returns all mounts from the included discoverers
|
||||
func (d list) Mounts() ([]Mount, error) {
|
||||
func (d composite) Mounts() ([]Mount, error) {
|
||||
var allMounts []Mount
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
@@ -66,8 +55,7 @@ func (d list) Mounts() ([]Mount, error) {
|
||||
return allMounts, nil
|
||||
}
|
||||
|
||||
// Hooks returns all Hooks from the included discoverers
|
||||
func (d list) Hooks() ([]Hook, error) {
|
||||
func (d composite) Hooks() ([]Hook, error) {
|
||||
var allHooks []Hook
|
||||
|
||||
for i, di := range d.discoverers {
|
||||
@@ -80,3 +68,7 @@ func (d list) Hooks() ([]Hook, error) {
|
||||
|
||||
return allHooks, nil
|
||||
}
|
||||
|
||||
func (d *composite) add(di ...Discover) {
|
||||
d.discoverers = append(d.discoverers, di...)
|
||||
}
|
||||
@@ -1,107 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
// NewFromCSVFiles creates a discoverer for the specified CSV files. A logger is also supplied.
|
||||
// The constructed discoverer is comprised of a list, with each element in the list being associated with a
|
||||
// single CSV files.
|
||||
func NewFromCSVFiles(logger *logrus.Logger, files []string, root string) (Discover, error) {
|
||||
if len(files) == 0 {
|
||||
logger.Warnf("No CSV files specified")
|
||||
return None{}, nil
|
||||
}
|
||||
|
||||
symlinkLocator := lookup.NewSymlinkLocator(logger, root)
|
||||
locators := map[csv.MountSpecType]lookup.Locator{
|
||||
csv.MountSpecDev: lookup.NewCharDeviceLocator(logger, root),
|
||||
csv.MountSpecDir: lookup.NewDirectoryLocator(logger, root),
|
||||
// Libraries and symlinks are handled in the same way
|
||||
csv.MountSpecLib: symlinkLocator,
|
||||
csv.MountSpecSym: symlinkLocator,
|
||||
}
|
||||
|
||||
var mountSpecs []*csv.MountSpec
|
||||
for _, filename := range files {
|
||||
targets, err := loadCSVFile(logger, filename)
|
||||
if err != nil {
|
||||
logger.Warnf("Skipping CSV file %v: %v", filename, err)
|
||||
continue
|
||||
}
|
||||
mountSpecs = append(mountSpecs, targets...)
|
||||
}
|
||||
|
||||
return newFromMountSpecs(logger, locators, root, mountSpecs)
|
||||
}
|
||||
|
||||
// loadCSVFile loads the specified CSV file and returns the list of mount specs
|
||||
func loadCSVFile(logger *logrus.Logger, filename string) ([]*csv.MountSpec, error) {
|
||||
// Create a discoverer for each file-kind combination
|
||||
targets, err := csv.NewCSVFileParser(logger, filename).Parse()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse CSV file: %v", err)
|
||||
}
|
||||
if len(targets) == 0 {
|
||||
return nil, fmt.Errorf("CSV file is empty")
|
||||
}
|
||||
|
||||
return targets, nil
|
||||
}
|
||||
|
||||
// newFromMountSpecs creates a discoverer for the CSV file. A logger is also supplied.
|
||||
// A list of csvDiscoverers is returned, with each being associated with a single MountSpecType.
|
||||
func newFromMountSpecs(logger *logrus.Logger, locators map[csv.MountSpecType]lookup.Locator, root string, targets []*csv.MountSpec) (Discover, error) {
|
||||
if len(targets) == 0 {
|
||||
return &None{}, nil
|
||||
}
|
||||
|
||||
var discoverers []Discover
|
||||
var mountSpecTypes []csv.MountSpecType
|
||||
candidatesByType := make(map[csv.MountSpecType][]string)
|
||||
for _, t := range targets {
|
||||
if _, exists := candidatesByType[t.Type]; !exists {
|
||||
mountSpecTypes = append(mountSpecTypes, t.Type)
|
||||
}
|
||||
candidatesByType[t.Type] = append(candidatesByType[t.Type], t.Path)
|
||||
}
|
||||
|
||||
for _, t := range mountSpecTypes {
|
||||
locator, exists := locators[t]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("no locator defined for '%v'", t)
|
||||
}
|
||||
|
||||
var m Discover
|
||||
switch t {
|
||||
case csv.MountSpecDev:
|
||||
m = NewDeviceDiscoverer(logger, locator, root, candidatesByType[t])
|
||||
default:
|
||||
m = NewMounts(logger, locator, root, candidatesByType[t])
|
||||
}
|
||||
discoverers = append(discoverers, m)
|
||||
|
||||
}
|
||||
|
||||
return &list{discoverers: discoverers}, nil
|
||||
}
|
||||
@@ -1,131 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
// DefaultMountSpecPath is default location of CSV files that define the modifications required to the OCI spec
|
||||
DefaultMountSpecPath = "/etc/nvidia-container-runtime/host-files-for-container.d"
|
||||
)
|
||||
|
||||
// GetFileList returns the (non-recursive) list of CSV files in the specified
|
||||
// folder
|
||||
func GetFileList(root string) ([]string, error) {
|
||||
contents, err := os.ReadDir(root)
|
||||
if err != nil && errors.Is(err, os.ErrNotExist) {
|
||||
return nil, nil
|
||||
} else if err != nil {
|
||||
return nil, fmt.Errorf("failed to read the contents of %v: %v", root, err)
|
||||
}
|
||||
|
||||
var csvFilePaths []string
|
||||
for _, c := range contents {
|
||||
if c.IsDir() {
|
||||
continue
|
||||
}
|
||||
if c.Name() == ".csv" {
|
||||
continue
|
||||
}
|
||||
ext := strings.ToLower(filepath.Ext(c.Name()))
|
||||
if ext != ".csv" {
|
||||
continue
|
||||
}
|
||||
|
||||
csvFilePaths = append(csvFilePaths, filepath.Join(root, c.Name()))
|
||||
}
|
||||
|
||||
return csvFilePaths, nil
|
||||
}
|
||||
|
||||
// BaseFilesOnly filters out non-base CSV files from the list of CSV files.
|
||||
func BaseFilesOnly(filenames []string) []string {
|
||||
filter := map[string]bool{
|
||||
"l4t.csv": true,
|
||||
"drivers.csv": true,
|
||||
"devices.csv": true,
|
||||
}
|
||||
|
||||
var selected []string
|
||||
for _, file := range filenames {
|
||||
base := filepath.Base(file)
|
||||
if filter[base] {
|
||||
selected = append(selected, file)
|
||||
}
|
||||
}
|
||||
|
||||
return selected
|
||||
}
|
||||
|
||||
// Parser specifies an interface for parsing MountSpecs
|
||||
type Parser interface {
|
||||
Parse() ([]*MountSpec, error)
|
||||
}
|
||||
|
||||
type csv struct {
|
||||
logger *logrus.Logger
|
||||
filename string
|
||||
}
|
||||
|
||||
// NewCSVFileParser creates a new parser for reading MountSpecs from the specified CSV file
|
||||
func NewCSVFileParser(logger *logrus.Logger, filename string) Parser {
|
||||
p := csv{
|
||||
logger: logger,
|
||||
filename: filename,
|
||||
}
|
||||
|
||||
return &p
|
||||
}
|
||||
|
||||
// Parse parses the csv file and returns a list of MountSpecs in the file
|
||||
func (p csv) Parse() ([]*MountSpec, error) {
|
||||
reader, err := os.Open(p.filename)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to open %v for reading: %v", p.filename, err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
return p.parseFromReader(reader), nil
|
||||
}
|
||||
|
||||
// parseFromReader parses the specified file and returns a list of required jetson mounts
|
||||
func (p csv) parseFromReader(reader io.Reader) []*MountSpec {
|
||||
var targets []*MountSpec
|
||||
|
||||
scanner := bufio.NewScanner(reader)
|
||||
for scanner.Scan() {
|
||||
line := scanner.Text()
|
||||
target, err := NewMountSpecFromLine(line)
|
||||
if err != nil {
|
||||
p.logger.Debugf("Skipping invalid mount spec '%v': %v", line, err)
|
||||
continue
|
||||
}
|
||||
targets = append(targets, target)
|
||||
}
|
||||
|
||||
return targets
|
||||
}
|
||||
@@ -1,83 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetFileList(t *testing.T) {
|
||||
moduleRoot, _ := test.GetModuleRoot()
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
root string
|
||||
files []string
|
||||
expectedError error
|
||||
}{
|
||||
{
|
||||
description: "returns list of CSV files",
|
||||
root: "test/input/csv_samples/",
|
||||
files: []string{
|
||||
"jetson.csv",
|
||||
"simple_wrong.csv",
|
||||
"simple.csv",
|
||||
"spaced.csv",
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "handles empty folder",
|
||||
root: "test/input/csv_samples/empty",
|
||||
},
|
||||
{
|
||||
description: "handles non-existent folder",
|
||||
root: "test/input/csv_samples/NONEXISTENT",
|
||||
},
|
||||
{
|
||||
description: "handles non-existent folder root",
|
||||
root: "/NONEXISTENT/test/input/csv_samples/",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
root := filepath.Join(moduleRoot, tc.root)
|
||||
files, err := GetFileList(root)
|
||||
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
require.Empty(t, files)
|
||||
return
|
||||
}
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
var foundFiles []string
|
||||
for _, f := range files {
|
||||
require.Equal(t, root, filepath.Dir(f))
|
||||
require.Equal(t, ".csv", filepath.Ext(f))
|
||||
foundFiles = append(foundFiles, filepath.Base(f))
|
||||
}
|
||||
|
||||
require.ElementsMatch(t, tc.files, foundFiles)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// MountSpecType defines the mount types allowed in a CSV file
|
||||
type MountSpecType string
|
||||
|
||||
const (
|
||||
// MountSpecDev is used for character devices
|
||||
MountSpecDev = MountSpecType("dev")
|
||||
// MountSpecDir is used for directories
|
||||
MountSpecDir = MountSpecType("dir")
|
||||
// MountSpecLib is used for libraries or regular files
|
||||
MountSpecLib = MountSpecType("lib")
|
||||
// MountSpecSym is used for symlinks.
|
||||
MountSpecSym = MountSpecType("sym")
|
||||
)
|
||||
|
||||
// MountSpec represents a Jetson mount consisting of a type and a path.
|
||||
type MountSpec struct {
|
||||
Type MountSpecType
|
||||
Path string
|
||||
}
|
||||
|
||||
// NewMountSpecFromLine parses the specified line and returns the MountSpec or an error if the line is malformed
|
||||
func NewMountSpecFromLine(line string) (*MountSpec, error) {
|
||||
parts := strings.SplitN(strings.TrimSpace(line), ",", 2)
|
||||
if len(parts) < 2 {
|
||||
return nil, fmt.Errorf("failed to parse line: %v", line)
|
||||
}
|
||||
mountType := strings.TrimSpace(parts[0])
|
||||
path := strings.TrimSpace(parts[1])
|
||||
|
||||
return NewMountSpec(mountType, path)
|
||||
}
|
||||
|
||||
// NewMountSpec creates a MountSpec with the specified type and path. An error is returned if the type is invalid.
|
||||
func NewMountSpec(mountType string, path string) (*MountSpec, error) {
|
||||
mt := MountSpecType(mountType)
|
||||
switch mt {
|
||||
case MountSpecDev, MountSpecLib, MountSpecSym, MountSpecDir:
|
||||
default:
|
||||
return nil, fmt.Errorf("unexpected mount type: %v", mt)
|
||||
}
|
||||
if path == "" {
|
||||
return nil, fmt.Errorf("invalid path: %v", path)
|
||||
}
|
||||
|
||||
mount := MountSpec{
|
||||
Type: mt,
|
||||
Path: path,
|
||||
}
|
||||
|
||||
return &mount, nil
|
||||
}
|
||||
@@ -1,82 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package csv
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewMountSpecFromLine(t *testing.T) {
|
||||
parseError := fmt.Errorf("failed to parse line")
|
||||
unexpectedError := fmt.Errorf("unexpected mount type")
|
||||
|
||||
testCases := []struct {
|
||||
line string
|
||||
expectedError error
|
||||
expectedValue MountSpec
|
||||
}{
|
||||
{
|
||||
line: "",
|
||||
expectedError: parseError,
|
||||
},
|
||||
{
|
||||
line: "\t",
|
||||
expectedError: parseError,
|
||||
},
|
||||
{
|
||||
line: ",",
|
||||
expectedError: parseError,
|
||||
},
|
||||
{
|
||||
line: "dev,",
|
||||
expectedError: parseError,
|
||||
},
|
||||
{
|
||||
line: "dev ,/a/path",
|
||||
expectedValue: MountSpec{
|
||||
Path: "/a/path",
|
||||
Type: "dev",
|
||||
},
|
||||
},
|
||||
{
|
||||
line: "dev ,/a/path,with,commas",
|
||||
expectedValue: MountSpec{
|
||||
Path: "/a/path,with,commas",
|
||||
Type: "dev",
|
||||
},
|
||||
},
|
||||
{
|
||||
line: "not-dev ,/a/path",
|
||||
expectedError: unexpectedError,
|
||||
},
|
||||
}
|
||||
|
||||
for i, tc := range testCases {
|
||||
t.Run(fmt.Sprintf("test case %d", i), func(t *testing.T) {
|
||||
target, err := NewMountSpecFromLine(tc.line)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, &tc.expectedValue, target)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,142 +0,0 @@
|
||||
/**
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
**/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover/csv"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestNewFromMountSpec(t *testing.T) {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
|
||||
locators := map[csv.MountSpecType]lookup.Locator{
|
||||
"dev": &lookup.LocatorMock{},
|
||||
"lib": &lookup.LocatorMock{},
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
root string
|
||||
targets []*csv.MountSpec
|
||||
expectedError error
|
||||
expectedDiscoverer Discover
|
||||
}{
|
||||
{
|
||||
description: "empty targets returns None discoverer list",
|
||||
expectedDiscoverer: &None{},
|
||||
},
|
||||
{
|
||||
description: "unexpected locator returns error",
|
||||
targets: []*csv.MountSpec{
|
||||
{
|
||||
Type: "foo",
|
||||
Path: "bar",
|
||||
},
|
||||
},
|
||||
expectedError: fmt.Errorf("no locator defined for foo"),
|
||||
},
|
||||
{
|
||||
description: "creates discoverers based on type",
|
||||
targets: []*csv.MountSpec{
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev0",
|
||||
},
|
||||
{
|
||||
Type: "lib",
|
||||
Path: "lib0",
|
||||
},
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev1",
|
||||
},
|
||||
},
|
||||
expectedDiscoverer: &list{
|
||||
discoverers: []Discover{
|
||||
(*charDevices)(
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["dev"],
|
||||
root: "/",
|
||||
required: []string{"dev0", "dev1"},
|
||||
},
|
||||
),
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["lib"],
|
||||
root: "/",
|
||||
required: []string{"lib0"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "sets root",
|
||||
targets: []*csv.MountSpec{
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev0",
|
||||
},
|
||||
{
|
||||
Type: "lib",
|
||||
Path: "lib0",
|
||||
},
|
||||
{
|
||||
Type: "dev",
|
||||
Path: "dev1",
|
||||
},
|
||||
},
|
||||
root: "/some/root",
|
||||
expectedDiscoverer: &list{
|
||||
discoverers: []Discover{
|
||||
(*charDevices)(
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["dev"],
|
||||
root: "/some/root",
|
||||
required: []string{"dev0", "dev1"},
|
||||
},
|
||||
),
|
||||
&mounts{
|
||||
logger: logger,
|
||||
lookup: locators["lib"],
|
||||
root: "/some/root",
|
||||
required: []string{"lib0"},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
discoverer, err := newFromMountSpecs(logger, locators, tc.root, tc.targets)
|
||||
if tc.expectedError != nil {
|
||||
require.Error(t, err)
|
||||
return
|
||||
}
|
||||
require.NoError(t, err)
|
||||
require.EqualValues(t, tc.expectedDiscoverer, discoverer)
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
# Copyright (c) 2021-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
@@ -16,33 +16,48 @@
|
||||
|
||||
package discover
|
||||
|
||||
// Config represents the configuration options for discovery
|
||||
type Config struct {
|
||||
Root string
|
||||
NVIDIAContainerToolkitCLIExecutablePath string
|
||||
// DevicePath is a path in /dev associated with a device
|
||||
type DevicePath string
|
||||
|
||||
// ProcPath is a path in /proc associated with a devices
|
||||
type ProcPath string
|
||||
|
||||
// PCIBusID is the ID on the PCI bus of a device
|
||||
type PCIBusID string
|
||||
|
||||
// DeviceNode represents a device on the file system
|
||||
type DeviceNode struct {
|
||||
Path DevicePath
|
||||
Major int
|
||||
Minor int
|
||||
}
|
||||
|
||||
// Device represents a discovered character device.
|
||||
// Device represents a discovered device including identifiers (Index, UUID, PCI bus ID)
|
||||
// for selection and paths in /dev and /proc associated with the device
|
||||
type Device struct {
|
||||
HostPath string
|
||||
Path string
|
||||
Index string
|
||||
UUID string
|
||||
PCIBusID PCIBusID
|
||||
DeviceNodes []DeviceNode
|
||||
ProcPaths []ProcPath
|
||||
}
|
||||
|
||||
// Mount represents a discovered mount.
|
||||
// Mount represents a discovered mount. This includes a set of labels
|
||||
// for selection and the mount path
|
||||
type Mount struct {
|
||||
HostPath string
|
||||
Path string
|
||||
Path string
|
||||
Labels map[string]string
|
||||
}
|
||||
|
||||
// Hook represents a discovered hook.
|
||||
// Hook represents a discovered hook
|
||||
type Hook struct {
|
||||
Lifecycle string
|
||||
Path string
|
||||
Args []string
|
||||
Path string
|
||||
Args []string
|
||||
HookName string
|
||||
Labels map[string]string
|
||||
}
|
||||
|
||||
//go:generate moq -stub -out discover_mock.go . Discover
|
||||
// Discover defines an interface for discovering the devices, mounts, and hooks available on a system
|
||||
// Discover defines an interface for discovering the devices and mounts available on a system
|
||||
type Discover interface {
|
||||
Devices() ([]Device, error)
|
||||
Mounts() ([]Mount, error)
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
// Code generated by moq; DO NOT EDIT.
|
||||
// github.com/matryer/moq
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"sync"
|
||||
)
|
||||
|
||||
// Ensure, that DiscoverMock does implement Discover.
|
||||
// If this is not the case, regenerate this file with moq.
|
||||
var _ Discover = &DiscoverMock{}
|
||||
|
||||
// DiscoverMock is a mock implementation of Discover.
|
||||
//
|
||||
// func TestSomethingThatUsesDiscover(t *testing.T) {
|
||||
//
|
||||
// // make and configure a mocked Discover
|
||||
// mockedDiscover := &DiscoverMock{
|
||||
// DevicesFunc: func() ([]Device, error) {
|
||||
// panic("mock out the Devices method")
|
||||
// },
|
||||
// HooksFunc: func() ([]Hook, error) {
|
||||
// panic("mock out the Hooks method")
|
||||
// },
|
||||
// MountsFunc: func() ([]Mount, error) {
|
||||
// panic("mock out the Mounts method")
|
||||
// },
|
||||
// }
|
||||
//
|
||||
// // use mockedDiscover in code that requires Discover
|
||||
// // and then make assertions.
|
||||
//
|
||||
// }
|
||||
type DiscoverMock struct {
|
||||
// DevicesFunc mocks the Devices method.
|
||||
DevicesFunc func() ([]Device, error)
|
||||
|
||||
// HooksFunc mocks the Hooks method.
|
||||
HooksFunc func() ([]Hook, error)
|
||||
|
||||
// MountsFunc mocks the Mounts method.
|
||||
MountsFunc func() ([]Mount, error)
|
||||
|
||||
// calls tracks calls to the methods.
|
||||
calls struct {
|
||||
// Devices holds details about calls to the Devices method.
|
||||
Devices []struct {
|
||||
}
|
||||
// Hooks holds details about calls to the Hooks method.
|
||||
Hooks []struct {
|
||||
}
|
||||
// Mounts holds details about calls to the Mounts method.
|
||||
Mounts []struct {
|
||||
}
|
||||
}
|
||||
lockDevices sync.RWMutex
|
||||
lockHooks sync.RWMutex
|
||||
lockMounts sync.RWMutex
|
||||
}
|
||||
|
||||
// Devices calls DevicesFunc.
|
||||
func (mock *DiscoverMock) Devices() ([]Device, error) {
|
||||
callInfo := struct {
|
||||
}{}
|
||||
mock.lockDevices.Lock()
|
||||
mock.calls.Devices = append(mock.calls.Devices, callInfo)
|
||||
mock.lockDevices.Unlock()
|
||||
if mock.DevicesFunc == nil {
|
||||
var (
|
||||
devicesOut []Device
|
||||
errOut error
|
||||
)
|
||||
return devicesOut, errOut
|
||||
}
|
||||
return mock.DevicesFunc()
|
||||
}
|
||||
|
||||
// DevicesCalls gets all the calls that were made to Devices.
|
||||
// Check the length with:
|
||||
// len(mockedDiscover.DevicesCalls())
|
||||
func (mock *DiscoverMock) DevicesCalls() []struct {
|
||||
} {
|
||||
var calls []struct {
|
||||
}
|
||||
mock.lockDevices.RLock()
|
||||
calls = mock.calls.Devices
|
||||
mock.lockDevices.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// Hooks calls HooksFunc.
|
||||
func (mock *DiscoverMock) Hooks() ([]Hook, error) {
|
||||
callInfo := struct {
|
||||
}{}
|
||||
mock.lockHooks.Lock()
|
||||
mock.calls.Hooks = append(mock.calls.Hooks, callInfo)
|
||||
mock.lockHooks.Unlock()
|
||||
if mock.HooksFunc == nil {
|
||||
var (
|
||||
hooksOut []Hook
|
||||
errOut error
|
||||
)
|
||||
return hooksOut, errOut
|
||||
}
|
||||
return mock.HooksFunc()
|
||||
}
|
||||
|
||||
// HooksCalls gets all the calls that were made to Hooks.
|
||||
// Check the length with:
|
||||
// len(mockedDiscover.HooksCalls())
|
||||
func (mock *DiscoverMock) HooksCalls() []struct {
|
||||
} {
|
||||
var calls []struct {
|
||||
}
|
||||
mock.lockHooks.RLock()
|
||||
calls = mock.calls.Hooks
|
||||
mock.lockHooks.RUnlock()
|
||||
return calls
|
||||
}
|
||||
|
||||
// Mounts calls MountsFunc.
|
||||
func (mock *DiscoverMock) Mounts() ([]Mount, error) {
|
||||
callInfo := struct {
|
||||
}{}
|
||||
mock.lockMounts.Lock()
|
||||
mock.calls.Mounts = append(mock.calls.Mounts, callInfo)
|
||||
mock.lockMounts.Unlock()
|
||||
if mock.MountsFunc == nil {
|
||||
var (
|
||||
mountsOut []Mount
|
||||
errOut error
|
||||
)
|
||||
return mountsOut, errOut
|
||||
}
|
||||
return mock.MountsFunc()
|
||||
}
|
||||
|
||||
// MountsCalls gets all the calls that were made to Mounts.
|
||||
// Check the length with:
|
||||
// len(mockedDiscover.MountsCalls())
|
||||
func (mock *DiscoverMock) MountsCalls() []struct {
|
||||
} {
|
||||
var calls []struct {
|
||||
}
|
||||
mock.lockMounts.RLock()
|
||||
calls = mock.calls.Mounts
|
||||
mock.lockMounts.RUnlock()
|
||||
return calls
|
||||
}
|
||||
424
internal/discover/discover_nvml.go
Normal file
424
internal/discover/discover_nvml.go
Normal file
@@ -0,0 +1,424 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
|
||||
log "github.com/sirupsen/logrus"
|
||||
)
|
||||
|
||||
const (
|
||||
// ControlDeviceUUID is used as the UUID for control devices such as nvidiactl or nvidia-modeset
|
||||
ControlDeviceUUID = "CONTROL"
|
||||
|
||||
// MIGConfigDeviceUUID is used to indicate the MIG config control device
|
||||
MIGConfigDeviceUUID = "CONFIG"
|
||||
|
||||
// MIGMonitorDeviceUUID is used to indicate the MIG monitor control device
|
||||
MIGMonitorDeviceUUID = "MONITOR"
|
||||
|
||||
nvidiaGPUDeviceName = "nvidia-frontend"
|
||||
nvidiaCapsDeviceName = "nvidia-caps"
|
||||
nvidiaUVMDeviceName = "nvidia-uvm"
|
||||
)
|
||||
|
||||
type nvmlDiscover struct {
|
||||
None
|
||||
logger *log.Logger
|
||||
nvml nvml.Interface
|
||||
migCaps map[ProcPath]DeviceNode
|
||||
nvidiaDevices proc.NvidiaDevices
|
||||
}
|
||||
|
||||
var _ Discover = (*nvmlDiscover)(nil)
|
||||
|
||||
// NewNVMLDiscover constructs a discoverer that uses NVML to find the devices
|
||||
// available on a system.
|
||||
func NewNVMLDiscover(nvml nvml.Interface) (Discover, error) {
|
||||
return NewNVMLDiscoverWithLogger(log.StandardLogger(), nvml)
|
||||
}
|
||||
|
||||
// NewNVMLDiscoverWithLogger constructs a discovered as with NewNVMLDiscover with the specified
|
||||
// logger
|
||||
func NewNVMLDiscoverWithLogger(logger *log.Logger, nvml nvml.Interface) (Discover, error) {
|
||||
nvidiaDevices, err := proc.GetNvidiaDevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading NVIDIA devices: %v", err)
|
||||
}
|
||||
|
||||
var migCaps map[ProcPath]DeviceNode
|
||||
nvcapsDevice, exists := nvidiaDevices.Get(nvidiaCapsDeviceName)
|
||||
if !exists {
|
||||
logger.Warnf("%v nvcaps device could not be found", nvidiaCapsDeviceName)
|
||||
} else if migCaps, err = getMigCaps(nvcapsDevice.Major); err != nil {
|
||||
logger.Warnf("Could not load MIG capability devices: %v", err)
|
||||
migCaps = nil
|
||||
}
|
||||
|
||||
discover := &nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml,
|
||||
migCaps: migCaps,
|
||||
nvidiaDevices: nvidiaDevices,
|
||||
}
|
||||
|
||||
return discover, nil
|
||||
}
|
||||
|
||||
// hasMigSupport checks if MIG device discovery is supported.
|
||||
// Cases where this will be disabled include where no MIG minors file is
|
||||
// present.
|
||||
func (d nvmlDiscover) hasMigSupport() bool {
|
||||
return len(d.migCaps) > 0
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) Devices() ([]Device, error) {
|
||||
ret := d.nvml.Init()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error initalizing NVML: %v", ret.Error())
|
||||
}
|
||||
defer d.tryShutdownNVML()
|
||||
|
||||
c, ret := d.nvml.DeviceGetCount()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device count: %v", ret.Error())
|
||||
}
|
||||
|
||||
var handles []nvml.Device
|
||||
for i := 0; i < c; i++ {
|
||||
handle, ret := d.nvml.DeviceGetHandleByIndex(i)
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting device handle for device %v: %v", i, ret.Error())
|
||||
}
|
||||
|
||||
if !d.hasMigSupport() {
|
||||
handles = append(handles, handle)
|
||||
continue
|
||||
}
|
||||
|
||||
migHandles, err := getMIGHandlesForDevice(handle)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG handles for device: %v", err)
|
||||
}
|
||||
if len(migHandles) == 0 {
|
||||
handles = append(handles, handle)
|
||||
}
|
||||
handles = append(handles, migHandles...)
|
||||
}
|
||||
|
||||
return d.devicesByHandle(handles)
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) devicesByHandle(handles []nvml.Device) ([]Device, error) {
|
||||
var devices []Device
|
||||
var largestMinorNumber int
|
||||
for _, h := range handles {
|
||||
device, err := d.deviceFromNVMLHandle(h)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error constructing device from handle %v: %v", h, err)
|
||||
}
|
||||
devices = append(devices, device)
|
||||
|
||||
if largestMinorNumber < device.DeviceNodes[0].Minor {
|
||||
largestMinorNumber = device.DeviceNodes[0].Minor
|
||||
}
|
||||
}
|
||||
|
||||
controlDevices, err := d.getControlDevices()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting control devices: %v", err)
|
||||
}
|
||||
devices = append(devices, controlDevices...)
|
||||
|
||||
if d.hasMigSupport() {
|
||||
migControlDevices, err := d.getMigControlDevices(largestMinorNumber)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error getting MIG control devices: %v", err)
|
||||
}
|
||||
devices = append(devices, migControlDevices...)
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromNVMLHandle(handle nvml.Device) (Device, error) {
|
||||
if d.hasMigSupport() {
|
||||
isMigDevice, ret := handle.IsMigDeviceHandle()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error checking device handle: %v", ret.Error())
|
||||
}
|
||||
|
||||
if isMigDevice {
|
||||
return d.deviceFromMIGDeviceHandle(handle)
|
||||
}
|
||||
}
|
||||
|
||||
return d.deviceFromFullDeviceHandle(handle)
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromFullDeviceHandle(handle nvml.Device) (Device, error) {
|
||||
index, ret := handle.GetIndex()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device index: %v", ret.Error())
|
||||
}
|
||||
|
||||
uuid, ret := handle.GetUUID()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device UUID: %v", ret.Error())
|
||||
}
|
||||
|
||||
pciInfo, ret := handle.GetPciInfo()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting PCI info: %v", ret.Error())
|
||||
}
|
||||
busID := NewPCIBusID(pciInfo)
|
||||
|
||||
minor, ret := handle.GetMinorNumber()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting minor number: %v", ret.Error())
|
||||
}
|
||||
|
||||
nvidiaGPUDevice, exists := d.nvidiaDevices.Get(nvidiaGPUDeviceName)
|
||||
if !exists {
|
||||
return Device{}, fmt.Errorf("device for '%v' does not exist", nvidiaGPUDeviceName)
|
||||
}
|
||||
|
||||
deviceNode := DeviceNode{
|
||||
Path: DevicePath(fmt.Sprintf("/dev/nvidia%d", minor)),
|
||||
Major: nvidiaGPUDevice.Major,
|
||||
Minor: minor,
|
||||
}
|
||||
|
||||
device := Device{
|
||||
Index: fmt.Sprintf("%d", index),
|
||||
PCIBusID: busID,
|
||||
UUID: uuid,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: []ProcPath{busID.GetProcPath()},
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) deviceFromMIGDeviceHandle(handle nvml.Device) (Device, error) {
|
||||
parent, ret := handle.GetDeviceHandleFromMigDeviceHandle()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting parent device handle: %v", ret.Error())
|
||||
}
|
||||
|
||||
gpu, ret := parent.GetMinorNumber()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting GPU minor number: %v", ret.Error())
|
||||
}
|
||||
|
||||
parentDevice, err := d.deviceFromFullDeviceHandle(parent)
|
||||
if err != nil {
|
||||
return Device{}, fmt.Errorf("error getting parent device: %v", err)
|
||||
}
|
||||
|
||||
index, ret := handle.GetIndex()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device index: %v", ret.Error())
|
||||
}
|
||||
|
||||
uuid, ret := handle.GetUUID()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return Device{}, fmt.Errorf("error getting device UUID: %v", ret.Error())
|
||||
}
|
||||
|
||||
capDeviceNodes := []DeviceNode{}
|
||||
procPaths, err := getProcPathsForMigDevice(gpu, handle)
|
||||
if err != nil {
|
||||
return Device{}, fmt.Errorf("error getting proc paths for MIG device: %v", err)
|
||||
}
|
||||
|
||||
for _, p := range procPaths {
|
||||
capDeviceNode, ok := d.migCaps[p]
|
||||
if !ok {
|
||||
return Device{}, fmt.Errorf("could not determine cap device path for %v", p)
|
||||
}
|
||||
capDeviceNodes = append(capDeviceNodes, capDeviceNode)
|
||||
}
|
||||
|
||||
device := Device{
|
||||
Index: fmt.Sprintf("%s:%d", parentDevice.Index, index),
|
||||
UUID: uuid,
|
||||
DeviceNodes: append(parentDevice.DeviceNodes, capDeviceNodes...),
|
||||
ProcPaths: append(parentDevice.ProcPaths, procPaths...),
|
||||
}
|
||||
|
||||
return device, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) getControlDevices() ([]Device, error) {
|
||||
devices := []struct {
|
||||
name string
|
||||
path string
|
||||
minor int
|
||||
}{
|
||||
// TODO: Where is the best place to find these device Minors programatically?
|
||||
{nvidiaGPUDeviceName, "/dev/nvidia-modeset", 254},
|
||||
{nvidiaGPUDeviceName, "/dev/nvidiactl", 255},
|
||||
{nvidiaUVMDeviceName, "/dev/nvidia-uvm", 0},
|
||||
{nvidiaUVMDeviceName, "/dev/nvidia-uvm-tools", 1},
|
||||
}
|
||||
|
||||
var controlDevices []Device
|
||||
for _, info := range devices {
|
||||
device, exists := d.nvidiaDevices.Get(info.name)
|
||||
if !exists {
|
||||
d.logger.Warnf("device name %v not defined; skipping control devices %v", info.name, info.path)
|
||||
continue
|
||||
}
|
||||
|
||||
deviceNode := DeviceNode{
|
||||
Path: DevicePath(info.path),
|
||||
Major: device.Major,
|
||||
Minor: info.minor,
|
||||
}
|
||||
|
||||
controlDevices = append(controlDevices, Device{
|
||||
UUID: ControlDeviceUUID,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: []ProcPath{},
|
||||
})
|
||||
|
||||
}
|
||||
|
||||
return controlDevices, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) getMigControlDevices(numGpus int) ([]Device, error) {
|
||||
targets := map[string]ProcPath{
|
||||
MIGConfigDeviceUUID: ProcPath("/proc/driver/nvidia/capabilities/mig/config"),
|
||||
MIGMonitorDeviceUUID: ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"),
|
||||
}
|
||||
|
||||
var devices []Device
|
||||
for id, procPath := range targets {
|
||||
deviceNode, exists := d.migCaps[procPath]
|
||||
if !exists {
|
||||
return nil, fmt.Errorf("device node for '%v' is undefined", procPath)
|
||||
}
|
||||
|
||||
var procPaths []ProcPath
|
||||
for gpu := 0; gpu <= numGpus; gpu++ {
|
||||
procPaths = append(procPaths, ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig", gpu)))
|
||||
}
|
||||
procPaths = append(procPaths, procPath)
|
||||
|
||||
devices = append(devices, Device{
|
||||
UUID: id,
|
||||
DeviceNodes: []DeviceNode{deviceNode},
|
||||
ProcPaths: procPaths,
|
||||
})
|
||||
}
|
||||
|
||||
return devices, nil
|
||||
}
|
||||
|
||||
func getProcPathsForMigDevice(gpu int, handle nvml.Device) ([]ProcPath, error) {
|
||||
gi, ret := handle.GetGPUInstanceId()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting GPU instance ID: %v", ret.Error())
|
||||
}
|
||||
|
||||
ci, ret := handle.GetComputeInstanceId()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting comput instance ID: %v", ret.Error())
|
||||
}
|
||||
|
||||
procPaths := []ProcPath{
|
||||
ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig/gi%d/access", gpu, gi)),
|
||||
ProcPath(fmt.Sprintf("/proc/driver/nvidia/capabilities/gpu%d/mig/gi%d/ci%d/access", gpu, gi, ci)),
|
||||
}
|
||||
|
||||
return procPaths, nil
|
||||
}
|
||||
|
||||
func getMIGHandlesForDevice(handle nvml.Device) ([]nvml.Device, error) {
|
||||
currentMigMode, _, ret := handle.GetMigMode()
|
||||
if ret.Value() == nvml.ERROR_NOT_SUPPORTED {
|
||||
return nil, nil
|
||||
}
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting MIG mode for device: %v", ret.Error())
|
||||
}
|
||||
if currentMigMode == nvml.DEVICE_MIG_DISABLE {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
maxMigDeviceCount, ret := handle.GetMaxMigDeviceCount()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting number of MIG devices: %v", ret.Error())
|
||||
}
|
||||
|
||||
var migHandles []nvml.Device
|
||||
for mi := 0; mi < maxMigDeviceCount; mi++ {
|
||||
migHandle, ret := handle.GetMigDeviceHandleByIndex(mi)
|
||||
if ret.Value() == nvml.ERROR_NOT_FOUND {
|
||||
continue
|
||||
}
|
||||
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
return nil, fmt.Errorf("error getting MIG device %v: %v", mi, ret.Error())
|
||||
}
|
||||
|
||||
migHandles = append(migHandles, migHandle)
|
||||
}
|
||||
|
||||
return migHandles, nil
|
||||
}
|
||||
|
||||
func (d *nvmlDiscover) tryShutdownNVML() {
|
||||
ret := d.nvml.Shutdown()
|
||||
if ret.Value() != nvml.SUCCESS {
|
||||
d.logger.Warnf("Could not shut down NVML: %v", ret.Error())
|
||||
}
|
||||
}
|
||||
|
||||
// NewPCIBusID provides a utility function that returns the string representation
|
||||
// of the bus ID.
|
||||
func NewPCIBusID(p nvml.PciInfo) PCIBusID {
|
||||
var bytes []byte
|
||||
for _, b := range p.BusId {
|
||||
if byte(b) == '\x00' {
|
||||
break
|
||||
}
|
||||
bytes = append(bytes, byte(b))
|
||||
}
|
||||
return PCIBusID(string(bytes))
|
||||
}
|
||||
|
||||
// GetProcPath returns the path in /proc associated with the PCI bus ID
|
||||
func (p PCIBusID) GetProcPath() ProcPath {
|
||||
id := strings.ToLower(p.String())
|
||||
|
||||
if strings.HasPrefix(id, "0000") {
|
||||
id = id[4:]
|
||||
}
|
||||
return ProcPath(filepath.Join("/proc/driver/nvidia/gpus", id))
|
||||
}
|
||||
|
||||
func (p PCIBusID) String() string {
|
||||
return string(p)
|
||||
}
|
||||
44
internal/discover/discover_nvml_mig.go
Normal file
44
internal/discover/discover_nvml_mig.go
Normal file
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
)
|
||||
|
||||
// getMigCaps returns a mapping of MIG capability path to device nodes
|
||||
func getMigCaps(capDeviceMajor int) (map[ProcPath]DeviceNode, error) {
|
||||
migCaps, err := nvcaps.LoadMigMinors()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error loading MIG minors: %v", err)
|
||||
}
|
||||
return getMigCapsFromMigMinors(migCaps, capDeviceMajor), nil
|
||||
}
|
||||
|
||||
func getMigCapsFromMigMinors(migCaps map[nvcaps.MigCap]nvcaps.MigMinor, capDeviceMajor int) map[ProcPath]DeviceNode {
|
||||
capsDevicePaths := make(map[ProcPath]DeviceNode)
|
||||
for cap, minor := range migCaps {
|
||||
capsDevicePaths[ProcPath(cap.ProcPath())] = DeviceNode{
|
||||
Path: DevicePath(minor.DevicePath()),
|
||||
Major: capDeviceMajor,
|
||||
Minor: int(minor),
|
||||
}
|
||||
}
|
||||
return capsDevicePaths
|
||||
}
|
||||
41
internal/discover/discover_nvml_mig_test.go
Normal file
41
internal/discover/discover_nvml_mig_test.go
Normal file
@@ -0,0 +1,41 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvcaps"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetMigCaps(t *testing.T) {
|
||||
migMinors := map[nvcaps.MigCap]nvcaps.MigMinor{
|
||||
"config": 1,
|
||||
"monitor": 2,
|
||||
"gpu0/gi0/access": 3,
|
||||
"gpu0/gi0/ci0/access": 4,
|
||||
}
|
||||
|
||||
migCapMajor := 999
|
||||
migCaps := getMigCapsFromMigMinors(migMinors, migCapMajor)
|
||||
|
||||
require.Len(t, migCaps, len(migMinors))
|
||||
for _, c := range migCaps {
|
||||
require.Equal(t, migCapMajor, c.Major)
|
||||
}
|
||||
}
|
||||
219
internal/discover/discover_nvml_test.go
Normal file
219
internal/discover/discover_nvml_test.go
Normal file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
*/
|
||||
|
||||
package discover
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/nvml"
|
||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/proc"
|
||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
const (
|
||||
nvidiaGPUDeviceMajorDefault = 195
|
||||
nvidiaCapsDeviceMajorDefault = 235
|
||||
)
|
||||
|
||||
func newTestDiscover() nvmlDiscover {
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
nvml := nvml.NewMockNVMLServer(nvml.NewMockA100Device(0))
|
||||
|
||||
return nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml,
|
||||
nvidiaDevices: proc.NewMockNvidiaDevices(
|
||||
proc.Device{Name: nvidiaGPUDeviceName, Major: nvidiaGPUDeviceMajorDefault},
|
||||
),
|
||||
}
|
||||
}
|
||||
|
||||
func newTestDiscoverWithMIG() nvmlDiscover {
|
||||
device := &nvml.MockA100Device{
|
||||
Index: 0,
|
||||
MigMode: nvml.DEVICE_MIG_ENABLE,
|
||||
GpuInstances: make(map[*nvml.MockA100GpuInstance]struct{}),
|
||||
GpuInstanceCounter: 0,
|
||||
}
|
||||
// Create a single gi and ci on the device
|
||||
gpuInstanceProfileInfo := &nvml.GpuInstanceProfileInfo{
|
||||
Id: nvml.GPU_INSTANCE_PROFILE_7_SLICE,
|
||||
}
|
||||
gi, _ := device.CreateGpuInstance(gpuInstanceProfileInfo)
|
||||
|
||||
computeInstanceProfileInfo := &nvml.ComputeInstanceProfileInfo{
|
||||
Id: nvml.COMPUTE_INSTANCE_PROFILE_1_SLICE,
|
||||
}
|
||||
_, _ = gi.CreateComputeInstance(computeInstanceProfileInfo)
|
||||
|
||||
logger, _ := testlog.NewNullLogger()
|
||||
return nvmlDiscover{
|
||||
logger: logger,
|
||||
nvml: nvml.NewMockNVMLServer(device),
|
||||
migCaps: map[ProcPath]DeviceNode{
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap3"),
|
||||
Minor: 3,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap4"),
|
||||
Minor: 4,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/mig/config"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap1"),
|
||||
Minor: 1,
|
||||
Major: 235,
|
||||
},
|
||||
ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"): {
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap2"),
|
||||
Minor: 2,
|
||||
Major: 235,
|
||||
},
|
||||
},
|
||||
nvidiaDevices: proc.NewMockNvidiaDevices(
|
||||
proc.Device{Name: nvidiaGPUDeviceName, Major: nvidiaGPUDeviceMajorDefault},
|
||||
proc.Device{Name: nvidiaCapsDeviceName, Major: nvidiaCapsDeviceMajorDefault},
|
||||
),
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestDiscoverNvmlDevices(t *testing.T) {
|
||||
d := newTestDiscover()
|
||||
devices, err := d.Devices()
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, devices, 3)
|
||||
|
||||
device := devices[0]
|
||||
|
||||
require.Equal(t, "0", device.Index)
|
||||
require.Equal(t, "GPU-0", device.UUID)
|
||||
require.Equal(t, "0000FFFF:FF:FF.F", device.PCIBusID.String())
|
||||
|
||||
expectedDeviceNodes := []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia0"),
|
||||
Minor: 0,
|
||||
Major: nvidiaGPUDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, device.DeviceNodes)
|
||||
|
||||
expectedProcPaths := []ProcPath{ProcPath("/proc/driver/nvidia/gpus/ffff:ff:ff.f")}
|
||||
require.Equal(t, expectedProcPaths, device.ProcPaths)
|
||||
}
|
||||
|
||||
func TestDiscoverNvmlMigDevices(t *testing.T) {
|
||||
d := newTestDiscoverWithMIG()
|
||||
|
||||
devices, err := d.Devices()
|
||||
|
||||
require.NoError(t, err)
|
||||
require.Len(t, devices, 5)
|
||||
|
||||
mig := devices[0]
|
||||
|
||||
require.Equal(t, "0:0", mig.Index)
|
||||
require.Equal(t, "MIG-0", mig.UUID)
|
||||
require.Empty(t, mig.PCIBusID)
|
||||
|
||||
expectedDeviceNodes := []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia0"),
|
||||
Minor: 0,
|
||||
Major: nvidiaGPUDeviceMajorDefault,
|
||||
},
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap3"),
|
||||
Minor: 3,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap4"),
|
||||
Minor: 4,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, mig.DeviceNodes)
|
||||
|
||||
expectedProcPaths := []ProcPath{
|
||||
ProcPath("/proc/driver/nvidia/gpus/ffff:ff:ff.f"),
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/access"),
|
||||
ProcPath("/proc/driver/nvidia/capabilities/gpu0/mig/gi0/ci0/access"),
|
||||
}
|
||||
require.Equal(t, expectedProcPaths, mig.ProcPaths)
|
||||
|
||||
var config *Device
|
||||
var monitor *Device
|
||||
for i, d := range devices {
|
||||
if d.UUID == "CONFIG" {
|
||||
config = &devices[i]
|
||||
}
|
||||
if d.UUID == "MONITOR" {
|
||||
monitor = &devices[i]
|
||||
}
|
||||
}
|
||||
|
||||
require.NotNil(t, config)
|
||||
require.NotNil(t, monitor)
|
||||
|
||||
require.Equal(t, "CONFIG", config.UUID)
|
||||
|
||||
expectedDeviceNodes = []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap1"),
|
||||
Minor: 1,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, config.DeviceNodes)
|
||||
|
||||
require.Contains(t, config.ProcPaths, ProcPath("/proc/driver/nvidia/capabilities/mig/config"))
|
||||
require.Len(t, config.ProcPaths, 2)
|
||||
|
||||
require.Equal(t, "MONITOR", monitor.UUID)
|
||||
|
||||
expectedDeviceNodes = []DeviceNode{
|
||||
{
|
||||
Path: DevicePath("/dev/nvidia-caps/nvidia-cap2"),
|
||||
Minor: 2,
|
||||
Major: nvidiaCapsDeviceMajorDefault,
|
||||
},
|
||||
}
|
||||
require.Equal(t, expectedDeviceNodes, monitor.DeviceNodes)
|
||||
|
||||
require.Contains(t, monitor.ProcPaths, ProcPath("/proc/driver/nvidia/capabilities/mig/monitor"))
|
||||
require.Len(t, monitor.ProcPaths, 2)
|
||||
|
||||
}
|
||||
|
||||
func TestPCIBusID(t *testing.T) {
|
||||
testCases := map[string]ProcPath{
|
||||
"0000FFFF:FF:FF.F": "/proc/driver/nvidia/gpus/ffff:ff:ff.f",
|
||||
"FFFFFFFF:FF:FF.F": "/proc/driver/nvidia/gpus/ffffffff:ff:ff.f",
|
||||
}
|
||||
|
||||
for busID, procPath := range testCases {
|
||||
p := PCIBusID(busID)
|
||||
require.Equal(t, busID, p.String())
|
||||
require.Equal(t, procPath, p.GetProcPath())
|
||||
}
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user