Compare commits

..

1 Commits

Author SHA1 Message Date
Carlos Eduardo Arango Gutierrez
9674787e7e [no-relnote] Add E2E for libnvidia-container
Some checks failed
CI Pipeline / code-scanning (push) Has been cancelled
CI Pipeline / variables (push) Has been cancelled
CI Pipeline / golang (push) Has been cancelled
CI Pipeline / image (push) Has been cancelled
CI Pipeline / e2e-test (push) Has been cancelled
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
2025-06-05 18:18:04 +02:00
163 changed files with 4481 additions and 13336 deletions

View File

@@ -22,7 +22,15 @@ variables:
BUILD_MULTI_ARCH_IMAGES: "true" BUILD_MULTI_ARCH_IMAGES: "true"
stages: stages:
- pull - trigger
- image
- lint
- go-checks
- go-build
- unit-tests
- package-build
- image-build
- test
- scan - scan
- release - release
- sign - sign
@@ -45,6 +53,108 @@ workflow:
# We then add all the regular triggers # We then add all the regular triggers
- !reference [.pipeline-trigger-rules, rules] - !reference [.pipeline-trigger-rules, rules]
# The main or manual job is used to filter out distributions or architectures that are not required on
# every build.
.main-or-manual:
rules:
- !reference [.pipeline-trigger-rules, rules]
- if: $CI_PIPELINE_SOURCE == "schedule"
when: manual
# The trigger-pipeline job adds a manualy triggered job to the pipeline on merge requests.
trigger-pipeline:
stage: trigger
script:
- echo "starting pipeline"
rules:
- !reference [.main-or-manual, rules]
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
when: manual
allow_failure: false
- when: always
# Define the distribution targets
.dist-centos7:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: centos7
.dist-centos8:
variables:
DIST: centos8
.dist-ubi8:
rules:
- !reference [.main-or-manual, rules]
variables:
DIST: ubi8
.dist-ubuntu18.04:
variables:
DIST: ubuntu18.04
.dist-ubuntu20.04:
variables:
DIST: ubuntu20.04
.dist-packaging:
variables:
DIST: packaging
# Define architecture targets
.arch-aarch64:
variables:
ARCH: aarch64
.arch-amd64:
variables:
ARCH: amd64
.arch-arm64:
variables:
ARCH: arm64
.arch-ppc64le:
rules:
- !reference [.main-or-manual, rules]
variables:
ARCH: ppc64le
.arch-x86_64:
variables:
ARCH: x86_64
# Define the platform targets
.platform-amd64:
variables:
PLATFORM: linux/amd64
.platform-arm64:
variables:
PLATFORM: linux/arm64
# Define test helpers
.integration:
stage: test
variables:
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
VERSION: "${CI_COMMIT_SHORT_SHA}"
before_script:
- apk add --no-cache make bash jq
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
script:
- make -f deployments/container/Makefile test-${DIST}
# Define the test targets
test-packaging:
extends:
- .integration
- .dist-packaging
needs:
- image-packaging
# Download the regctl binary for use in the release steps # Download the regctl binary for use in the release steps
.regctl-setup: .regctl-setup:
before_script: before_script:
@@ -54,3 +164,84 @@ workflow:
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64 - curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
- chmod a+x bin/regctl - chmod a+x bin/regctl
- export PATH=$(pwd)/bin:${PATH} - export PATH=$(pwd)/bin:${PATH}
# .release forms the base of the deployment jobs which push images to the CI registry.
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
# target os.
.release:
stage: release
variables:
# Define the source image for the release
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
VERSION: "${CI_COMMIT_SHORT_SHA}"
# OUT_IMAGE_VERSION is overridden for external releases
OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}"
before_script:
- !reference [.regctl-setup, before_script]
# We ensure that the components of the output image are set:
- 'echo Image Name: ${OUT_IMAGE_NAME} ; [[ -n "${OUT_IMAGE_NAME}" ]] || exit 1'
- 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1'
- apk add --no-cache make bash
script:
# Log in to the "output" registry, tag the image and push the image
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- regctl registry login "${CI_REGISTRY}" -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}"
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || echo "Logging in to output registry ${OUT_REGISTRY}"'
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"'
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
# Target
- make -f deployments/container/Makefile push-${DIST}
# Define a staging release step that pushes an image to an internal "staging" repository
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
# outside of the release process.
.release:staging:
extends:
- .release
variables:
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
OUT_REGISTRY: "${NGC_REGISTRY}"
OUT_IMAGE_NAME: "${NGC_REGISTRY_STAGING_IMAGE_NAME}"
# Define an external release step that pushes an image to an external repository.
# This includes a devlopment image off main.
.release:external:
extends:
- .release
variables:
FORCE_PUBLISH_IMAGES: "yes"
rules:
- if: $CI_COMMIT_TAG
variables:
OUT_IMAGE_VERSION: "${CI_COMMIT_TAG}"
- if: $CI_COMMIT_BRANCH == $RELEASE_DEVEL_BRANCH
variables:
OUT_IMAGE_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}"
# Define the release jobs
release:staging-ubi8:
extends:
- .release:staging
- .dist-ubi8
needs:
- image-ubi8
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu20.04
needs:
- test-toolkit-ubuntu20.04
- test-containerd-ubuntu20.04
- test-crio-ubuntu20.04
- test-docker-ubuntu20.04
release:staging-packaging:
extends:
- .release:staging
- .dist-packaging
needs:
- test-packaging

View File

@@ -72,7 +72,7 @@ jobs:
env: env:
E2E_INSTALL_CTK: "true" E2E_INSTALL_CTK: "true"
E2E_IMAGE_NAME: ghcr.io/nvidia/container-toolkit E2E_IMAGE_NAME: ghcr.io/nvidia/container-toolkit
E2E_IMAGE_TAG: ${{ inputs.version }} E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }} E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }} E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
run: | run: |

View File

@@ -79,9 +79,15 @@ jobs:
runs-on: ubuntu-latest runs-on: ubuntu-latest
strategy: strategy:
matrix: matrix:
target: dist:
- application - ubuntu20.04
- ubi8
- packaging - packaging
ispr:
- ${{ github.ref_name != 'main' && !startsWith( github.ref_name, 'release-' ) }}
exclude:
- ispr: true
dist: ubi8
needs: packages needs: packages
steps: steps:
- uses: actions/checkout@v4 - uses: actions/checkout@v4
@@ -117,4 +123,4 @@ jobs:
BUILD_MULTI_ARCH_IMAGES: ${{ inputs.build_multi_arch_images }} BUILD_MULTI_ARCH_IMAGES: ${{ inputs.build_multi_arch_images }}
run: | run: |
echo "${VERSION}" echo "${VERSION}"
make -f deployments/container/Makefile build-${{ matrix.target }} make -f deployments/container/Makefile build-${{ matrix.dist }}

228
.gitlab-ci.yml Normal file
View File

@@ -0,0 +1,228 @@
# Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include:
- .common-ci.yml
# Define the package build helpers
.multi-arch-build:
before_script:
- apk add --no-cache coreutils build-base sed git bash make
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -c yes'
.package-artifacts:
variables:
ARTIFACTS_NAME: "toolkit-container-${CI_PIPELINE_ID}"
ARTIFACTS_ROOT: "toolkit-container-${CI_PIPELINE_ID}"
DIST_DIR: ${CI_PROJECT_DIR}/${ARTIFACTS_ROOT}
.package-build:
extends:
- .multi-arch-build
- .package-artifacts
stage: package-build
timeout: 3h
script:
- ./scripts/build-packages.sh ${DIST}-${ARCH}
artifacts:
name: ${ARTIFACTS_NAME}
paths:
- ${ARTIFACTS_ROOT}
needs:
- job: package-meta-packages
artifacts: true
# Define the package build targets
package-meta-packages:
extends:
- .package-artifacts
stage: package-build
variables:
SKIP_LIBNVIDIA_CONTAINER: "yes"
SKIP_NVIDIA_CONTAINER_TOOLKIT: "yes"
parallel:
matrix:
- PACKAGING: [deb, rpm]
before_script:
- apk add --no-cache coreutils build-base sed git bash make
script:
- ./scripts/build-packages.sh ${PACKAGING}
artifacts:
name: ${ARTIFACTS_NAME}
paths:
- ${ARTIFACTS_ROOT}
package-centos7-aarch64:
extends:
- .package-build
- .dist-centos7
- .arch-aarch64
package-centos7-x86_64:
extends:
- .package-build
- .dist-centos7
- .arch-x86_64
package-centos8-ppc64le:
extends:
- .package-build
- .dist-centos8
- .arch-ppc64le
package-ubuntu18.04-amd64:
extends:
- .package-build
- .dist-ubuntu18.04
- .arch-amd64
package-ubuntu18.04-arm64:
extends:
- .package-build
- .dist-ubuntu18.04
- .arch-arm64
package-ubuntu18.04-ppc64le:
extends:
- .package-build
- .dist-ubuntu18.04
- .arch-ppc64le
.buildx-setup:
before_script:
- export BUILDX_VERSION=v0.6.3
- apk add --no-cache curl
- mkdir -p ~/.docker/cli-plugins
- curl -sSLo ~/.docker/cli-plugins/docker-buildx "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64"
- chmod a+x ~/.docker/cli-plugins/docker-buildx
- docker buildx create --use --platform=linux/amd64,linux/arm64
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes'
# Define the image build targets
.image-build:
stage: image-build
variables:
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
VERSION: "${CI_COMMIT_SHORT_SHA}"
PUSH_ON_BUILD: "true"
before_script:
- !reference [.buildx-setup, before_script]
- apk add --no-cache bash make git
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
script:
- make -f deployments/container/Makefile build-${DIST}
image-ubi8:
extends:
- .image-build
- .package-artifacts
- .dist-ubi8
needs:
# Note: The ubi8 image uses the centos7 packages
- package-centos7-aarch64
- package-centos7-x86_64
image-ubuntu20.04:
extends:
- .image-build
- .package-artifacts
- .dist-ubuntu20.04
needs:
- package-ubuntu18.04-amd64
- package-ubuntu18.04-arm64
- job: package-ubuntu18.04-ppc64le
optional: true
# The DIST=packaging target creates an image containing all built packages
image-packaging:
extends:
- .image-build
- .package-artifacts
- .dist-packaging
needs:
- job: package-ubuntu18.04-amd64
- job: package-ubuntu18.04-arm64
- job: package-amazonlinux2-aarch64
optional: true
- job: package-amazonlinux2-x86_64
optional: true
- job: package-centos7-aarch64
optional: true
- job: package-centos7-x86_64
optional: true
- job: package-centos8-ppc64le
optional: true
- job: package-debian10-amd64
optional: true
- job: package-opensuse-leap15.1-x86_64
optional: true
- job: package-ubuntu18.04-ppc64le
optional: true
# Define publish test helpers
.test:docker:
extends:
- .integration
variables:
TEST_CASES: "docker"
.test:containerd:
# TODO: The containerd tests fail due to issues with SIGHUP.
# Until this is resolved with retry up to twice and allow failure here.
retry: 2
allow_failure: true
extends:
- .integration
variables:
TEST_CASES: "containerd"
.test:crio:
extends:
- .integration
variables:
TEST_CASES: "crio"
# Define the test targets
test-toolkit-ubuntu20.04:
extends:
- .test:toolkit
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
test-containerd-ubuntu20.04:
extends:
- .test:containerd
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
test-crio-ubuntu20.04:
extends:
- .test:crio
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04
test-docker-ubuntu20.04:
extends:
- .test:docker
- .dist-ubuntu20.04
needs:
- image-ubuntu20.04

View File

@@ -39,62 +39,19 @@ variables:
KITMAKER_RELEASE_FOLDER: "kitmaker" KITMAKER_RELEASE_FOLDER: "kitmaker"
PACKAGE_ARCHIVE_RELEASE_FOLDER: "releases" PACKAGE_ARCHIVE_RELEASE_FOLDER: "releases"
# .copy-images copies the required application and packaging images from the .image-pull:
# IN_IMAGE="${IN_IMAGE_NAME}:${IN_IMAGE_TAG}${TAG_SUFFIX}" stage: image-build
# to
# OUT_IMAGE="${OUT_IMAGE_NAME}:${OUT_IMAGE_TAG}${TAG_SUFFIX}"
# The script also logs into IN_REGISTRY and OUT_REGISTRY using the supplied
# username and tokens.
.copy-images:
parallel:
matrix:
- TAG_SUFFIX: ["", "-packaging"]
before_script:
- !reference [.regctl-setup, before_script]
- apk add --no-cache make bash
variables:
REGCTL: regctl
script:
- |
if [ -n ${IN_REGISTRY} ] && [ -n ${IN_REGISTRY_USER} ]; then
echo "Logging in to ${IN_REGISTRY}"
${REGCTL} registry login "${IN_REGISTRY}" -u "${IN_REGISTRY_USER}" -p "${IN_REGISTRY_TOKEN}" || exit 1
fi
if [ -n ${OUT_REGISTRY} ] && [ -n ${OUT_REGISTRY_USER} ] && [ "${IN_REGISTRY}" != "${OUT_REGISTRY}" ]; then
echo "Logging in to ${OUT_REGISTRY}"
${REGCTL} registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" || exit 1
fi
export IN_IMAGE="${IN_IMAGE_NAME}:${IN_IMAGE_TAG}${TAG_SUFFIX}"
export OUT_IMAGE="${OUT_IMAGE_NAME}:${OUT_IMAGE_TAG}${TAG_SUFFIX}"
echo "Copying ${IN_IMAGE} to ${OUT_IMAGE}"
${REGCTL} image copy ${IN_IMAGE} ${OUT_IMAGE}
# pull-images pulls images from the public CI registry to the internal CI registry.
pull-images:
extends:
- .copy-images
stage: pull
variables: variables:
IN_REGISTRY: "${STAGING_REGISTRY}" IN_REGISTRY: "${STAGING_REGISTRY}"
IN_IMAGE_NAME: ${STAGING_REGISTRY}/container-toolkit IN_IMAGE_NAME: container-toolkit
IN_IMAGE_TAG: "${STAGING_VERSION}" IN_VERSION: "${STAGING_VERSION}"
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}" OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}" OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
OUT_REGISTRY: "${CI_REGISTRY}"
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit" OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
OUT_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}" PUSH_MULTIPLE_TAGS: "false"
# We delay the job start to allow the public pipeline to generate the required images. # We delay the job start to allow the public pipeline to generate the required images.
rules: rules:
# If the pipeline is triggered from a tag or the WEB UI we don't delay the
# start of the pipeline.
- if: $CI_COMMIT_TAG || $CI_PIPELINE_SOURCE == "web"
# If the pipeline is triggered through other means (i.e. a branch or MR)
# we add a 30 minute delay to ensure that the images are available in the
# public CI registry.
- when: delayed - when: delayed
start_in: 30 minutes start_in: 30 minutes
timeout: 30 minutes timeout: 30 minutes
@@ -103,6 +60,30 @@ pull-images:
when: when:
- job_execution_timeout - job_execution_timeout
- stuck_or_timeout_failure - stuck_or_timeout_failure
before_script:
- !reference [.regctl-setup, before_script]
- apk add --no-cache make bash
- >
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
script:
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
- make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
image-ubi8:
extends:
- .dist-ubi8
- .image-pull
image-ubuntu20.04:
extends:
- .dist-ubuntu20.04
- .image-pull
# The DIST=packaging target creates an image containing all built packages
image-packaging:
extends:
- .dist-packaging
- .image-pull
# We skip the integration tests for the internal CI: # We skip the integration tests for the internal CI:
.integration: .integration:
@@ -114,37 +95,27 @@ pull-images:
# The .scan step forms the base of the image scan operation performed before releasing # The .scan step forms the base of the image scan operation performed before releasing
# images. # images.
scan-images: .scan:
stage: scan stage: scan
needs:
- pull-images
image: "${PULSE_IMAGE}" image: "${PULSE_IMAGE}"
parallel:
matrix:
- TAG_SUFFIX: [""]
PLATFORM: ["linux/amd64", "linux/arm64"]
- TAG_SUFFIX: "-packaging"
PLATFORM: "linux/amd64"
variables: variables:
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}" IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
IMAGE_ARCHIVE: "container-toolkit-${CI_JOB_ID}.tar" IMAGE_ARCHIVE: "container-toolkit-${DIST}-${ARCH}-${CI_JOB_ID}.tar"
rules: rules:
- if: $IGNORE_SCANS == "yes" - if: $SKIP_SCANS != "yes"
allow_failure: true - when: manual
- when: on_success before_script:
script: - docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
- | # TODO: We should specify the architecture here and scan all architectures
docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}" - docker pull --platform="${PLATFORM}" "${IMAGE}"
export SCAN_IMAGE=${IMAGE}${TAG_SUFFIX} - docker save "${IMAGE}" -o "${IMAGE_ARCHIVE}"
echo "Scanning image ${SCAN_IMAGE} for ${PLATFORM}" - AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
docker pull --platform="${PLATFORM}" "${SCAN_IMAGE}" - >
docker save "${SCAN_IMAGE}" -o "${IMAGE_ARCHIVE}"
AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
export SSA_TOKEN=$(curl --request POST --header "Authorization: Basic $AuthHeader" --header "Content-Type: application/x-www-form-urlencoded" ${SSA_ISSUER_URL} | jq ".access_token" | tr -d '"') export SSA_TOKEN=$(curl --request POST --header "Authorization: Basic $AuthHeader" --header "Content-Type: application/x-www-form-urlencoded" ${SSA_ISSUER_URL} | jq ".access_token" | tr -d '"')
if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi - if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi
script:
pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o - pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
rm -f "${IMAGE_ARCHIVE}" - rm -f "${IMAGE_ARCHIVE}"
artifacts: artifacts:
when: always when: always
expire_in: 1 week expire_in: 1 week
@@ -155,10 +126,62 @@ scan-images:
- vulns.json - vulns.json
- policy_evaluation.json - policy_evaluation.json
upload-kitmaker-packages: # Define the scan targets
scan-ubuntu20.04-amd64:
extends:
- .dist-ubuntu20.04
- .platform-amd64
- .scan
needs:
- image-ubuntu20.04
scan-ubuntu20.04-arm64:
extends:
- .dist-ubuntu20.04
- .platform-arm64
- .scan
needs:
- image-ubuntu20.04
- scan-ubuntu20.04-amd64
scan-ubi8-amd64:
extends:
- .dist-ubi8
- .platform-amd64
- .scan
needs:
- image-ubi8
scan-ubi8-arm64:
extends:
- .dist-ubi8
- .platform-arm64
- .scan
needs:
- image-ubi8
- scan-ubi8-amd64
scan-packaging:
extends:
- .dist-packaging
- .scan
needs:
- image-packaging
# Define external release helpers
.release:ngc:
extends:
- .release:external
variables:
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
OUT_REGISTRY: "${NGC_REGISTRY}"
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
.release:packages:
stage: release stage: release
needs: needs:
- pull-images - image-packaging
variables: variables:
VERSION: "${CI_COMMIT_SHORT_SHA}" VERSION: "${CI_COMMIT_SHORT_SHA}"
PACKAGE_REGISTRY: "${CI_REGISTRY}" PACKAGE_REGISTRY: "${CI_REGISTRY}"
@@ -176,81 +199,34 @@ upload-kitmaker-packages:
- ./scripts/release-kitmaker-artifactory.sh "${KITMAKER_ARTIFACTORY_REPO}" - ./scripts/release-kitmaker-artifactory.sh "${KITMAKER_ARTIFACTORY_REPO}"
- rm -rf ${ARTIFACTS_DIR} - rm -rf ${ARTIFACTS_DIR}
push-images-to-staging: # Define the package release targets
release:packages:kitmaker:
extends: extends:
- .copy-images - .release:packages
stage: release
release:staging-ubuntu20.04:
extends:
- .release:staging
- .dist-ubuntu20.04
needs: needs:
- scan-images - image-ubuntu20.04
variables:
IN_REGISTRY: "${CI_REGISTRY}"
IN_REGISTRY_USER: "${CI_REGISTRY_USER}"
IN_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
IN_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
IN_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
OUT_REGISTRY: "${NGC_REGISTRY}" # Define the external release targets
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}" # Release to NGC
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}" release:ngc-ubuntu20.04:
OUT_IMAGE_NAME: "${NGC_STAGING_REGISTRY}/container-toolkit"
OUT_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
.release-images:
extends: extends:
- .copy-images - .dist-ubuntu20.04
stage: release - .release:ngc
needs:
- scan-images
- push-images-to-staging
variables:
IN_REGISTRY: "${CI_REGISTRY}"
IN_REGISTRY_USER: "${CI_REGISTRY_USER}"
IN_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
IN_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
IN_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
OUT_REGISTRY: "${NGC_REGISTRY}" release:ngc-ubi8:
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
OUT_IMAGE_TAG: "${CI_COMMIT_TAG}"
release-images-to-ngc:
extends: extends:
- .release-images - .dist-ubi8
rules: - .release:ngc
- if: $CI_COMMIT_TAG
release-images-dummy: release:ngc-packaging:
extends: extends:
- .release-images - .dist-packaging
variables: - .release:ngc
REGCTL: "echo [DUMMY] regctl"
rules:
- if: $CI_COMMIT_TAG == null || $CI_COMMIT_TAG == ""
# .sign-images forms the base of the jobs which sign images in the NGC registry.
.sign-images:
stage: sign
image: ubuntu:latest
parallel:
matrix:
- TAG_SUFFIX: ["", "-packaging"]
variables:
IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
IMAGE_TAG: "${CI_COMMIT_TAG}"
NGC_CLI: "ngc-cli/ngc"
before_script:
- !reference [.ngccli-setup, before_script]
script:
- |
# We ensure that the IMAGE_NAME and IMAGE_TAG is set
echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1
echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1
export IMAGE=${IMAGE_NAME}:${IMAGE_TAG}${TAG_SUFFIX}
echo "Signing the image ${IMAGE}"
${NGC_CLI} registry image publish --source ${IMAGE} ${IMAGE} --public --discoverable --allow-guest --sign --org nvidia
# Define the external image signing steps for NGC # Define the external image signing steps for NGC
# Download the ngc cli binary for use in the sign steps # Download the ngc cli binary for use in the sign steps
@@ -268,24 +244,45 @@ release-images-dummy:
- unzip ngccli_linux.zip - unzip ngccli_linux.zip
- chmod u+x ngc-cli/ngc - chmod u+x ngc-cli/ngc
sign-ngc-images: # .sign forms the base of the deployment jobs which signs images in the CI registry.
extends: # This is extended with the image name and version to be deployed.
- .sign-images .sign:ngc:
needs: image: ubuntu:latest
- release-images-to-ngc stage: sign
rules: rules:
- if: $CI_COMMIT_TAG - if: $CI_COMMIT_TAG
variables: variables:
NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}" NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}"
IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
IMAGE_TAG: "${CI_COMMIT_TAG}-${DIST}"
retry: retry:
max: 2 max: 2
before_script:
- !reference [.ngccli-setup, before_script]
# We ensure that the IMAGE_NAME and IMAGE_TAG is set
- 'echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1'
- 'echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1'
script:
- 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
- ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia
sign-images-dummy: sign:ngc-ubuntu20.04:
extends: extends:
- .sign-images - .dist-ubuntu20.04
- .sign:ngc
needs: needs:
- release-images-dummy - release:ngc-ubuntu20.04
variables:
NGC_CLI: "echo [DUMMY] ngc-cli/ngc" sign:ngc-ubi8:
rules: extends:
- if: $CI_COMMIT_TAG == null || $CI_COMMIT_TAG == "" - .dist-ubi8
- .sign:ngc
needs:
- release:ngc-ubi8
sign:ngc-packaging:
extends:
- .dist-packaging
- .sign:ngc
needs:
- release:ngc-packaging

View File

@@ -1,139 +1,34 @@
# NVIDIA Container Toolkit Changelog # NVIDIA Container Toolkit Changelog
## v1.18.0-rc.1 ## v1.17.4
- Disable mounting of compat libs from container by default
- Add create-soname-symlinks hook
- Require matching version of libnvidia-container-tools
- Add envvar for libcuda.so parent dir to CDI spec
- Add EnvVar to Discover interface
- Resolve to legacy by default in nvidia-container-runtime-hook
- Default to jit-cdi mode in the nvidia runtime
- Use functional options to construct runtime mode resolver
- Add NVIDIA_CTK_CONFIG_FILE_PATH envvar
- Switch to cuda ubi9 base image
- Use single version tag for image
- BUGFIX: modifier: respect GPU volume-mount device requests
- Ensure consistent sorting of annotation devices
- Extract deb and rpm packages to single image
- Remove docker-run as default runtime candidate
- Return annotation devices from VisibleDevices
- Make CDI device requests consistent with other methods
- Construct container info once
- Add logic to extract annotation device requests to image type
- Add IsPrivileged function to CUDA container type
- Add device IDs to nvcdi.GetSpec API
- Refactor extracting requested devices from the container image
- Add EnvVars option for all nvidia-ctk cdi commands
- Add nvidia-cdi-refresh service
- Add discovery of arch-specific vulkan ICD
- Add disabled-device-node-modification hook to CDI spec
- Add a hook to disable device node creation in a container
- Remove redundant deduplication of search paths for WSL
- Added ability to disable specific (or all) CDI hooks
- Consolidate HookName functionality on internal/discover pkg
- Add envvar to control debug logging in CDI hooks
- Add FeatureFlags to the nvcdi API
- Reenable nvsandboxutils for driver discovery
- Edit discover.mounts to have a deterministic output
- Refactor the way we create CDI Hooks
- Issue warning on unsupported CDI hook
- Run update-ldcache in isolated namespaces
- Add cuda-compat-mode config option
- Fix mode detection on Thor-based systems
- Add rprivate to CDI mount options
- Skip nil discoverers in merge
- bump runc go dep to v1.3.0
- Fix resolution of libs in LDCache on ARM
- Updated .release:staging to stage images in nvstaging
- Refactor toolkit installer
- Allow container runtime executable path to be specified
- Add support for building ubuntu22.04 on arm64
- Fix race condition in mounts cache
- Add support for building ubuntu22.04 on amd64
- Fix update-ldcache arguments
- Remove positional arguments from nvidia-ctk-installer
- Remove deprecated --runtime-args from nvidia-ctk-installer
- Add version info to nvidia-ctk-installer
- Update nvidia-ctk-installer app name to match binary name
- Allow nvidia-ctk config --set to accept comma-separated lists
- Disable enable-cuda-compat hook for management containers
- Allow enable-cuda-compat hook to be disabled in CDI spec generation
- Add disable-cuda-compat-lib-hook feature flag
- Add basic integration tests for forward compat
- Ensure that mode hook is executed last
- Add enable-cuda-compat hook to CDI spec generation
- Add ldconfig hook in legacy mode
- Add enable-cuda-compat hook if required
- Add enable-cuda-compat hook to allow compat libs to be discovered
- Use libcontainer execseal to run ldconfig
- Add ignore-imex-channel-requests feature flag
- Disable nvsandboxutils in nvcdi API
- Allow cdi mode to work with --gpus flag
- Add E2E GitHub Action for Container Toolkit
- Add remote-test option for E2E
- Enable CDI in runtime if CDI_ENABLED is set
- Fix overwriting docker feature flags
- Add option in toolkit container to enable CDI in runtime
- Remove Set from engine config API
- Add EnableCDI() method to engine.Interface
- Add IMEX binaries to CDI discovery
- Rename test folder to tests
- Add allow-cuda-compat-libs-from-container feature flag - Add allow-cuda-compat-libs-from-container feature flag
- Disable mounting of compat libs from container
- Skip graphics modifier in CSV mode - Skip graphics modifier in CSV mode
- Move nvidia-toolkit to nvidia-ctk-installer
- Automated regression testing for the NVIDIA Container Toolkit
- Add support for containerd version 3 config
- Remove watch option from create-dev-char-symlinks
- Add string TOML source
- Improve the implementation for UseLegacyConfig
- Properly pass configSearchPaths to a Driver constructor - Properly pass configSearchPaths to a Driver constructor
- Fix create-device-node test when devices exist
- Add imex mode to CDI spec generation
- Only allow host-relative LDConfig paths
- Fix NVIDIA_IMEX_CHANNELS handling on legacy images
- Fix bug in default config file path
- Fix fsnotify.Remove logic function.
- Force symlink creation in create-symlink hook
### Changes in the Toolkit Container
- Create /work/nvidia-toolkit symlink
- Use Apache license for images
- Switch to golang distroless image
- Switch to cuda ubi9 base image
- Use single version tag for image
- Extract deb and rpm packages to single image
- Bump nvidia/cuda in /deployments/container
- Bump nvidia/cuda in /deployments/container
- Add E2E GitHub Action for Container Toolkit
- Bump nvidia/cuda in /deployments/container
- Move nvidia-toolkit to nvidia-ctk-installer
- Add support for containerd version 3 config - Add support for containerd version 3 config
- Improve the implementation for UseLegacyConfig - Add string TOML source
- Bump nvidia/cuda in /deployments/container
- Add imex mode to CDI spec generation
- Only allow host-relative LDConfig paths
- Fallback to file for runtime config
### Changes in libnvidia-container ### Changes in libnvidia-container
- Fix pointer accessing local variable out of scope
- Require version match between libnvidia-container-tools and libnvidia-container1
- Add libnvidia-gpucomp.so to the list of compute libs
- Use VERSION_ prefix for version parts in makefiles
- Add additional logging
- Do not discard container flags when --cuda-compat-mode is not specified
- Remove unneeded --no-cntlibs argument from list command
- Add cuda-compat-mode flag to configure command
- Skip files when user has insufficient permissions
- Fix building with Go 1.24
- Add no-cntlibs CLI option to nvidia-container-cli - Add no-cntlibs CLI option to nvidia-container-cli
- Fix always using fallback
- Add fallback for systems without memfd_create()
- Create virtual copy of host ldconfig binary before calling fexecve()
- Fix some typos in text.
### Changes in the Toolkit Container
- Bump CUDA base image version to 12.6.3
## v1.17.3
- Only allow host-relative LDConfig paths by default.
### Changes in libnvidia-container
- Create virtual copy of host ldconfig binary before calling fexecve()
## v1.17.2
- Fixed a bug where legacy images would set imex channels as `all`.
## v1.17.1
- Fixed a bug where specific symlinks existing in a container image could cause a container to fail to start.
- Fixed a bug on Tegra-based systems where a container would fail to start.
- Fixed a bug where the default container runtime config path was not properly set.
### Changes in the Toolkit Container
- Fallback to using a config file if the current runtime config can not be determined from the command line.
## v1.17.0 ## v1.17.0
- Promote v1.17.0-rc.2 to v1.17.0 - Promote v1.17.0-rc.2 to v1.17.0

View File

@@ -20,10 +20,8 @@ import (
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
createsonamesymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-soname-symlinks"
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks" symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat"
disabledevicenodemodification "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/disable-device-node-modification"
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache" ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
) )
@@ -36,8 +34,6 @@ func New(logger logger.Interface) []*cli.Command {
symlinks.NewCommand(logger), symlinks.NewCommand(logger),
chmod.NewCommand(logger), chmod.NewCommand(logger),
cudacompat.NewCommand(logger), cudacompat.NewCommand(logger),
createsonamesymlinks.NewCommand(logger),
disabledevicenodemodification.NewCommand(logger),
} }
} }

View File

@@ -1,166 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package create_soname_symlinks
import (
"errors"
"fmt"
"log"
"os"
"github.com/moby/sys/reexec"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
const (
reexecUpdateLdCacheCommandName = "reexec-create-soname-symlinks"
)
type command struct {
logger logger.Interface
}
type options struct {
folders cli.StringSlice
ldconfigPath string
containerSpec string
}
func init() {
reexec.Register(reexecUpdateLdCacheCommandName, createSonameSymlinksHandler)
if reexec.Init() {
os.Exit(0)
}
}
// NewCommand constructs an create-soname-symlinks command with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
c := command{
logger: logger,
}
return c.build()
}
// build the create-soname-symlinks command
func (m command) build() *cli.Command {
cfg := options{}
// Create the 'create-soname-symlinks' command
c := cli.Command{
Name: "create-soname-symlinks",
Usage: "Create soname symlinks libraries in specified directories",
Before: func(c *cli.Context) error {
return m.validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return m.run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringSliceFlag{
Name: "folder",
Usage: "Specify a directory to generate soname symlinks in. Can be specified multiple times",
Destination: &cfg.folders,
},
&cli.StringFlag{
Name: "ldconfig-path",
Usage: "Specify the path to ldconfig on the host",
Destination: &cfg.ldconfigPath,
Value: "/sbin/ldconfig",
},
&cli.StringFlag{
Name: "container-spec",
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func (m command) validateFlags(c *cli.Context, cfg *options) error {
if cfg.ldconfigPath == "" {
return errors.New("ldconfig-path must be specified")
}
return nil
}
func (m command) run(c *cli.Context, cfg *options) error {
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %v", err)
}
containerRootDir, err := s.GetContainerRoot()
if err != nil || containerRootDir == "" || containerRootDir == "/" {
return fmt.Errorf("failed to determined container root: %v", err)
}
cmd, err := ldconfig.NewRunner(
reexecUpdateLdCacheCommandName,
cfg.ldconfigPath,
containerRootDir,
cfg.folders.Value()...,
)
if err != nil {
return err
}
return cmd.Run()
}
// createSonameSymlinksHandler wraps createSonameSymlinks with error handling.
func createSonameSymlinksHandler() {
if err := createSonameSymlinks(os.Args); err != nil {
log.Printf("Error updating ldcache: %v", err)
os.Exit(1)
}
}
// createSonameSymlinks ensures that soname symlinks are created in the
// specified directories.
// It is invoked from a reexec'd handler and provides namespace isolation for
// the operations performed by this hook. At the point where this is invoked,
// we are in a new mount namespace that is cloned from the parent.
//
// args[0] is the reexec initializer function name
// args[1] is the path of the ldconfig binary on the host
// args[2] is the container root directory
// The remaining args are directories where soname symlinks need to be created.
func createSonameSymlinks(args []string) error {
if len(args) < 3 {
return fmt.Errorf("incorrect arguments: %v", args)
}
hostLdconfigPath := args[1]
containerRootDirPath := args[2]
ldconfig, err := ldconfig.New(
hostLdconfigPath,
containerRootDirPath,
)
if err != nil {
return fmt.Errorf("failed to construct ldconfig runner: %w", err)
}
return ldconfig.CreateSonameSymlinks(args[3:]...)
}

View File

@@ -1,144 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package disabledevicenodemodification
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"os"
"strings"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
)
const (
nvidiaDriverParamsPath = "/proc/driver/nvidia/params"
)
type options struct {
containerSpec string
}
// NewCommand constructs an disable-device-node-modification subcommand with the specified logger
func NewCommand(logger logger.Interface) *cli.Command {
cfg := options{}
c := cli.Command{
Name: "disable-device-node-modification",
Usage: "Ensure that the /proc/driver/nvidia/params file present in the container does not allow device node modifications.",
Before: func(c *cli.Context) error {
return validateFlags(c, &cfg)
},
Action: func(c *cli.Context) error {
return run(c, &cfg)
},
}
c.Flags = []cli.Flag{
&cli.StringFlag{
Name: "container-spec",
Hidden: true,
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
Destination: &cfg.containerSpec,
},
}
return &c
}
func validateFlags(c *cli.Context, cfg *options) error {
return nil
}
func run(_ *cli.Context, cfg *options) error {
modifiedParamsFileContents, err := getModifiedNVIDIAParamsContents()
if err != nil {
return fmt.Errorf("failed to get modified params file contents: %w", err)
}
if len(modifiedParamsFileContents) == 0 {
return nil
}
s, err := oci.LoadContainerState(cfg.containerSpec)
if err != nil {
return fmt.Errorf("failed to load container state: %w", err)
}
containerRootDirPath, err := s.GetContainerRoot()
if err != nil {
return fmt.Errorf("failed to determined container root: %w", err)
}
return createParamsFileInContainer(containerRootDirPath, modifiedParamsFileContents)
}
func getModifiedNVIDIAParamsContents() ([]byte, error) {
hostNvidiaParamsFile, err := os.Open(nvidiaDriverParamsPath)
if errors.Is(err, os.ErrNotExist) {
return nil, nil
}
if err != nil {
return nil, fmt.Errorf("failed to load params file: %w", err)
}
defer hostNvidiaParamsFile.Close()
modifiedContents, err := getModifiedParamsFileContentsFromReader(hostNvidiaParamsFile)
if err != nil {
return nil, fmt.Errorf("failed to get modfied params file contents: %w", err)
}
return modifiedContents, nil
}
// getModifiedParamsFileContentsFromReader returns the contents of a modified params file from the specified reader.
func getModifiedParamsFileContentsFromReader(r io.Reader) ([]byte, error) {
var modified bytes.Buffer
scanner := bufio.NewScanner(r)
var requiresModification bool
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "ModifyDeviceFiles: ") {
if line == "ModifyDeviceFiles: 0" {
return nil, nil
}
if line == "ModifyDeviceFiles: 1" {
line = "ModifyDeviceFiles: 0"
requiresModification = true
}
}
if _, err := modified.WriteString(line + "\n"); err != nil {
return nil, fmt.Errorf("failed to create output buffer: %w", err)
}
}
if err := scanner.Err(); err != nil {
return nil, fmt.Errorf("failed to read params file: %w", err)
}
if !requiresModification {
return nil, nil
}
return modified.Bytes(), nil
}

View File

@@ -1,91 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package disabledevicenodemodification
import (
"bytes"
"testing"
"github.com/stretchr/testify/require"
)
func TestGetModifiedParamsFileContentsFromReader(t *testing.T) {
testCases := map[string]struct {
contents []byte
expectedError error
expectedContents []byte
}{
"no contents": {
contents: nil,
expectedError: nil,
expectedContents: nil,
},
"other contents are ignored": {
contents: []byte(`# Some other content
that we don't care about
`),
expectedError: nil,
expectedContents: nil,
},
"already zero requires no modification": {
contents: []byte("ModifyDeviceFiles: 0"),
expectedError: nil,
expectedContents: nil,
},
"leading spaces require no modification": {
contents: []byte(" ModifyDeviceFiles: 1"),
},
"Trailing spaces require no modification": {
contents: []byte("ModifyDeviceFiles: 1 "),
},
"Not 1 require no modification": {
contents: []byte("ModifyDeviceFiles: 11"),
},
"single line requires modification": {
contents: []byte("ModifyDeviceFiles: 1"),
expectedError: nil,
expectedContents: []byte("ModifyDeviceFiles: 0\n"),
},
"single line with trailing newline requires modification": {
contents: []byte("ModifyDeviceFiles: 1\n"),
expectedError: nil,
expectedContents: []byte("ModifyDeviceFiles: 0\n"),
},
"other content is maintained": {
contents: []byte(`ModifyDeviceFiles: 1
other content
that
is maintained`),
expectedError: nil,
expectedContents: []byte(`ModifyDeviceFiles: 0
other content
that
is maintained
`),
},
}
for description, tc := range testCases {
t.Run(description, func(t *testing.T) {
contents, err := getModifiedParamsFileContentsFromReader(bytes.NewReader(tc.contents))
require.EqualValues(t, tc.expectedError, err)
require.EqualValues(t, string(tc.expectedContents), string(contents))
})
}
}

View File

@@ -1,63 +0,0 @@
//go:build linux
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package disabledevicenodemodification
import (
"fmt"
"os"
"path/filepath"
"github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix"
)
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
tmpRoot, err := os.MkdirTemp("", "nvct-empty-dir*")
if err != nil {
return fmt.Errorf("failed to create temp root: %w", err)
}
if err := createTmpFs(tmpRoot, len(contents)); err != nil {
return fmt.Errorf("failed to create tmpfs mount for params file: %w", err)
}
modifiedParamsFile, err := os.OpenFile(filepath.Join(tmpRoot, "nvct-params"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0444)
if err != nil {
return fmt.Errorf("failed to open modified params file: %w", err)
}
defer modifiedParamsFile.Close()
if _, err := modifiedParamsFile.Write(contents); err != nil {
return fmt.Errorf("failed to write temporary params file: %w", err)
}
err = utils.WithProcfd(containerRootDirPath, nvidiaDriverParamsPath, func(nvidiaDriverParamsFdPath string) error {
return unix.Mount(modifiedParamsFile.Name(), nvidiaDriverParamsFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
})
if err != nil {
return fmt.Errorf("failed to mount modified params file: %w", err)
}
return nil
}
func createTmpFs(target string, size int) error {
return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
}

View File

@@ -1,27 +0,0 @@
//go:build !linux
// +build !linux
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package disabledevicenodemodification
import "fmt"
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
return fmt.Errorf("not supported")
}

View File

@@ -0,0 +1,46 @@
/**
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldcache
import (
"os"
"path/filepath"
"github.com/moby/sys/symlink"
)
// A containerRoot represents the root filesystem of a container.
type containerRoot string
// hasPath checks whether the specified path exists in the root.
func (r containerRoot) hasPath(path string) bool {
resolved, err := r.resolve(path)
if err != nil {
return false
}
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
return false
}
return true
}
// resolve returns the absolute path including root path.
// Symlinks are resolved, but are guaranteed to resolve in the root.
func (r containerRoot) resolve(path string) (string, error) {
absolute := filepath.Clean(filepath.Join(string(r), path))
return symlink.FollowSymlinkInScope(absolute, string(r))
}

View File

@@ -17,7 +17,7 @@
# limitations under the License. # limitations under the License.
**/ **/
package ldconfig package ldcache
import ( import (
"errors" "errors"
@@ -29,8 +29,8 @@ import (
"syscall" "syscall"
securejoin "github.com/cyphar/filepath-securejoin" securejoin "github.com/cyphar/filepath-securejoin"
"github.com/moby/sys/reexec"
"github.com/moby/sys/reexec"
"github.com/opencontainers/runc/libcontainer/utils" "github.com/opencontainers/runc/libcontainer/utils"
"golang.org/x/sys/unix" "golang.org/x/sys/unix"
) )
@@ -182,7 +182,7 @@ func createTmpFs(target string, size int) error {
// createReexecCommand creates a command that can be used to trigger the reexec // createReexecCommand creates a command that can be used to trigger the reexec
// initializer. // initializer.
// On linux this command runs in new namespaces. // On linux this command runs in new namespaces.
func createReexecCommand(args []string) (*exec.Cmd, error) { func createReexecCommand(args []string) *exec.Cmd {
cmd := reexec.Command(args...) cmd := reexec.Command(args...)
cmd.Stdin = os.Stdin cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout cmd.Stdout = os.Stdout
@@ -196,5 +196,5 @@ func createReexecCommand(args []string) (*exec.Cmd, error) {
syscall.CLONE_NEWNET, syscall.CLONE_NEWNET,
} }
return cmd, nil return cmd
} }

View File

@@ -17,11 +17,14 @@
# limitations under the License. # limitations under the License.
**/ **/
package ldconfig package ldcache
import ( import (
"fmt" "fmt"
"os"
"os/exec" "os/exec"
"github.com/moby/sys/reexec"
) )
func pivotRoot(newroot string) error { func pivotRoot(newroot string) error {
@@ -36,6 +39,13 @@ func mountProc(newroot string) error {
return fmt.Errorf("not supported") return fmt.Errorf("not supported")
} }
func createReexecCommand(args []string) (*exec.Cmd, error) { // createReexecCommand creates a command that can be used ot trigger the reexec
return nil, fmt.Errorf("not supported") // initializer.
func createReexecCommand(args []string) *exec.Cmd {
cmd := reexec.Command(args...)
cmd.Stdin = os.Stdin
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
return cmd
} }

View File

@@ -16,7 +16,7 @@
# limitations under the License. # limitations under the License.
**/ **/
package ldconfig package ldcache
import ( import (
"fmt" "fmt"

View File

@@ -16,7 +16,7 @@
# limitations under the License. # limitations under the License.
**/ **/
package ldconfig package ldcache
import "syscall" import "syscall"

View File

@@ -21,16 +21,24 @@ import (
"fmt" "fmt"
"log" "log"
"os" "os"
"strings"
"github.com/moby/sys/reexec" "github.com/moby/sys/reexec"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig" "github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
) )
const ( const (
// ldsoconfdFilenamePattern specifies the pattern for the filename
// in ld.so.conf.d that includes references to the specified directories.
// The 00-nvcr prefix is chosen to ensure that these libraries have a
// higher precedence than other libraries on the system, but lower than
// the 00-cuda-compat that is included in some containers.
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
reexecUpdateLdCacheCommandName = "reexec-update-ldcache" reexecUpdateLdCacheCommandName = "reexec-update-ldcache"
) )
@@ -115,15 +123,15 @@ func (m command) run(c *cli.Context, cfg *options) error {
return fmt.Errorf("failed to determined container root: %v", err) return fmt.Errorf("failed to determined container root: %v", err)
} }
cmd, err := ldconfig.NewRunner( args := []string{
reexecUpdateLdCacheCommandName, reexecUpdateLdCacheCommandName,
cfg.ldconfigPath, strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"),
containerRootDir, containerRootDir,
cfg.folders.Value()...,
)
if err != nil {
return err
} }
args = append(args, cfg.folders.Value()...)
cmd := createReexecCommand(args)
return cmd.Run() return cmd.Run()
} }
@@ -135,16 +143,15 @@ func updateLdCacheHandler() {
} }
} }
// updateLdCache ensures that the ldcache in the container is updated to include // updateLdCache is invoked from a reexec'd handler and provides namespace
// libraries that are mounted from the host. // isolation for the operations performed by this hook.
// It is invoked from a reexec'd handler and provides namespace isolation for // At the point where this is invoked, we are in a new mount namespace that is
// the operations performed by this hook. At the point where this is invoked, // cloned from the parent.
// we are in a new mount namespace that is cloned from the parent.
// //
// args[0] is the reexec initializer function name // args[0] is the reexec initializer function name
// args[1] is the path of the ldconfig binary on the host // args[1] is the path of the ldconfig binary on the host
// args[2] is the container root directory // args[2] is the container root directory
// The remaining args are folders where soname symlinks need to be created. // The remaining args are folders that need to be added to the ldcache.
func updateLdCache(args []string) error { func updateLdCache(args []string) error {
if len(args) < 3 { if len(args) < 3 {
return fmt.Errorf("incorrect arguments: %v", args) return fmt.Errorf("incorrect arguments: %v", args)
@@ -152,13 +159,97 @@ func updateLdCache(args []string) error {
hostLdconfigPath := args[1] hostLdconfigPath := args[1]
containerRootDirPath := args[2] containerRootDirPath := args[2]
ldconfig, err := ldconfig.New( // To prevent leaking the parent proc filesystem, we create a new proc mount
hostLdconfigPath, // in the container root.
containerRootDirPath, if err := mountProc(containerRootDirPath); err != nil {
) return fmt.Errorf("error mounting /proc: %w", err)
if err != nil {
return fmt.Errorf("failed to construct ldconfig runner: %w", err)
} }
return ldconfig.UpdateLDCache(args[3:]...) // We mount the host ldconfig before we pivot root since host paths are not
// visible after the pivot root operation.
ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath)
if err != nil {
return fmt.Errorf("error mounting host ldconfig: %w", err)
}
// We pivot to the container root for the new process, this further limits
// access to the host.
if err := pivotRoot(containerRootDirPath); err != nil {
return fmt.Errorf("error running pivot_root: %w", err)
}
return runLdconfig(ldconfigPath, args[3:]...)
}
// runLdconfig runs the ldconfig binary and ensures that the specified directories
// are processed for the ldcache.
func runLdconfig(ldconfigPath string, directories ...string) error {
args := []string{
"ldconfig",
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
// be configured to use a different config file by default.
// Note that since we apply the `-r {{ .containerRootDir }}` argument, /etc/ld.so.conf is
// in the container.
"-f", "/etc/ld.so.conf",
}
containerRoot := containerRoot("/")
if containerRoot.hasPath("/etc/ld.so.cache") {
args = append(args, "-C", "/etc/ld.so.cache")
} else {
args = append(args, "-N")
}
if containerRoot.hasPath("/etc/ld.so.conf.d") {
err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...)
if err != nil {
return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
}
} else {
args = append(args, directories...)
}
return SafeExec(ldconfigPath, args, nil)
}
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/.
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
// contains the specified directories on each line.
func createLdsoconfdFile(pattern string, dirs ...string) error {
if len(dirs) == 0 {
return nil
}
ldsoconfdDir := "/etc/ld.so.conf.d"
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
}
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
if err != nil {
return fmt.Errorf("failed to create config file: %w", err)
}
defer func() {
_ = configFile.Close()
}()
added := make(map[string]bool)
for _, dir := range dirs {
if added[dir] {
continue
}
_, err = fmt.Fprintf(configFile, "%s\n", dir)
if err != nil {
return fmt.Errorf("failed to update config file: %w", err)
}
added[dir] = true
}
// The created file needs to be world readable for the cases where the container is run as a non-root user.
if err := configFile.Chmod(0644); err != nil {
return fmt.Errorf("failed to chmod config file: %w", err)
}
return nil
} }

View File

@@ -13,6 +13,10 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
) )
const (
capSysAdmin = "CAP_SYS_ADMIN"
)
type nvidiaConfig struct { type nvidiaConfig struct {
Devices []string Devices []string
MigConfigDevices string MigConfigDevices string
@@ -99,9 +103,9 @@ func loadSpec(path string) (spec *Spec) {
return return
} }
func (s *Spec) GetCapabilities() []string { func isPrivileged(s *Spec) bool {
if s == nil || s.Process == nil || s.Process.Capabilities == nil { if s.Process.Capabilities == nil {
return nil return false
} }
var caps []string var caps []string
@@ -114,22 +118,67 @@ func (s *Spec) GetCapabilities() []string {
if err != nil { if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err) log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
} }
return caps for _, c := range caps {
if c == capSysAdmin {
return true
}
}
return false
} }
// Otherwise, parse s.Process.Capabilities as: // Otherwise, parse s.Process.Capabilities as:
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54 // github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
capabilities := specs.LinuxCapabilities{} process := specs.Process{
err := json.Unmarshal(*s.Process.Capabilities, &capabilities) Env: s.Process.Env,
}
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
if err != nil { if err != nil {
log.Panicln("could not decode Process.Capabilities in OCI spec:", err) log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
} }
return image.OCISpecCapabilities(capabilities).GetCapabilities() fullSpec := specs.Spec{
Version: *s.Version,
Process: &process,
}
return image.IsPrivileged(&fullSpec)
} }
func isPrivileged(s *Spec) bool { func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
return image.IsPrivileged(s) // We check if the image has at least one of the Swarm resource envvars defined and use this
// if specified.
for _, envvar := range swarmResourceEnvvars {
if containerImage.HasEnvvar(envvar) {
return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
}
}
return containerImage.VisibleDevicesFromEnvVar()
}
func (hookConfig *hookConfig) getDevices(image image.CUDA, privileged bool) []string {
// If enabled, try and get the device list from volume mounts first
if hookConfig.AcceptDeviceListAsVolumeMounts {
devices := image.VisibleDevicesFromMounts()
if len(devices) > 0 {
return devices
}
}
// Fallback to reading from the environment variable if privileges are correct
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
if len(devices) == 0 {
return nil
}
if privileged || hookConfig.AcceptEnvvarUnprivileged {
return devices
}
configName := hookConfig.getConfigOption("AcceptEnvvarUnprivileged")
log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, %v=%v) ", privileged, configName, hookConfig.AcceptEnvvarUnprivileged)
return nil
} }
func getMigConfigDevices(i image.CUDA) *string { func getMigConfigDevices(i image.CUDA) *string {
@@ -176,6 +225,7 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
// We use the default driver capabilities by default. This is filtered to only include the // We use the default driver capabilities by default. This is filtered to only include the
// supported capabilities // supported capabilities
supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities) supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities) capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities) capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
@@ -201,7 +251,7 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig { func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
legacyImage := image.IsLegacy() legacyImage := image.IsLegacy()
devices := image.VisibleDevices() devices := hookConfig.getDevices(image, privileged)
if len(devices) == 0 { if len(devices) == 0 {
// empty devices means this is not a GPU container. // empty devices means this is not a GPU container.
return nil return nil
@@ -242,14 +292,7 @@ func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool)
} }
} }
func (hookConfig *hookConfig) getContainerConfig() (config *containerConfig) { func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {
hookConfig.Lock()
defer hookConfig.Unlock()
if hookConfig.containerConfig != nil {
return hookConfig.containerConfig
}
var h HookState var h HookState
d := json.NewDecoder(os.Stdin) d := json.NewDecoder(os.Stdin)
if err := d.Decode(&h); err != nil { if err := d.Decode(&h); err != nil {
@@ -263,28 +306,20 @@ func (hookConfig *hookConfig) getContainerConfig() (config *containerConfig) {
s := loadSpec(path.Join(b, "config.json")) s := loadSpec(path.Join(b, "config.json"))
privileged := isPrivileged(s) image, err := image.New(
i, err := image.New(
image.WithEnv(s.Process.Env), image.WithEnv(s.Process.Env),
image.WithMounts(s.Mounts), image.WithMounts(s.Mounts),
image.WithPrivileged(privileged),
image.WithDisableRequire(hookConfig.DisableRequire), image.WithDisableRequire(hookConfig.DisableRequire),
image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts),
image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged),
image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...),
) )
if err != nil { if err != nil {
log.Panicln(err) log.Panicln(err)
} }
cc := containerConfig{ privileged := isPrivileged(s)
return containerConfig{
Pid: h.Pid, Pid: h.Pid,
Rootfs: s.Root.Path, Rootfs: s.Root.Path,
Image: i, Image: image,
Nvidia: hookConfig.getNvidiaConfig(i, privileged), Nvidia: hookConfig.getNvidiaConfig(image, privileged),
} }
hookConfig.containerConfig = &cc
return hookConfig.containerConfig
} }

View File

@@ -1,8 +1,10 @@
package main package main
import ( import (
"path/filepath"
"testing" "testing"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config"
@@ -477,17 +479,14 @@ func TestGetNvidiaConfig(t *testing.T) {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
image, _ := image.New( image, _ := image.New(
image.WithEnvMap(tc.env), image.WithEnvMap(tc.env),
image.WithPrivileged(tc.privileged),
image.WithPreferredVisibleDevicesEnvVars(tc.hookConfig.getSwarmResourceEnvvars()...),
) )
// Wrap the call to getNvidiaConfig() in a closure. // Wrap the call to getNvidiaConfig() in a closure.
var cfg *nvidiaConfig var cfg *nvidiaConfig
getConfig := func() { getConfig := func() {
hookCfg := tc.hookConfig hookCfg := tc.hookConfig
if hookCfg == nil { if hookCfg == nil {
defaultConfig, _ := config.GetDefault() defaultConfig, _ := config.GetDefault()
hookCfg = &hookConfig{Config: defaultConfig} hookCfg = &hookConfig{defaultConfig}
} }
cfg = hookCfg.getNvidiaConfig(image, tc.privileged) cfg = hookCfg.getNvidiaConfig(image, tc.privileged)
} }
@@ -519,6 +518,340 @@ func TestGetNvidiaConfig(t *testing.T) {
} }
} }
func TestDeviceListSourcePriority(t *testing.T) {
var tests = []struct {
description string
mountDevices []specs.Mount
envvarDevices string
privileged bool
acceptUnprivileged bool
acceptMounts bool
expectedDevices []string
}{
{
description: "Mount devices, unprivileged, no accept unprivileged",
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
privileged: false,
acceptUnprivileged: false,
acceptMounts: true,
expectedDevices: []string{"GPU0", "GPU1"},
},
{
description: "No mount devices, unprivileged, no accept unprivileged",
mountDevices: nil,
envvarDevices: "GPU0,GPU1",
privileged: false,
acceptUnprivileged: false,
acceptMounts: true,
expectedDevices: nil,
},
{
description: "No mount devices, privileged, no accept unprivileged",
mountDevices: nil,
envvarDevices: "GPU0,GPU1",
privileged: true,
acceptUnprivileged: false,
acceptMounts: true,
expectedDevices: []string{"GPU0", "GPU1"},
},
{
description: "No mount devices, unprivileged, accept unprivileged",
mountDevices: nil,
envvarDevices: "GPU0,GPU1",
privileged: false,
acceptUnprivileged: true,
acceptMounts: true,
expectedDevices: []string{"GPU0", "GPU1"},
},
{
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
privileged: false,
acceptUnprivileged: true,
acceptMounts: false,
expectedDevices: []string{"GPU2", "GPU3"},
},
{
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
privileged: false,
acceptUnprivileged: false,
acceptMounts: false,
expectedDevices: nil,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
// Wrap the call to getDevices() in a closure.
var devices []string
getDevices := func() {
image, _ := image.New(
image.WithEnvMap(
map[string]string{
image.EnvVarNvidiaVisibleDevices: tc.envvarDevices,
},
),
image.WithMounts(tc.mountDevices),
)
defaultConfig, _ := config.GetDefault()
cfg := &hookConfig{defaultConfig}
cfg.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
cfg.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
devices = cfg.getDevices(image, tc.privileged)
}
// For all other tests, just grab the devices and check the results
getDevices()
require.Equal(t, tc.expectedDevices, devices)
})
}
}
func TestGetDevicesFromEnvvar(t *testing.T) {
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
gpuID := "GPU-12345"
anotherGPUID := "GPU-67890"
thirdGPUID := "MIG-12345"
var tests = []struct {
description string
swarmResourceEnvvars []string
env map[string]string
expectedDevices []string
}{
{
description: "empty env returns nil for non-legacy image",
},
{
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: "",
},
},
{
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: "void",
},
},
{
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: "none",
},
expectedDevices: []string{""},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: gpuID,
},
expectedDevices: []string{gpuID},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: gpuID,
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
{
description: "empty env returns all for legacy image",
env: map[string]string{
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{"all"},
},
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
// not enabled
{
description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
envDockerResourceGPUs: anotherGPUID,
},
},
{
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: "",
envDockerResourceGPUs: anotherGPUID,
},
},
{
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: "void",
envDockerResourceGPUs: anotherGPUID,
},
},
{
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: "none",
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{""},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{gpuID},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
{
description: "empty env returns all for legacy image",
env: map[string]string{
envDockerResourceGPUs: anotherGPUID,
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{"all"},
},
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
// enabled
{
description: "empty env returns nil for non-legacy image",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
},
{
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: "",
},
},
{
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: "void",
},
},
{
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: "none",
},
expectedDevices: []string{""},
},
{
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: gpuID,
},
expectedDevices: []string{gpuID},
},
{
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: gpuID,
image.EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
{
description: "DOCKER_RESOURCE_GPUS is selected if present",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
{
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{envDockerResourceGPUs},
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
{
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
{
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS": thirdGPUID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: []string{thirdGPUID, anotherGPUID},
},
{
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
image.EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
image, _ := image.New(
image.WithEnvMap(tc.env),
)
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
require.EqualValues(t, tc.expectedDevices, devices)
})
}
}
func TestGetDriverCapabilities(t *testing.T) { func TestGetDriverCapabilities(t *testing.T) {
supportedCapabilities := "compute,display,utility,video" supportedCapabilities := "compute,display,utility,video"

View File

@@ -4,46 +4,50 @@ import (
"fmt" "fmt"
"log" "log"
"os" "os"
"path"
"reflect" "reflect"
"strings" "strings"
"sync"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info" )
const (
configPath = "/etc/nvidia-container-runtime/config.toml"
driverPath = "/run/nvidia/driver"
) )
// hookConfig wraps the toolkit config. // hookConfig wraps the toolkit config.
// This allows for functions to be defined on the local type. // This allows for functions to be defined on the local type.
type hookConfig struct { type hookConfig struct {
sync.Mutex
*config.Config *config.Config
containerConfig *containerConfig
} }
// loadConfig loads the required paths for the hook config. // loadConfig loads the required paths for the hook config.
func loadConfig() (*config.Config, error) { func loadConfig() (*config.Config, error) {
configFilePath, required := getConfigFilePath() var configPaths []string
cfg, err := config.New( var required bool
config.WithConfigFile(configFilePath), if len(*configflag) != 0 {
config.WithRequired(true), configPaths = append(configPaths, *configflag)
) required = true
if err == nil { } else {
return cfg.Config() configPaths = append(configPaths, path.Join(driverPath, configPath), configPath)
} else if os.IsNotExist(err) && !required {
return config.GetDefault()
} }
return nil, fmt.Errorf("couldn't open required configuration file: %v", err)
}
func getConfigFilePath() (string, bool) { for _, p := range configPaths {
if configFromFlag := *configflag; configFromFlag != "" { cfg, err := config.New(
return configFromFlag, true config.WithConfigFile(p),
config.WithRequired(true),
)
if err == nil {
return cfg.Config()
} else if os.IsNotExist(err) && !required {
continue
}
return nil, fmt.Errorf("couldn't open required configuration file: %v", err)
} }
if configFromEnvvar := os.Getenv(config.FilePathOverrideEnvVar); configFromEnvvar != "" {
return configFromEnvvar, true return config.GetDefault()
}
return config.GetConfigFilePath(), false
} }
func getHookConfig() (*hookConfig, error) { func getHookConfig() (*hookConfig, error) {
@@ -51,7 +55,7 @@ func getHookConfig() (*hookConfig, error) {
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to load config: %v", err) return nil, fmt.Errorf("failed to load config: %v", err)
} }
config := &hookConfig{Config: cfg} config := &hookConfig{cfg}
allSupportedDriverCapabilities := image.SupportedDriverCapabilities allSupportedDriverCapabilities := image.SupportedDriverCapabilities
if config.SupportedDriverCapabilities == "all" { if config.SupportedDriverCapabilities == "all" {
@@ -69,8 +73,8 @@ func getHookConfig() (*hookConfig, error) {
// getConfigOption returns the toml config option associated with the // getConfigOption returns the toml config option associated with the
// specified struct field. // specified struct field.
func (c *hookConfig) getConfigOption(fieldName string) string { func (c hookConfig) getConfigOption(fieldName string) string {
t := reflect.TypeOf(&c) t := reflect.TypeOf(c)
f, ok := t.FieldByName(fieldName) f, ok := t.FieldByName(fieldName)
if !ok { if !ok {
return fieldName return fieldName
@@ -84,7 +88,7 @@ func (c *hookConfig) getConfigOption(fieldName string) string {
// getSwarmResourceEnvvars returns the swarm resource envvars for the config. // getSwarmResourceEnvvars returns the swarm resource envvars for the config.
func (c *hookConfig) getSwarmResourceEnvvars() []string { func (c *hookConfig) getSwarmResourceEnvvars() []string {
if c == nil || c.SwarmResource == "" { if c.SwarmResource == "" {
return nil return nil
} }
@@ -123,21 +127,3 @@ func (c *hookConfig) nvidiaContainerCliCUDACompatModeFlags() []string {
} }
return []string{flag} return []string{flag}
} }
func (c *hookConfig) assertModeIsLegacy() error {
if c.NVIDIAContainerRuntimeHookConfig.SkipModeDetection {
return nil
}
mr := info.NewRuntimeModeResolver(
info.WithLogger(&logInterceptor{}),
info.WithImage(&c.containerConfig.Image),
info.WithDefaultMode(info.LegacyRuntimeMode),
)
mode := mr.ResolveRuntimeMode(c.NVIDIAContainerRuntimeConfig.Mode)
if mode == "legacy" {
return nil
}
return fmt.Errorf("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead")
}

View File

@@ -90,10 +90,10 @@ func TestGetHookConfig(t *testing.T) {
} }
} }
var cfg *hookConfig var cfg hookConfig
getHookConfig := func() { getHookConfig := func() {
c, _ := getHookConfig() c, _ := getHookConfig()
cfg = c cfg = *c
} }
if tc.expectedPanic { if tc.expectedPanic {

View File

@@ -55,7 +55,7 @@ func getCLIPath(config config.ContainerCLIConfig) string {
} }
// getRootfsPath returns an absolute path. We don't need to resolve symlinks for now. // getRootfsPath returns an absolute path. We don't need to resolve symlinks for now.
func getRootfsPath(config *containerConfig) string { func getRootfsPath(config containerConfig) string {
rootfs, err := filepath.Abs(config.Rootfs) rootfs, err := filepath.Abs(config.Rootfs)
if err != nil { if err != nil {
log.Panicln(err) log.Panicln(err)
@@ -82,8 +82,8 @@ func doPrestart() {
return return
} }
if err := hook.assertModeIsLegacy(); err != nil { if !hook.NVIDIAContainerRuntimeHookConfig.SkipModeDetection && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntimeConfig.Mode, container.Image) != "legacy" {
log.Panicf("%v", err) log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
} }
rootfs := getRootfsPath(container) rootfs := getRootfsPath(container)

View File

@@ -21,8 +21,8 @@ The `runtimes` config option allows for the low-level runtime to be specified. T
The default value for this setting is: The default value for this setting is:
```toml ```toml
runtimes = [ runtimes = [
"docker-runc",
"runc", "runc",
"crun",
] ]
``` ```

View File

@@ -122,10 +122,11 @@ func TestGoodInput(t *testing.T) {
err = cmdCreate.Run() err = cmdCreate.Run()
require.NoError(t, err, "runtime should not return an error") require.NoError(t, err, "runtime should not return an error")
// Check config.json to ensure that the NVIDIA prestart was not inserted. // Check config.json for NVIDIA prestart hook
spec, err = cfg.getRuntimeSpec() spec, err = cfg.getRuntimeSpec()
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json") require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
require.Empty(t, spec.Hooks, "there should be no hooks in config.json") require.NotEmpty(t, spec.Hooks, "there should be hooks in config.json")
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json")
} }
// NVIDIA prestart hook already present in config file // NVIDIA prestart hook already present in config file
@@ -167,10 +168,11 @@ func TestDuplicateHook(t *testing.T) {
output, err := cmdCreate.CombinedOutput() output, err := cmdCreate.CombinedOutput()
require.NoErrorf(t, err, "runtime should not return an error", "output=%v", string(output)) require.NoErrorf(t, err, "runtime should not return an error", "output=%v", string(output))
// Check config.json to ensure that the NVIDIA prestart hook was removed. // Check config.json for NVIDIA prestart hook
spec, err = cfg.getRuntimeSpec() spec, err = cfg.getRuntimeSpec()
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json") require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
require.Empty(t, spec.Hooks, "there should be no hooks in config.json") require.NotEmpty(t, spec.Hooks, "there should be hooks in config.json")
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json")
} }
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for // addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
@@ -238,3 +240,18 @@ func (c testConfig) generateNewRuntimeSpec() error {
} }
return nil return nil
} }
// Return number of valid NVIDIA prestart hooks in runtime spec
func nvidiaHookCount(hooks *specs.Hooks) int {
if hooks == nil {
return 0
}
count := 0
for _, hook := range hooks.Prestart {
if strings.Contains(hook.Path, nvidiaHook) {
count++
}
}
return count
}

View File

@@ -14,7 +14,6 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit"
"github.com/NVIDIA/nvidia-container-toolkit/internal/info" "github.com/NVIDIA/nvidia-container-toolkit/internal/info"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
) )
const ( const (
@@ -28,7 +27,7 @@ const (
) )
var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "containerd": {}} var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "containerd": {}}
var defaultLowLevelRuntimes = []string{"runc", "crun"} var defaultLowLevelRuntimes = []string{"docker-runc", "runc", "crun"}
var waitingForSignal = make(chan bool, 1) var waitingForSignal = make(chan bool, 1)
var signalReceived = make(chan bool, 1) var signalReceived = make(chan bool, 1)
@@ -37,11 +36,10 @@ var signalReceived = make(chan bool, 1)
type options struct { type options struct {
toolkitInstallDir string toolkitInstallDir string
noDaemon bool noDaemon bool
runtime string runtime string
pidFile string pidFile string
sourceRoot string sourceRoot string
packageType string
toolkitOptions toolkit.Options toolkitOptions toolkit.Options
runtimeOptions runtime.Options runtimeOptions runtime.Options
@@ -125,17 +123,11 @@ func (a app) build() *cli.App {
EnvVars: []string{"TOOLKIT_INSTALL_DIR", "ROOT"}, EnvVars: []string{"TOOLKIT_INSTALL_DIR", "ROOT"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "toolkit-source-root", Name: "source-root",
Usage: "The folder where the required toolkit artifacts can be found. If this is not specified, the path /artifacts/{{ .ToolkitPackageType }} is used where ToolkitPackageType is the resolved package type", Value: "/",
Usage: "The folder where the required toolkit artifacts can be found",
Destination: &options.sourceRoot, Destination: &options.sourceRoot,
EnvVars: []string{"TOOLKIT_SOURCE_ROOT"}, EnvVars: []string{"SOURCE_ROOT"},
},
&cli.StringFlag{
Name: "toolkit-package-type",
Usage: "specify the package type to use for the toolkit. One of ['deb', 'rpm', 'auto', '']. If 'auto' or '' are used, the type is inferred automatically.",
Value: "auto",
Destination: &options.packageType,
EnvVars: []string{"TOOLKIT_PACKAGE_TYPE"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "pid-file", Name: "pid-file",
@@ -153,15 +145,6 @@ func (a app) build() *cli.App {
} }
func (a *app) Before(c *cli.Context, o *options) error { func (a *app) Before(c *cli.Context, o *options) error {
if o.sourceRoot == "" {
sourceRoot, err := a.resolveSourceRoot(o.runtimeOptions.HostRootMount, o.packageType)
if err != nil {
return fmt.Errorf("failed to resolve source root: %v", err)
}
a.logger.Infof("Resolved source root to %v", sourceRoot)
o.sourceRoot = sourceRoot
}
a.toolkit = toolkit.NewInstaller( a.toolkit = toolkit.NewInstaller(
toolkit.WithLogger(a.logger), toolkit.WithLogger(a.logger),
toolkit.WithSourceRoot(o.sourceRoot), toolkit.WithSourceRoot(o.sourceRoot),
@@ -294,35 +277,3 @@ func (a *app) shutdown(pidFile string) {
a.logger.Warningf("Unable to remove pidfile: %v", err) a.logger.Warningf("Unable to remove pidfile: %v", err)
} }
} }
func (a *app) resolveSourceRoot(hostRoot string, packageType string) (string, error) {
resolvedPackageType, err := a.resolvePackageType(hostRoot, packageType)
if err != nil {
return "", err
}
switch resolvedPackageType {
case "deb":
return "/artifacts/deb", nil
case "rpm":
return "/artifacts/rpm", nil
default:
return "", fmt.Errorf("invalid package type: %v", resolvedPackageType)
}
}
func (a *app) resolvePackageType(hostRoot string, packageType string) (rPackageTypes string, rerr error) {
if packageType != "" && packageType != "auto" {
return packageType, nil
}
locator := lookup.NewExecutableLocator(a.logger, hostRoot)
if candidates, err := locator.Locate("/usr/bin/rpm"); err == nil && len(candidates) > 0 {
return "rpm", nil
}
if candidates, err := locator.Locate("/usr/bin/dpkg"); err == nil && len(candidates) > 0 {
return "deb", nil
}
return "deb", nil
}

View File

@@ -67,7 +67,7 @@ swarm-resource = ""
debug = "/dev/null" debug = "/dev/null"
log-level = "info" log-level = "info"
mode = "auto" mode = "auto"
runtimes = ["runc", "crun"] runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes] [nvidia-container-runtime.modes]
@@ -131,7 +131,7 @@ swarm-resource = ""
debug = "/dev/null" debug = "/dev/null"
log-level = "info" log-level = "info"
mode = "auto" mode = "auto"
runtimes = ["runc", "crun"] runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes] [nvidia-container-runtime.modes]
@@ -198,7 +198,7 @@ swarm-resource = ""
debug = "/dev/null" debug = "/dev/null"
log-level = "info" log-level = "info"
mode = "auto" mode = "auto"
runtimes = ["runc", "crun"] runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes] [nvidia-container-runtime.modes]
@@ -262,7 +262,7 @@ swarm-resource = ""
debug = "/dev/null" debug = "/dev/null"
log-level = "info" log-level = "info"
mode = "auto" mode = "auto"
runtimes = ["runc", "crun"] runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes] [nvidia-container-runtime.modes]
@@ -348,7 +348,7 @@ swarm-resource = ""
debug = "/dev/null" debug = "/dev/null"
log-level = "info" log-level = "info"
mode = "auto" mode = "auto"
runtimes = ["runc", "crun"] runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes] [nvidia-container-runtime.modes]
@@ -433,7 +433,7 @@ swarm-resource = ""
"--driver-root-ctr-path=" + hostRoot, "--driver-root-ctr-path=" + hostRoot,
"--pid-file=" + filepath.Join(testRoot, "toolkit.pid"), "--pid-file=" + filepath.Join(testRoot, "toolkit.pid"),
"--restart-mode=none", "--restart-mode=none",
"--toolkit-source-root=" + filepath.Join(artifactRoot, "deb"), "--source-root=" + filepath.Join(artifactRoot, "deb"),
} }
err := app.Run(append(testArgs, tc.args...)) err := app.Run(append(testArgs, tc.args...))

View File

@@ -28,7 +28,7 @@ type createDirectory struct {
logger logger.Interface logger logger.Interface
} }
func (t *ToolkitInstaller) createDirectory() Installer { func (t *toolkitInstaller) createDirectory() Installer {
return &createDirectory{ return &createDirectory{
logger: t.logger, logger: t.logger,
} }

View File

@@ -28,18 +28,20 @@ import (
log "github.com/sirupsen/logrus" log "github.com/sirupsen/logrus"
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/operator" "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/operator"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
) )
type executable struct { type executable struct {
requiresKernelModule bool requiresKernelModule bool
path string path string
symlink string symlink string
args []string
env map[string]string env map[string]string
} }
func (t *ToolkitInstaller) collectExecutables(destDir string) ([]Installer, error) { func (t *toolkitInstaller) collectExecutables(destDir string) ([]Installer, error) {
configFilePath := t.ConfigFilePath(destDir) configHome := filepath.Join(destDir, ".config")
configDir := filepath.Join(configHome, "nvidia-container-runtime")
configPath := filepath.Join(configDir, "config.toml")
executables := []executable{ executables := []executable{
{ {
@@ -54,7 +56,7 @@ func (t *ToolkitInstaller) collectExecutables(destDir string) ([]Installer, erro
path: runtime.Path, path: runtime.Path,
requiresKernelModule: true, requiresKernelModule: true,
env: map[string]string{ env: map[string]string{
config.FilePathOverrideEnvVar: configFilePath, "XDG_CONFIG_HOME": configHome,
}, },
} }
executables = append(executables, e) executables = append(executables, e)
@@ -70,9 +72,7 @@ func (t *ToolkitInstaller) collectExecutables(destDir string) ([]Installer, erro
executable{ executable{
path: "nvidia-container-runtime-hook", path: "nvidia-container-runtime-hook",
symlink: "nvidia-container-toolkit", symlink: "nvidia-container-toolkit",
env: map[string]string{ args: []string{fmt.Sprintf("-config %s", configPath)},
config.FilePathOverrideEnvVar: configFilePath,
},
}, },
) )
@@ -94,6 +94,7 @@ func (t *ToolkitInstaller) collectExecutables(destDir string) ([]Installer, erro
Source: executablePath, Source: executablePath,
WrappedExecutable: dotRealFilename, WrappedExecutable: dotRealFilename,
CheckModules: executable.requiresKernelModule, CheckModules: executable.requiresKernelModule,
Args: executable.args,
Envvars: map[string]string{ Envvars: map[string]string{
"PATH": strings.Join([]string{destDir, "$PATH"}, ":"), "PATH": strings.Join([]string{destDir, "$PATH"}, ":"),
}, },
@@ -123,6 +124,7 @@ type wrapper struct {
Envvars map[string]string Envvars map[string]string
WrappedExecutable string WrappedExecutable string
CheckModules bool CheckModules bool
Args []string
} }
type render struct { type render struct {
@@ -163,6 +165,9 @@ fi
{{$key}}={{$value}} \ {{$key}}={{$value}} \
{{- end }} {{- end }}
{{ .DestDir }}/{{ .WrappedExecutable }} \ {{ .DestDir }}/{{ .WrappedExecutable }} \
{{- range $arg := .Args }}
{{$arg}} \
{{- end }}
"$@" "$@"
` `

View File

@@ -68,6 +68,19 @@ fi
PATH=/foo/bar/baz \ PATH=/foo/bar/baz \
/dest-dir/some-runtime \ /dest-dir/some-runtime \
"$@" "$@"
`,
},
{
description: "args are added",
w: &wrapper{
WrappedExecutable: "some-runtime",
Args: []string{"--config foo", "bar"},
},
expected: `#! /bin/sh
/dest-dir/some-runtime \
--config foo \
bar \
"$@"
`, `,
}, },
} }

View File

@@ -33,7 +33,7 @@ type Installer interface {
Install(string) error Install(string) error
} }
type ToolkitInstaller struct { type toolkitInstaller struct {
logger logger.Interface logger logger.Interface
ignoreErrors bool ignoreErrors bool
sourceRoot string sourceRoot string
@@ -43,13 +43,11 @@ type ToolkitInstaller struct {
ensureTargetDirectory Installer ensureTargetDirectory Installer
} }
var _ Installer = (*ToolkitInstaller)(nil) var _ Installer = (*toolkitInstaller)(nil)
// New creates a toolkit installer with the specified options. // New creates a toolkit installer with the specified options.
func New(opts ...Option) (*ToolkitInstaller, error) { func New(opts ...Option) (Installer, error) {
t := &ToolkitInstaller{ t := &toolkitInstaller{}
sourceRoot: "/",
}
for _, opt := range opts { for _, opt := range opts {
opt(t) opt(t)
} }
@@ -57,6 +55,9 @@ func New(opts ...Option) (*ToolkitInstaller, error) {
if t.logger == nil { if t.logger == nil {
t.logger = logger.New() t.logger = logger.New()
} }
if t.sourceRoot == "" {
t.sourceRoot = "/"
}
if t.artifactRoot == nil { if t.artifactRoot == nil {
artifactRoot, err := newArtifactRoot(t.logger, t.sourceRoot) artifactRoot, err := newArtifactRoot(t.logger, t.sourceRoot)
if err != nil { if err != nil {
@@ -73,7 +74,7 @@ func New(opts ...Option) (*ToolkitInstaller, error) {
} }
// Install ensures that the required toolkit files are installed in the specified directory. // Install ensures that the required toolkit files are installed in the specified directory.
func (t *ToolkitInstaller) Install(destDir string) error { func (t *toolkitInstaller) Install(destDir string) error {
var installers []Installer var installers []Installer
installers = append(installers, t.ensureTargetDirectory) installers = append(installers, t.ensureTargetDirectory)
@@ -98,11 +99,6 @@ func (t *ToolkitInstaller) Install(destDir string) error {
return errs return errs
} }
func (t *ToolkitInstaller) ConfigFilePath(destDir string) string {
toolkitConfigDir := filepath.Join(destDir, ".config", "nvidia-container-runtime")
return filepath.Join(toolkitConfigDir, "config.toml")
}
type symlink struct { type symlink struct {
linkname string linkname string
target string target string

View File

@@ -112,7 +112,7 @@ func TestToolkitInstaller(t *testing.T) {
return nil return nil
}, },
} }
i := ToolkitInstaller{ i := toolkitInstaller{
logger: logger, logger: logger,
artifactRoot: r, artifactRoot: r,
ensureTargetDirectory: createDirectory, ensureTargetDirectory: createDirectory,
@@ -172,8 +172,8 @@ if [ "${?}" != "0" ]; then
echo "nvidia driver modules are not yet loaded, invoking runc directly" echo "nvidia driver modules are not yet loaded, invoking runc directly"
exec runc "$@" exec runc "$@"
fi fi
NVIDIA_CTK_CONFIG_FILE_PATH=/foo/bar/baz/.config/nvidia-container-runtime/config.toml \
PATH=/foo/bar/baz:$PATH \ PATH=/foo/bar/baz:$PATH \
XDG_CONFIG_HOME=/foo/bar/baz/.config \
/foo/bar/baz/nvidia-container-runtime.real \ /foo/bar/baz/nvidia-container-runtime.real \
"$@" "$@"
`, `,
@@ -187,8 +187,8 @@ if [ "${?}" != "0" ]; then
echo "nvidia driver modules are not yet loaded, invoking runc directly" echo "nvidia driver modules are not yet loaded, invoking runc directly"
exec runc "$@" exec runc "$@"
fi fi
NVIDIA_CTK_CONFIG_FILE_PATH=/foo/bar/baz/.config/nvidia-container-runtime/config.toml \
PATH=/foo/bar/baz:$PATH \ PATH=/foo/bar/baz:$PATH \
XDG_CONFIG_HOME=/foo/bar/baz/.config \
/foo/bar/baz/nvidia-container-runtime.cdi.real \ /foo/bar/baz/nvidia-container-runtime.cdi.real \
"$@" "$@"
`, `,
@@ -202,8 +202,8 @@ if [ "${?}" != "0" ]; then
echo "nvidia driver modules are not yet loaded, invoking runc directly" echo "nvidia driver modules are not yet loaded, invoking runc directly"
exec runc "$@" exec runc "$@"
fi fi
NVIDIA_CTK_CONFIG_FILE_PATH=/foo/bar/baz/.config/nvidia-container-runtime/config.toml \
PATH=/foo/bar/baz:$PATH \ PATH=/foo/bar/baz:$PATH \
XDG_CONFIG_HOME=/foo/bar/baz/.config \
/foo/bar/baz/nvidia-container-runtime.legacy.real \ /foo/bar/baz/nvidia-container-runtime.legacy.real \
"$@" "$@"
`, `,
@@ -240,9 +240,9 @@ PATH=/foo/bar/baz:$PATH \
path: "/foo/bar/baz/nvidia-container-runtime-hook", path: "/foo/bar/baz/nvidia-container-runtime-hook",
mode: 0777, mode: 0777,
wrapper: `#! /bin/sh wrapper: `#! /bin/sh
NVIDIA_CTK_CONFIG_FILE_PATH=/foo/bar/baz/.config/nvidia-container-runtime/config.toml \
PATH=/foo/bar/baz:$PATH \ PATH=/foo/bar/baz:$PATH \
/foo/bar/baz/nvidia-container-runtime-hook.real \ /foo/bar/baz/nvidia-container-runtime-hook.real \
-config /foo/bar/baz/.config/nvidia-container-runtime/config.toml \
"$@" "$@"
`, `,
}, },

View File

@@ -28,7 +28,7 @@ import (
// A predefined set of library candidates are considered, with the first one // A predefined set of library candidates are considered, with the first one
// resulting in success being installed to the toolkit folder. The install process // resulting in success being installed to the toolkit folder. The install process
// resolves the symlink for the library and copies the versioned library itself. // resolves the symlink for the library and copies the versioned library itself.
func (t *ToolkitInstaller) collectLibraries() ([]Installer, error) { func (t *toolkitInstaller) collectLibraries() ([]Installer, error) {
requiredLibraries := []string{ requiredLibraries := []string{
"libnvidia-container.so.1", "libnvidia-container.so.1",
"libnvidia-container-go.so.1", "libnvidia-container-go.so.1",

View File

@@ -19,29 +19,29 @@ package installer
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger" import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
type Option func(*ToolkitInstaller) type Option func(*toolkitInstaller)
func WithLogger(logger logger.Interface) Option { func WithLogger(logger logger.Interface) Option {
return func(ti *ToolkitInstaller) { return func(ti *toolkitInstaller) {
ti.logger = logger ti.logger = logger
} }
} }
func WithArtifactRoot(artifactRoot *artifactRoot) Option { func WithArtifactRoot(artifactRoot *artifactRoot) Option {
return func(ti *ToolkitInstaller) { return func(ti *toolkitInstaller) {
ti.artifactRoot = artifactRoot ti.artifactRoot = artifactRoot
} }
} }
func WithIgnoreErrors(ignoreErrors bool) Option { func WithIgnoreErrors(ignoreErrors bool) Option {
return func(ti *ToolkitInstaller) { return func(ti *toolkitInstaller) {
ti.ignoreErrors = ignoreErrors ti.ignoreErrors = ignoreErrors
} }
} }
// WithSourceRoot sets the root directory for locating artifacts to be installed. // WithSourceRoot sets the root directory for locating artifacts to be installed.
func WithSourceRoot(sourceRoot string) Option { func WithSourceRoot(sourceRoot string) Option {
return func(ti *ToolkitInstaller) { return func(ti *toolkitInstaller) {
ti.sourceRoot = sourceRoot ti.sourceRoot = sourceRoot
} }
} }

View File

@@ -37,6 +37,8 @@ import (
const ( const (
// DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory // DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory
DefaultNvidiaDriverRoot = "/run/nvidia/driver" DefaultNvidiaDriverRoot = "/run/nvidia/driver"
configFilename = "config.toml"
) )
type cdiOptions struct { type cdiOptions struct {
@@ -213,8 +215,7 @@ func Flags(opts *Options) []cli.Flag {
// An Installer is used to install the NVIDIA Container Toolkit from the toolkit container. // An Installer is used to install the NVIDIA Container Toolkit from the toolkit container.
type Installer struct { type Installer struct {
logger logger.Interface logger logger.Interface
sourceRoot string sourceRoot string
// toolkitRoot specifies the destination path at which the toolkit is installed. // toolkitRoot specifies the destination path at which the toolkit is installed.
toolkitRoot string toolkitRoot string
@@ -314,7 +315,7 @@ func (t *Installer) Install(cli *cli.Context, opts *Options) error {
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not install toolkit components: %w", err)) t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not install toolkit components: %w", err))
} }
err = t.installToolkitConfig(cli, opts, toolkit.ConfigFilePath(t.toolkitRoot)) err = t.installToolkitConfig(cli, opts)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err) return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
} else if err != nil { } else if err != nil {
@@ -341,11 +342,13 @@ func (t *Installer) Install(cli *cli.Context, opts *Options) error {
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring // installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
// that the settings are updated to match the desired install and nvidia driver directories. // that the settings are updated to match the desired install and nvidia driver directories.
func (t *Installer) installToolkitConfig(c *cli.Context, opts *Options, toolkitConfigPath string) error { func (t *Installer) installToolkitConfig(c *cli.Context, opts *Options) error {
toolkitConfigDir := filepath.Join(t.toolkitRoot, ".config", "nvidia-container-runtime")
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
t.logger.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath) t.logger.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
err := t.createDirectories(filepath.Dir(toolkitConfigPath)) err := t.createDirectories(toolkitConfigDir)
if err != nil && !opts.ignoreErrors { if err != nil && !opts.ignoreErrors {
return fmt.Errorf("could not create required directories: %v", err) return fmt.Errorf("could not create required directories: %v", err)
} else if err != nil { } else if err != nil {

View File

@@ -86,7 +86,6 @@ devices:
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel2047 hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel2047
containerEdits: containerEdits:
env: env:
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
- NVIDIA_VISIBLE_DEVICES=void - NVIDIA_VISIBLE_DEVICES=void
hooks: hooks:
- hookName: createContainer - hookName: createContainer
@@ -98,15 +97,6 @@ containerEdits:
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so - libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
env: env:
- NVIDIA_CTK_DEBUG=false - NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: {{ .toolkitRoot }}/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-soname-symlinks
- --folder
- /lib/x86_64-linux-gnu
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer - hookName: createContainer
path: {{ .toolkitRoot }}/nvidia-cdi-hook path: {{ .toolkitRoot }}/nvidia-cdi-hook
args: args:

View File

@@ -57,7 +57,6 @@ type options struct {
configSearchPaths cli.StringSlice configSearchPaths cli.StringSlice
librarySearchPaths cli.StringSlice librarySearchPaths cli.StringSlice
disabledHooks cli.StringSlice
csv struct { csv struct {
files cli.StringSlice files cli.StringSlice
@@ -97,20 +96,17 @@ func (m command) build() *cli.Command {
Name: "config-search-path", Name: "config-search-path",
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.", Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
Destination: &opts.configSearchPaths, Destination: &opts.configSearchPaths,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CONFIG_SEARCH_PATHS"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "output", Name: "output",
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT", Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
Destination: &opts.output, Destination: &opts.output,
EnvVars: []string{"NVIDIA_CTK_CDI_OUTPUT_FILE_PATH"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "format", Name: "format",
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).", Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
Value: spec.FormatYAML, Value: spec.FormatYAML,
Destination: &opts.format, Destination: &opts.format,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_OUTPUT_FORMAT"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "mode", Name: "mode",
@@ -120,32 +116,27 @@ func (m command) build() *cli.Command {
"If mode is set to 'auto' the mode will be determined based on the system configuration.", "If mode is set to 'auto' the mode will be determined based on the system configuration.",
Value: string(nvcdi.ModeAuto), Value: string(nvcdi.ModeAuto),
Destination: &opts.mode, Destination: &opts.mode,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_MODE"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "dev-root", Name: "dev-root",
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.", Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
Destination: &opts.devRoot, Destination: &opts.devRoot,
EnvVars: []string{"NVIDIA_CTK_DEV_ROOT"},
}, },
&cli.StringSliceFlag{ &cli.StringSliceFlag{
Name: "device-name-strategy", Name: "device-name-strategy",
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]", Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID), Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
Destination: &opts.deviceNameStrategies, Destination: &opts.deviceNameStrategies,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_DEVICE_NAME_STRATEGIES"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "driver-root", Name: "driver-root",
Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.", Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.",
Destination: &opts.driverRoot, Destination: &opts.driverRoot,
EnvVars: []string{"NVIDIA_CTK_DRIVER_ROOT"},
}, },
&cli.StringSliceFlag{ &cli.StringSliceFlag{
Name: "library-search-path", Name: "library-search-path",
Usage: "Specify the path to search for libraries when discovering the entities that should be included in the CDI specification.\n\tNote: This option only applies to CSV mode.", Usage: "Specify the path to search for libraries when discovering the entities that should be included in the CDI specification.\n\tNote: This option only applies to CSV mode.",
Destination: &opts.librarySearchPaths, Destination: &opts.librarySearchPaths,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_LIBRARY_SEARCH_PATHS"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "nvidia-cdi-hook-path", Name: "nvidia-cdi-hook-path",
@@ -154,13 +145,11 @@ func (m command) build() *cli.Command {
"If not specified, the PATH will be searched for `nvidia-cdi-hook`. " + "If not specified, the PATH will be searched for `nvidia-cdi-hook`. " +
"NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.", "NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.",
Destination: &opts.nvidiaCDIHookPath, Destination: &opts.nvidiaCDIHookPath,
EnvVars: []string{"NVIDIA_CTK_CDI_HOOK_PATH"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "ldconfig-path", Name: "ldconfig-path",
Usage: "Specify the path to use for ldconfig in the generated CDI specification", Usage: "Specify the path to use for ldconfig in the generated CDI specification",
Destination: &opts.ldconfigPath, Destination: &opts.ldconfigPath,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_LDCONFIG_PATH"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "vendor", Name: "vendor",
@@ -168,7 +157,6 @@ func (m command) build() *cli.Command {
Usage: "the vendor string to use for the generated CDI specification.", Usage: "the vendor string to use for the generated CDI specification.",
Value: "nvidia.com", Value: "nvidia.com",
Destination: &opts.vendor, Destination: &opts.vendor,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_VENDOR"},
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "class", Name: "class",
@@ -176,30 +164,17 @@ func (m command) build() *cli.Command {
Usage: "the class string to use for the generated CDI specification.", Usage: "the class string to use for the generated CDI specification.",
Value: "gpu", Value: "gpu",
Destination: &opts.class, Destination: &opts.class,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CLASS"},
}, },
&cli.StringSliceFlag{ &cli.StringSliceFlag{
Name: "csv.file", Name: "csv.file",
Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.", Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.",
Value: cli.NewStringSlice(csv.DefaultFileList()...), Value: cli.NewStringSlice(csv.DefaultFileList()...),
Destination: &opts.csv.files, Destination: &opts.csv.files,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CSV_FILES"},
}, },
&cli.StringSliceFlag{ &cli.StringSliceFlag{
Name: "csv.ignore-pattern", Name: "csv.ignore-pattern",
Usage: "specify a pattern the CSV mount specifications.", Usage: "Specify a pattern the CSV mount specifications.",
Destination: &opts.csv.ignorePatterns, Destination: &opts.csv.ignorePatterns,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CSV_IGNORE_PATTERNS"},
},
&cli.StringSliceFlag{
Name: "disable-hook",
Aliases: []string{"disable-hooks"},
Usage: "specify a specific hook to skip when generating CDI " +
"specifications. This can be specified multiple times and the " +
"special hook name 'all' can be used ensure that the generated " +
"CDI specification does not include any hooks.",
Destination: &opts.disabledHooks,
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_DISABLED_HOOKS"},
}, },
} }
@@ -287,7 +262,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
deviceNamers = append(deviceNamers, deviceNamer) deviceNamers = append(deviceNamers, deviceNamer)
} }
cdiOptions := []nvcdi.Option{ cdilib, err := nvcdi.New(
nvcdi.WithLogger(m.logger), nvcdi.WithLogger(m.logger),
nvcdi.WithDriverRoot(opts.driverRoot), nvcdi.WithDriverRoot(opts.driverRoot),
nvcdi.WithDevRoot(opts.devRoot), nvcdi.WithDevRoot(opts.devRoot),
@@ -301,13 +276,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()), nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
// We set the following to allow for dependency injection: // We set the following to allow for dependency injection:
nvcdi.WithNvmlLib(opts.nvmllib), nvcdi.WithNvmlLib(opts.nvmllib),
} )
for _, hook := range opts.disabledHooks.Value() {
cdiOptions = append(cdiOptions, nvcdi.WithDisabledHook(hook))
}
cdilib, err := nvcdi.New(cdiOptions...)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create CDI library: %v", err) return nil, fmt.Errorf("failed to create CDI library: %v", err)
} }

View File

@@ -26,7 +26,6 @@ import (
"github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100" "github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100"
testlog "github.com/sirupsen/logrus/hooks/test" testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/urfave/cli/v2"
"github.com/NVIDIA/nvidia-container-toolkit/internal/test" "github.com/NVIDIA/nvidia-container-toolkit/internal/test"
) )
@@ -80,7 +79,6 @@ devices:
hostPath: {{ .driverRoot }}/dev/nvidia0 hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits: containerEdits:
env: env:
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
- NVIDIA_VISIBLE_DEVICES=void - NVIDIA_VISIBLE_DEVICES=void
deviceNodes: deviceNodes:
- path: /dev/nvidiactl - path: /dev/nvidiactl
@@ -103,15 +101,6 @@ containerEdits:
- --host-driver-version=999.88.77 - --host-driver-version=999.88.77
env: env:
- NVIDIA_CTK_DEBUG=false - NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-soname-symlinks
- --folder
- /lib/x86_64-linux-gnu
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer - hookName: createContainer
path: /usr/bin/nvidia-cdi-hook path: /usr/bin/nvidia-cdi-hook
args: args:
@@ -121,227 +110,6 @@ containerEdits:
- /lib/x86_64-linux-gnu - /lib/x86_64-linux-gnu
env: env:
- NVIDIA_CTK_DEBUG=false - NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- disable-device-node-modification
env:
- NVIDIA_CTK_DEBUG=false
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooks1",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")),
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")),
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
hooks:
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-symlinks
- --link
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-soname-symlinks
- --folder
- /lib/x86_64-linux-gnu
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- update-ldcache
- --folder
- /lib/x86_64-linux-gnu
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- disable-device-node-modification
env:
- NVIDIA_CTK_DEBUG=false
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooks2",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")),
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")),
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
hooks:
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-symlinks
- --link
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- create-soname-symlinks
- --folder
- /lib/x86_64-linux-gnu
env:
- NVIDIA_CTK_DEBUG=false
- hookName: createContainer
path: /usr/bin/nvidia-cdi-hook
args:
- nvidia-cdi-hook
- disable-device-node-modification
env:
- NVIDIA_CTK_DEBUG=false
mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
options:
- ro
- nosuid
- nodev
- rbind
- rprivate
`,
},
{
description: "disableHooksAll",
options: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("all")),
},
expectedOptions: options{
format: "yaml",
mode: "nvml",
vendor: "example.com",
class: "device",
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
driverRoot: driverRoot,
disabledHooks: valueOf(cli.NewStringSlice("all")),
},
expectedSpec: `---
cdiVersion: 0.5.0
kind: example.com/device
devices:
- name: "0"
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
- name: all
containerEdits:
deviceNodes:
- path: /dev/nvidia0
hostPath: {{ .driverRoot }}/dev/nvidia0
containerEdits:
env:
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
- NVIDIA_VISIBLE_DEVICES=void
deviceNodes:
- path: /dev/nvidiactl
hostPath: {{ .driverRoot }}/dev/nvidiactl
mounts: mounts:
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77 - hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77 containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
@@ -394,9 +162,3 @@ containerEdits:
}) })
} }
} }
// valueOf returns the value of a pointer.
// Note that this does not check for a nil pointer and is only used for testing.
func valueOf[T any](v *T) T {
return *v
}

View File

@@ -64,7 +64,6 @@ func (m command) build() *cli.Command {
Usage: "specify the directories to scan for CDI specifications", Usage: "specify the directories to scan for CDI specifications",
Value: cli.NewStringSlice(cdi.DefaultSpecDirs...), Value: cli.NewStringSlice(cdi.DefaultSpecDirs...),
Destination: &cfg.cdiSpecDirs, Destination: &cfg.cdiSpecDirs,
EnvVars: []string{"NVIDIA_CTK_CDI_SPEC_DIRS"},
}, },
} }

View File

@@ -1,171 +0,0 @@
# SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi9 AS build
RUN dnf install -y \
wget make git gcc \
&& \
rm -rf /var/cache/yum/*
ARG GOLANG_VERSION=x.x.x
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH=/go
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build
COPY . .
RUN mkdir -p /artifacts/bin
ARG VERSION="N/A"
ARG GIT_COMMIT="unknown"
RUN make PREFIX=/artifacts/bin cmd-nvidia-ctk-installer
# The packaging stage collects the deb and rpm packages built for
# supported architectures.
FROM nvcr.io/nvidia/distroless/go:v3.1.9-dev AS packaging
USER 0:0
SHELL ["/busybox/sh", "-c"]
RUN ln -s /busybox/sh /bin/sh
ARG ARTIFACTS_ROOT
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
WORKDIR /artifacts
# build-args are added to the manifest.txt file below.
ARG PACKAGE_VERSION
ARG GIT_BRANCH
ARG GIT_COMMIT
ARG GIT_COMMIT_SHORT
ARG SOURCE_DATE_EPOCH
ARG VERSION
# Create a manifest.txt file with the absolute paths of all deb and rpm packages in the container
RUN echo "#IMAGE_EPOCH=$(date '+%s')" > /artifacts/manifest.txt && \
env | sed 's/^/#/g' >> /artifacts/manifest.txt && \
find /artifacts/packages -iname '*.deb' -o -iname '*.rpm' >> /artifacts/manifest.txt
LABEL name="NVIDIA Container Toolkit Packages"
LABEL vendor="NVIDIA"
LABEL version="${VERSION}"
LABEL release="N/A"
LABEL summary="deb and rpm packages for the NVIDIA Container Toolkit"
LABEL description="See summary"
COPY LICENSE /licenses/
# The debpackages stage is used to extract the contents of deb packages.
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04 AS debpackages
ARG TARGETARCH
ARG PACKAGE_DIST_DEB=ubuntu18.04
COPY --from=packaging /artifacts/packages/${PACKAGE_DIST_DEB} /deb-packages
RUN mkdir -p /artifacts/deb
RUN set -eux; \
\
case "${TARGETARCH}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
for p in $(ls /deb-packages/${ARCH}/*.deb); do dpkg-deb -xv $p /artifacts/deb/; done
# The rpmpackages stage is used to extract the contents of the rpm packages.
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi9 AS rpmpackages
RUN dnf install -y cpio
ARG TARGETARCH
ARG PACKAGE_DIST_RPM=centos7
COPY --from=packaging /artifacts/packages/${PACKAGE_DIST_RPM} /rpm-packages
RUN mkdir -p /artifacts/rpm
RUN set -eux; \
\
case "${TARGETARCH}" in \
x86_64 | amd64) ARCH='x86_64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='aarch64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
for p in $(ls /rpm-packages/${ARCH}/*.rpm); do rpm2cpio $p | cpio -idmv -D /artifacts/rpm; done
# The artifacts image serves as an intermediate stage to collect the artifacts
# From the previous stages:
# - The extracted deb packages
# - The extracted rpm packages
# - The nvidia-ctk-installer binary
FROM scratch AS artifacts
COPY --from=rpmpackages /artifacts/rpm /artifacts/rpm
COPY --from=debpackages /artifacts/deb /artifacts/deb
COPY --from=build /artifacts/bin /artifacts/build
# The application stage contains the application used as a GPU Operator
# operand.
FROM nvcr.io/nvidia/distroless/go:v3.1.9-dev AS application
USER 0:0
SHELL ["/busybox/sh", "-c"]
RUN ln -s /busybox/sh /bin/sh
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=void
ENV NVIDIA_DRIVER_CAPABILITIES=utility
COPY --from=artifacts /artifacts/rpm /artifacts/rpm
COPY --from=artifacts /artifacts/deb /artifacts/deb
COPY --from=artifacts /artifacts/build /work
WORKDIR /work
ENV PATH=/work:$PATH
ARG VERSION
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
LABEL name="NVIDIA Container Runtime Config"
LABEL vendor="NVIDIA"
LABEL version="${VERSION}"
LABEL release="N/A"
LABEL summary="Automatically Configure your Container Runtime for GPU support."
LABEL description="See summary"
COPY LICENSE /licenses/
ENTRYPOINT ["/work/nvidia-ctk-installer"]
# The GPU Operator exec's nvidia-toolkit in its entrypoint.
# We create a symlink here to ensure compatibility with older
# GPU Operator versions.
RUN ln -s /work/nvidia-ctk-installer /work/nvidia-toolkit

View File

@@ -0,0 +1,38 @@
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG GOLANG_VERSION=x.x.x
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
ARG ARTIFACTS_ROOT
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
WORKDIR /artifacts/packages
# build-args are added to the manifest.txt file below.
ARG PACKAGE_DIST
ARG PACKAGE_VERSION
ARG GIT_BRANCH
ARG GIT_COMMIT
ARG GIT_COMMIT_SHORT
ARG SOURCE_DATE_EPOCH
ARG VERSION
# Create a manifest.txt file with the absolute paths of all deb and rpm packages in the container
RUN echo "#IMAGE_EPOCH=$(date '+%s')" > /artifacts/manifest.txt && \
env | sed 's/^/#/g' >> /artifacts/manifest.txt && \
find /artifacts/packages -iname '*.deb' -o -iname '*.rpm' >> /artifacts/manifest.txt
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE

View File

@@ -0,0 +1,90 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi8 AS build
RUN yum install -y \
wget make git gcc \
&& \
rm -rf /var/cache/yum/*
ARG GOLANG_VERSION=x.x.x
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH=/go
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build
COPY . .
RUN mkdir /artifacts
ARG VERSION="N/A"
ARG GIT_COMMIT="unknown"
RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi8
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=void
ENV NVIDIA_DRIVER_CAPABILITIES=utility
ARG ARTIFACTS_ROOT
ARG PACKAGE_DIST
COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
WORKDIR /artifacts/packages
ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH=${TARGETARCH}
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
yum localinstall -y \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-1.*.rpm \
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*-${PACKAGE_VERSION}*.rpm
WORKDIR /work
COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
RUN ln -s nvidia-ctk-installer nvidia-toolkit
ENV PATH=/work:$PATH
ARG VERSION
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
LABEL name="NVIDIA Container Runtime Config"
LABEL vendor="NVIDIA"
LABEL version="${VERSION}"
LABEL release="N/A"
LABEL summary="Automatically Configure your Container Runtime for GPU support."
LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
ENTRYPOINT ["/work/nvidia-ctk-installer"]

View File

@@ -0,0 +1,98 @@
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
ARG GOLANG_VERSION=x.x.x
ARG VERSION="N/A"
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04 AS build
RUN apt-get update && \
apt-get install -y wget make git gcc \
&& \
rm -rf /var/lib/apt/lists/*
ARG GOLANG_VERSION=x.x.x
RUN set -eux; \
\
arch="$(uname -m)"; \
case "${arch##*-}" in \
x86_64 | amd64) ARCH='amd64' ;; \
ppc64el | ppc64le) ARCH='ppc64le' ;; \
aarch64 | arm64) ARCH='arm64' ;; \
*) echo "unsupported architecture" ; exit 1 ;; \
esac; \
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
| tar -C /usr/local -xz
ENV GOPATH=/go
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
WORKDIR /build
COPY . .
RUN mkdir /artifacts
ARG VERSION="N/A"
ARG GIT_COMMIT="unknown"
RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
RUN rm -f /etc/apt/sources.list.d/cuda.list
ARG DEBIAN_FRONTEND=noninteractive
RUN apt-get update && apt-get install -y --no-install-recommends \
libcap2 \
curl \
&& \
rm -rf /var/lib/apt/lists/*
ENV NVIDIA_DISABLE_REQUIRE="true"
ENV NVIDIA_VISIBLE_DEVICES=void
ENV NVIDIA_DRIVER_CAPABILITIES=utility
ARG ARTIFACTS_ROOT
ARG PACKAGE_DIST
COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
WORKDIR /artifacts/packages
ARG PACKAGE_VERSION
ARG TARGETARCH
ENV PACKAGE_ARCH=${TARGETARCH}
RUN dpkg -i \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*_${PACKAGE_VERSION}*.deb
WORKDIR /work
COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
RUN ln -s nvidia-ctk-installer nvidia-toolkit
ENV PATH=/work:$PATH
ARG VERSION
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
LABEL name="NVIDIA Container Runtime Config"
LABEL vendor="NVIDIA"
LABEL version="${VERSION}"
LABEL release="N/A"
LABEL summary="Automatically Configure your Container Runtime for GPU support."
LABEL description="See summary"
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
ENTRYPOINT ["/work/nvidia-ctk-installer"]

View File

@@ -29,17 +29,17 @@ include $(CURDIR)/versions.mk
IMAGE_VERSION := $(VERSION) IMAGE_VERSION := $(VERSION)
IMAGE_TAG ?= $(VERSION) IMAGE_TAG ?= $(VERSION)-$(DIST)
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG) IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
OUT_IMAGE_NAME ?= $(IMAGE_NAME) OUT_IMAGE_NAME ?= $(IMAGE_NAME)
OUT_IMAGE_VERSION ?= $(IMAGE_VERSION) OUT_IMAGE_VERSION ?= $(IMAGE_VERSION)
OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION) OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG) OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
##### Public rules ##### ##### Public rules #####
DEFAULT_PUSH_TARGET := application DEFAULT_PUSH_TARGET := ubuntu20.04
DISTRIBUTIONS := $(DEFAULT_PUSH_TARGET) DISTRIBUTIONS := ubuntu20.04 ubi8
META_TARGETS := packaging META_TARGETS := packaging
@@ -56,16 +56,30 @@ else
include $(CURDIR)/deployments/container/multi-arch.mk include $(CURDIR)/deployments/container/multi-arch.mk
endif endif
# For the default push target we also push a short tag equal to the version.
# We skip this for the development release
DEVEL_RELEASE_IMAGE_VERSION ?= devel
PUSH_MULTIPLE_TAGS ?= true
ifeq ($(strip $(OUT_IMAGE_VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
PUSH_MULTIPLE_TAGS = false
endif
ifeq ($(PUSH_MULTIPLE_TAGS),true)
push-$(DEFAULT_PUSH_TARGET): push-short
endif
push-%: DIST = $(*)
push-short: DIST = $(DEFAULT_PUSH_TARGET)
# Define the push targets # Define the push targets
$(PUSH_TARGETS): push-%: $(PUSH_TARGETS): push-%:
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE) $(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile push-short:
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
# For packaging targets we set the output image tag to include the -packaging suffix.
%-packaging: INTERMEDIATE_TARGET := --target=packaging build-%: DIST = $(*)
%-packaging: IMAGE_TAG = $(IMAGE_VERSION)-packaging build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SUFFIX)
%-packaging: OUT_IMAGE_TAG = $(IMAGE_VERSION)-packaging
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR)) ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
@@ -76,12 +90,10 @@ $(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
--provenance=false --sbom=false \ --provenance=false --sbom=false \
$(DOCKER_BUILD_OPTIONS) \ $(DOCKER_BUILD_OPTIONS) \
$(DOCKER_BUILD_PLATFORM_OPTIONS) \ $(DOCKER_BUILD_PLATFORM_OPTIONS) \
$(INTERMEDIATE_TARGET) \
--tag $(IMAGE) \ --tag $(IMAGE) \
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \ --build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \ --build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
--build-arg PACKAGE_DIST_DEB="$(PACKAGE_DIST_DEB)" \ --build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
--build-arg PACKAGE_DIST_RPM="$(PACKAGE_DIST_RPM)" \
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \ --build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
--build-arg VERSION="$(VERSION)" \ --build-arg VERSION="$(VERSION)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \ --build-arg GIT_COMMIT="$(GIT_COMMIT)" \
@@ -91,17 +103,25 @@ $(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
-f $(DOCKERFILE) \ -f $(DOCKERFILE) \
$(CURDIR) $(CURDIR)
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
PACKAGE_DIST_DEB = ubuntu18.04 build-ubi8: DOCKERFILE_SUFFIX := ubi8
# TODO: This needs to be set to centos8 for ppc64le builds build-ubi8: PACKAGE_DIST = centos7
PACKAGE_DIST_RPM = centos7
# Handle the default build target. build-packaging: DOCKERFILE_SUFFIX := packaging
.PHONY: build push build-packaging: PACKAGE_ARCH := amd64
build: build-$(DEFAULT_PUSH_TARGET) build-packaging: PACKAGE_DIST = all
push: push-$(DEFAULT_PUSH_TARGET)
# Test targets # Test targets
test-%: DIST = $(*)
# Handle the default build target.
.PHONY: build
build: $(DEFAULT_PUSH_TARGET)
$(DEFAULT_PUSH_TARGET): build-$(DEFAULT_PUSH_TARGET)
$(DEFAULT_PUSH_TARGET): DIST = $(DEFAULT_PUSH_TARGET)
TEST_CASES ?= docker crio containerd TEST_CASES ?= docker crio containerd
$(TEST_TARGETS): test-%: $(TEST_TARGETS): test-%:
TEST_CASES="$(TEST_CASES)" bash -x $(CURDIR)/test/container/main.sh run \ TEST_CASES="$(TEST_CASES)" bash -x $(CURDIR)/test/container/main.sh run \

View File

@@ -23,3 +23,11 @@ $(BUILD_TARGETS): build-%: image-%
else else
$(BUILD_TARGETS): build-%: image-% $(BUILD_TARGETS): build-%: image-%
endif endif
# For the default distribution we also retag the image.
# Note: This needs to be updated for multi-arch images.
ifeq ($(IMAGE_TAG),$(VERSION)-$(DIST))
$(DEFAULT_PUSH_TARGET):
$(DOCKER) image inspect $(IMAGE) > /dev/null || $(DOCKER) pull $(IMAGE)
$(DOCKER) tag $(IMAGE) $(subst :$(IMAGE_TAG),:$(VERSION),$(IMAGE))
endif

View File

@@ -14,7 +14,7 @@
# This Dockerfile is also used to define the golang version used in this project # This Dockerfile is also used to define the golang version used in this project
# This allows dependabot to manage this version in addition to other images. # This allows dependabot to manage this version in addition to other images.
FROM golang:1.24.4 FROM golang:1.24.3
WORKDIR /work WORKDIR /work
COPY * . COPY * .

View File

@@ -1,28 +0,0 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
[Unit]
Description=Refresh NVIDIA CDI specification file
ConditionPathExists=/usr/bin/nvidia-smi
ConditionPathExists=/usr/bin/nvidia-ctk
[Service]
Type=oneshot
EnvironmentFile=-/etc/nvidia-container-toolkit/cdi-refresh.env
ExecCondition=/usr/bin/grep -qE '/nvidia.ko' /lib/modules/%v/modules.dep
ExecStart=/usr/bin/nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml
CapabilityBoundingSet=CAP_SYS_MODULE CAP_SYS_ADMIN CAP_MKNOD
[Install]
WantedBy=multi-user.target

View File

@@ -55,7 +55,9 @@ RUN make PREFIX=${DIST_DIR} cmds
WORKDIR $DIST_DIR WORKDIR $DIST_DIR
COPY packaging/debian ./debian COPY packaging/debian ./debian
COPY deployments/systemd/ .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
RUN dch --create --package="${PKG_NAME}" \ RUN dch --create --package="${PKG_NAME}" \
--newversion "${REVISION}" \ --newversion "${REVISION}" \
@@ -65,6 +67,6 @@ RUN dch --create --package="${PKG_NAME}" \
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
CMD export DISTRIB="$(lsb_release -cs)" && \ CMD export DISTRIB="$(lsb_release -cs)" && \
debuild -eDISTRIB -eSECTION -eVERSION="${REVISION}" \ debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \ --dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
mv /tmp/*.deb /dist mv /tmp/*.deb /dist

View File

@@ -46,7 +46,9 @@ RUN make PREFIX=${DIST_DIR} cmds
WORKDIR $DIST_DIR/.. WORKDIR $DIST_DIR/..
COPY packaging/rpm . COPY packaging/rpm .
COPY deployments/systemd/ .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
CMD arch=$(uname -m) && \ CMD arch=$(uname -m) && \
rpmbuild --clean --target=$arch -bb \ rpmbuild --clean --target=$arch -bb \
@@ -54,6 +56,7 @@ CMD arch=$(uname -m) && \
-D "release_date $(date +'%a %b %d %Y')" \ -D "release_date $(date +'%a %b %d %Y')" \
-D "git_commit ${GIT_COMMIT}" \ -D "git_commit ${GIT_COMMIT}" \
-D "version ${PKG_VERS}" \ -D "version ${PKG_VERS}" \
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
-D "release ${PKG_REV}" \ -D "release ${PKG_REV}" \
SPECS/nvidia-container-toolkit.spec && \ SPECS/nvidia-container-toolkit.spec && \
mv RPMS/$arch/*.rpm /dist mv RPMS/$arch/*.rpm /dist

View File

@@ -71,7 +71,9 @@ RUN make PREFIX=${DIST_DIR} cmds
WORKDIR $DIST_DIR/.. WORKDIR $DIST_DIR/..
COPY packaging/rpm . COPY packaging/rpm .
COPY deployments/systemd/ ${DIST_DIR}/
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
CMD arch=$(uname -m) && \ CMD arch=$(uname -m) && \
rpmbuild --clean --target=$arch -bb \ rpmbuild --clean --target=$arch -bb \
@@ -79,6 +81,7 @@ CMD arch=$(uname -m) && \
-D "release_date $(date +'%a %b %d %Y')" \ -D "release_date $(date +'%a %b %d %Y')" \
-D "git_commit ${GIT_COMMIT}" \ -D "git_commit ${GIT_COMMIT}" \
-D "version ${PKG_VERS}" \ -D "version ${PKG_VERS}" \
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
-D "release ${PKG_REV}" \ -D "release ${PKG_REV}" \
SPECS/nvidia-container-toolkit.spec && \ SPECS/nvidia-container-toolkit.spec && \
mv RPMS/$arch/*.rpm /dist mv RPMS/$arch/*.rpm /dist

View File

@@ -53,16 +53,18 @@ RUN make PREFIX=${DIST_DIR} cmds
WORKDIR $DIST_DIR WORKDIR $DIST_DIR
COPY packaging/debian ./debian COPY packaging/debian ./debian
COPY deployments/systemd/ .
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
RUN dch --create --package="${PKG_NAME}" \ RUN dch --create --package="${PKG_NAME}" \
--newversion "${REVISION}" \ --newversion "${REVISION}" \
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \ "See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
dch --append "Bump libnvidia-container dependency to ${REVISION}" && \ dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" && \
dch -r "" && \ dch -r "" && \
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
CMD export DISTRIB="$(lsb_release -cs)" && \ CMD export DISTRIB="$(lsb_release -cs)" && \
debuild -eDISTRIB -eSECTION -eVERSION="${REVISION}" \ debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \ --dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
mv /tmp/*.deb /dist mv /tmp/*.deb /dist

View File

@@ -85,6 +85,11 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
--%: docker-build-% --%: docker-build-%
@ @
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
# private ubuntu target # private ubuntu target
--ubuntu%: OS := ubuntu --ubuntu%: OS := ubuntu
@@ -124,6 +129,7 @@ docker-build-%:
--build-arg PKG_NAME="$(LIB_NAME)" \ --build-arg PKG_NAME="$(LIB_NAME)" \
--build-arg PKG_VERS="$(PACKAGE_VERSION)" \ --build-arg PKG_VERS="$(PACKAGE_VERSION)" \
--build-arg PKG_REV="$(PACKAGE_REVISION)" \ --build-arg PKG_REV="$(PACKAGE_REVISION)" \
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \ --build-arg GIT_COMMIT="$(GIT_COMMIT)" \
--tag $(BUILDIMAGE) \ --tag $(BUILDIMAGE) \
--file $(DOCKERFILE) . --file $(DOCKERFILE) .

10
go.mod
View File

@@ -3,8 +3,8 @@ module github.com/NVIDIA/nvidia-container-toolkit
go 1.23.0 go 1.23.0
require ( require (
github.com/NVIDIA/go-nvlib v0.7.3 github.com/NVIDIA/go-nvlib v0.7.2
github.com/NVIDIA/go-nvml v0.12.9-0 github.com/NVIDIA/go-nvml v0.12.4-1
github.com/cyphar/filepath-securejoin v0.4.1 github.com/cyphar/filepath-securejoin v0.4.1
github.com/moby/sys/reexec v0.1.0 github.com/moby/sys/reexec v0.1.0
github.com/moby/sys/symlink v0.3.0 github.com/moby/sys/symlink v0.3.0
@@ -13,15 +13,15 @@ require (
github.com/pelletier/go-toml v1.9.5 github.com/pelletier/go-toml v1.9.5
github.com/sirupsen/logrus v1.9.3 github.com/sirupsen/logrus v1.9.3
github.com/stretchr/testify v1.10.0 github.com/stretchr/testify v1.10.0
github.com/urfave/cli/v2 v2.27.7 github.com/urfave/cli/v2 v2.27.6
golang.org/x/mod v0.25.0 golang.org/x/mod v0.24.0
golang.org/x/sys v0.33.0 golang.org/x/sys v0.33.0
tags.cncf.io/container-device-interface v1.0.1 tags.cncf.io/container-device-interface v1.0.1
tags.cncf.io/container-device-interface/specs-go v1.0.0 tags.cncf.io/container-device-interface/specs-go v1.0.0
) )
require ( require (
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect github.com/davecgh/go-spew v1.1.1 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect github.com/google/uuid v1.6.0 // indirect

20
go.sum
View File

@@ -1,11 +1,11 @@
github.com/NVIDIA/go-nvlib v0.7.3 h1:kXc8PkWUlrwedSpM4fR8xT/DAq1NKy8HqhpgteFcGAw= github.com/NVIDIA/go-nvlib v0.7.2 h1:7sy/NVUa4sM9FLKwH6CjBfHSWrJUmv8emVyxLTzjfOA=
github.com/NVIDIA/go-nvlib v0.7.3/go.mod h1:i95Je7GinMy/+BDs++DAdbPmT2TubjNP8i8joC7DD7I= github.com/NVIDIA/go-nvlib v0.7.2/go.mod h1:2Kh2kYSP5IJ8EKf0/SYDzHiQKb9EJkwOf2LQzu6pXzY=
github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0= github.com/NVIDIA/go-nvml v0.12.4-1 h1:WKUvqshhWSNTfm47ETRhv0A0zJyr1ncCuHiXwoTrBEc=
github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4= github.com/NVIDIA/go-nvml v0.12.4-1/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM= github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ= github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo= github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E= github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
@@ -69,8 +69,8 @@ github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww= github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA= github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU= github.com/urfave/cli/v2 v2.27.6 h1:VdRdS98FNhKZ8/Az8B7MTyGQmpIr36O1EHybx/LaZ4g=
github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4= github.com/urfave/cli/v2 v2.27.6/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU= github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
@@ -80,8 +80,8 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y= github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM= github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=

View File

@@ -53,6 +53,6 @@ docker run --rm \
-v $(pwd):$(pwd) \ -v $(pwd):$(pwd) \
-w $(pwd) \ -w $(pwd) \
-u $(id -u):$(id -g) \ -u $(id -u):$(id -g) \
--entrypoint="sh" \ --entrypoint="bash" \
${IMAGE} \ ${IMAGE} \
-c "cp -p -R /artifacts/* ${DIST_DIR}" -c "cp --preserve=timestamps -R /artifacts/* ${DIST_DIR}"

View File

@@ -31,10 +31,8 @@ import (
) )
const ( const (
FilePathOverrideEnvVar = "NVIDIA_CTK_CONFIG_FILE_PATH" configOverride = "XDG_CONFIG_HOME"
RelativeFilePath = "nvidia-container-runtime/config.toml" configFilePath = "nvidia-container-runtime/config.toml"
configRootOverride = "XDG_CONFIG_HOME"
nvidiaCTKExecutable = "nvidia-ctk" nvidiaCTKExecutable = "nvidia-ctk"
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk" nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
@@ -76,15 +74,11 @@ type Config struct {
// GetConfigFilePath returns the path to the config file for the configured system // GetConfigFilePath returns the path to the config file for the configured system
func GetConfigFilePath() string { func GetConfigFilePath() string {
if configFilePathOverride := os.Getenv(FilePathOverrideEnvVar); configFilePathOverride != "" { if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
return configFilePathOverride return filepath.Join(XDGConfigDir, configFilePath)
}
configRoot := "/etc"
if XDGConfigDir := os.Getenv(configRootOverride); len(XDGConfigDir) != 0 {
configRoot = XDGConfigDir
} }
return filepath.Join(configRoot, RelativeFilePath) return filepath.Join("/etc", configFilePath)
} }
// GetConfig sets up the config struct. Values are read from a toml file // GetConfig sets up the config struct. Values are read from a toml file
@@ -116,7 +110,7 @@ func GetDefault() (*Config, error) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{ NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null", DebugFilePath: "/dev/null",
LogLevel: "info", LogLevel: "info",
Runtimes: []string{"runc", "crun"}, Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto", Mode: "auto",
Modes: modesConfig{ Modes: modesConfig{
CSV: csvModeConfig{ CSV: csvModeConfig{

View File

@@ -27,26 +27,9 @@ import (
func TestGetConfigWithCustomConfig(t *testing.T) { func TestGetConfigWithCustomConfig(t *testing.T) {
testDir := t.TempDir() testDir := t.TempDir()
t.Setenv(configRootOverride, testDir) t.Setenv(configOverride, testDir)
filename := filepath.Join(testDir, RelativeFilePath) filename := filepath.Join(testDir, configFilePath)
// By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
require.NoError(t, os.WriteFile(filename, contents, 0600))
cfg, err := GetConfig()
require.NoError(t, err)
require.Equal(t, "/nvidia-container-toolkit.log", cfg.NVIDIAContainerRuntimeConfig.DebugFilePath)
}
func TestGetConfigWithConfigFilePathOverride(t *testing.T) {
testDir := t.TempDir()
filename := filepath.Join(testDir, RelativeFilePath)
t.Setenv(FilePathOverrideEnvVar, filename)
// By default debug is disabled // By default debug is disabled
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"") contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
@@ -80,7 +63,7 @@ func TestGetConfig(t *testing.T) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{ NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null", DebugFilePath: "/dev/null",
LogLevel: "info", LogLevel: "info",
Runtimes: []string{"runc", "crun"}, Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto", Mode: "auto",
Modes: modesConfig{ Modes: modesConfig{
CSV: csvModeConfig{ CSV: csvModeConfig{
@@ -187,7 +170,7 @@ func TestGetConfig(t *testing.T) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{ NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null", DebugFilePath: "/dev/null",
LogLevel: "info", LogLevel: "info",
Runtimes: []string{"runc", "crun"}, Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto", Mode: "auto",
Modes: modesConfig{ Modes: modesConfig{
CSV: csvModeConfig{ CSV: csvModeConfig{
@@ -306,7 +289,7 @@ func TestGetConfig(t *testing.T) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{ NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null", DebugFilePath: "/dev/null",
LogLevel: "info", LogLevel: "info",
Runtimes: []string{"runc", "crun"}, Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto", Mode: "auto",
Modes: modesConfig{ Modes: modesConfig{
CSV: csvModeConfig{ CSV: csvModeConfig{
@@ -348,7 +331,7 @@ func TestGetConfig(t *testing.T) {
NVIDIAContainerRuntimeConfig: RuntimeConfig{ NVIDIAContainerRuntimeConfig: RuntimeConfig{
DebugFilePath: "/dev/null", DebugFilePath: "/dev/null",
LogLevel: "info", LogLevel: "info",
Runtimes: []string{"runc", "crun"}, Runtimes: []string{"docker-runc", "runc", "crun"},
Mode: "auto", Mode: "auto",
Modes: modesConfig{ Modes: modesConfig{
CSV: csvModeConfig{ CSV: csvModeConfig{

View File

@@ -21,35 +21,22 @@ import (
"strings" "strings"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
) )
type builder struct { type builder struct {
CUDA env map[string]string
mounts []specs.Mount
disableRequire bool disableRequire bool
} }
// Option is a functional option for creating a CUDA image.
type Option func(*builder) error
// New creates a new CUDA image from the input options. // New creates a new CUDA image from the input options.
func New(opt ...Option) (CUDA, error) { func New(opt ...Option) (CUDA, error) {
b := &builder{ b := &builder{}
CUDA: CUDA{
acceptEnvvarUnprivileged: true,
},
}
for _, o := range opt { for _, o := range opt {
if err := o(b); err != nil { if err := o(b); err != nil {
return CUDA{}, err return CUDA{}, err
} }
} }
if b.logger == nil {
b.logger = logger.New()
}
if b.env == nil { if b.env == nil {
b.env = make(map[string]string) b.env = make(map[string]string)
} }
@@ -63,36 +50,15 @@ func (b builder) build() (CUDA, error) {
b.env[EnvVarNvidiaDisableRequire] = "true" b.env[EnvVarNvidiaDisableRequire] = "true"
} }
return b.CUDA, nil c := CUDA{
env: b.env,
mounts: b.mounts,
}
return c, nil
} }
func WithAcceptDeviceListAsVolumeMounts(acceptDeviceListAsVolumeMounts bool) Option { // Option is a functional option for creating a CUDA image.
return func(b *builder) error { type Option func(*builder) error
b.acceptDeviceListAsVolumeMounts = acceptDeviceListAsVolumeMounts
return nil
}
}
func WithAcceptEnvvarUnprivileged(acceptEnvvarUnprivileged bool) Option {
return func(b *builder) error {
b.acceptEnvvarUnprivileged = acceptEnvvarUnprivileged
return nil
}
}
func WithAnnotations(annotations map[string]string) Option {
return func(b *builder) error {
b.annotations = annotations
return nil
}
}
func WithAnnotationsPrefixes(annotationsPrefixes []string) Option {
return func(b *builder) error {
b.annotationsPrefixes = annotationsPrefixes
return nil
}
}
// WithDisableRequire sets the disable require option. // WithDisableRequire sets the disable require option.
func WithDisableRequire(disableRequire bool) Option { func WithDisableRequire(disableRequire bool) Option {
@@ -127,14 +93,6 @@ func WithEnvMap(env map[string]string) Option {
} }
} }
// WithLogger sets the logger to use when creating the CUDA image.
func WithLogger(logger logger.Interface) Option {
return func(b *builder) error {
b.logger = logger
return nil
}
}
// WithMounts sets the mounts associated with the CUDA image. // WithMounts sets the mounts associated with the CUDA image.
func WithMounts(mounts []specs.Mount) Option { func WithMounts(mounts []specs.Mount) Option {
return func(b *builder) error { return func(b *builder) error {
@@ -142,20 +100,3 @@ func WithMounts(mounts []specs.Mount) Option {
return nil return nil
} }
} }
// WithPreferredVisibleDevicesEnvVars sets the environment variables that
// should take precedence over the default NVIDIA_VISIBLE_DEVICES.
func WithPreferredVisibleDevicesEnvVars(preferredVisibleDeviceEnvVars ...string) Option {
return func(b *builder) error {
b.preferredVisibleDeviceEnvVars = preferredVisibleDeviceEnvVars
return nil
}
}
// WithPrivileged sets whether an image is privileged or not.
func WithPrivileged(isPrivileged bool) Option {
return func(b *builder) error {
b.isPrivileged = isPrivileged
return nil
}
}

View File

@@ -19,15 +19,12 @@ package image
import ( import (
"fmt" "fmt"
"path/filepath" "path/filepath"
"slices"
"strconv" "strconv"
"strings" "strings"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
"golang.org/x/mod/semver" "golang.org/x/mod/semver"
"tags.cncf.io/container-device-interface/pkg/parser" "tags.cncf.io/container-device-interface/pkg/parser"
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
) )
const ( const (
@@ -41,44 +38,27 @@ const (
// a map of environment variable to values that can be used to perform lookups // a map of environment variable to values that can be used to perform lookups
// such as requirements. // such as requirements.
type CUDA struct { type CUDA struct {
logger logger.Interface env map[string]string
mounts []specs.Mount
annotations map[string]string
env map[string]string
isPrivileged bool
mounts []specs.Mount
annotationsPrefixes []string
acceptDeviceListAsVolumeMounts bool
acceptEnvvarUnprivileged bool
preferredVisibleDeviceEnvVars []string
} }
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec. // NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
// The process environment is read (if present) to construc the CUDA Image. // The process environment is read (if present) to construc the CUDA Image.
func NewCUDAImageFromSpec(spec *specs.Spec, opts ...Option) (CUDA, error) { func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
if spec == nil {
return New(opts...)
}
var env []string var env []string
if spec.Process != nil { if spec != nil && spec.Process != nil {
env = spec.Process.Env env = spec.Process.Env
} }
specOpts := []Option{ return New(
WithAnnotations(spec.Annotations),
WithEnv(env), WithEnv(env),
WithMounts(spec.Mounts), WithMounts(spec.Mounts),
WithPrivileged(IsPrivileged((*OCISpec)(spec))), )
}
return New(append(opts, specOpts...)...)
} }
// newCUDAImageFromEnv creates a CUDA image from the input environment. The environment // NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
// is a list of strings of the form ENVAR=VALUE. // is a list of strings of the form ENVAR=VALUE.
func newCUDAImageFromEnv(env []string) (CUDA, error) { func NewCUDAImageFromEnv(env []string) (CUDA, error) {
return New(WithEnv(env)) return New(WithEnv(env))
} }
@@ -103,10 +83,6 @@ func (i CUDA) IsLegacy() bool {
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0 return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
} }
func (i CUDA) IsPrivileged() bool {
return i.isPrivileged
}
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment // GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
// variables. // variables.
func (i CUDA) GetRequirements() ([]string, error) { func (i CUDA) GetRequirements() ([]string, error) {
@@ -144,8 +120,8 @@ func (i CUDA) HasDisableRequire() bool {
return false return false
} }
// devicesFromEnvvars returns the devices requested by the image through environment variables // DevicesFromEnvvars returns the devices requested by the image through environment variables
func (i CUDA) devicesFromEnvvars(envVars ...string) []string { func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
// We concantenate all the devices from the specified env. // We concantenate all the devices from the specified env.
var isSet bool var isSet bool
var devices []string var devices []string
@@ -166,15 +142,15 @@ func (i CUDA) devicesFromEnvvars(envVars ...string) []string {
// Environment variable unset with legacy image: default to "all". // Environment variable unset with legacy image: default to "all".
if !isSet && len(devices) == 0 && i.IsLegacy() { if !isSet && len(devices) == 0 && i.IsLegacy() {
devices = []string{"all"} return NewVisibleDevices("all")
} }
// Environment variable unset or empty or "void": return nil // Environment variable unset or empty or "void": return nil
if len(devices) == 0 || requested["void"] { if len(devices) == 0 || requested["void"] {
devices = []string{"void"} return NewVisibleDevices("void")
} }
return NewVisibleDevices(devices...).List() return NewVisibleDevices(devices...)
} }
// GetDriverCapabilities returns the requested driver capabilities. // GetDriverCapabilities returns the requested driver capabilities.
@@ -224,137 +200,46 @@ func parseMajorMinorVersion(version string) (string, error) {
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/ // OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool { func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
var hasCDIdevice bool var hasCDIdevice bool
for _, device := range i.VisibleDevices() { for _, device := range i.VisibleDevicesFromEnvVar() {
if !parser.IsQualifiedName(device) { if !parser.IsQualifiedName(device) {
return false return false
} }
hasCDIdevice = true hasCDIdevice = true
} }
for _, device := range i.DevicesFromMounts() {
if !strings.HasPrefix(device, "cdi/") {
return false
}
hasCDIdevice = true
}
return hasCDIdevice return hasCDIdevice
} }
// visibleEnvVars returns the environment variables that are used to determine device visibility. // VisibleDevicesFromEnvVar returns the set of visible devices requested through
// It returns the preferred environment variables that are set, or NVIDIA_VISIBLE_DEVICES if none are set. // the NVIDIA_VISIBLE_DEVICES environment variable.
func (i CUDA) visibleEnvVars() []string { func (i CUDA) VisibleDevicesFromEnvVar() []string {
var envVars []string return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
for _, envVar := range i.preferredVisibleDeviceEnvVars {
if !i.HasEnvvar(envVar) {
continue
}
envVars = append(envVars, envVar)
}
if len(envVars) > 0 {
return envVars
}
return []string{EnvVarNvidiaVisibleDevices}
} }
// VisibleDevices returns a list of devices requested in the container image. // VisibleDevicesFromMounts returns the set of visible devices requested as mounts.
// If volume mount requests are enabled these are returned if requested, func (i CUDA) VisibleDevicesFromMounts() []string {
// otherwise device requests through environment variables are considered.
// In cases where environment variable requests required privileged containers,
// such devices requests are ignored.
func (i CUDA) VisibleDevices() []string {
// If annotation device requests are present, these are preferred.
annotationDeviceRequests := i.cdiDeviceRequestsFromAnnotations()
if len(annotationDeviceRequests) > 0 {
return annotationDeviceRequests
}
// If enabled, try and get the device list from volume mounts first
if i.acceptDeviceListAsVolumeMounts {
volumeMountDeviceRequests := i.visibleDevicesFromMounts()
if len(volumeMountDeviceRequests) > 0 {
return volumeMountDeviceRequests
}
}
// Get the Fallback to reading from the environment variable if privileges are correct
envVarDeviceRequests := i.visibleDevicesFromEnvVar()
if len(envVarDeviceRequests) == 0 {
return nil
}
// If the container is privileged, or environment variable requests are
// allowed for unprivileged containers, these devices are returned.
if i.isPrivileged || i.acceptEnvvarUnprivileged {
return envVarDeviceRequests
}
// We log a warning if we are ignoring the environment variable requests.
envVars := i.visibleEnvVars()
if len(envVars) > 0 {
i.logger.Warningf("Ignoring devices requested by environment variable(s) in unprivileged container: %v", envVars)
}
return nil
}
// cdiDeviceRequestsFromAnnotations returns a list of devices specified in the
// annotations.
// Keys starting with the specified prefixes are considered and expected to
// contain a comma-separated list of fully-qualified CDI devices names.
// The format of the requested devices is not checked and the list is not
// deduplicated.
func (i CUDA) cdiDeviceRequestsFromAnnotations() []string {
if len(i.annotationsPrefixes) == 0 || len(i.annotations) == 0 {
return nil
}
var annotationKeys []string
for key := range i.annotations {
for _, prefix := range i.annotationsPrefixes {
if strings.HasPrefix(key, prefix) {
annotationKeys = append(annotationKeys, key)
// There is no need to check additional prefixes since we
// typically deduplicate devices in any case.
break
}
}
}
// We sort the annotationKeys for consistent results.
slices.Sort(annotationKeys)
var devices []string var devices []string
for _, key := range annotationKeys { for _, device := range i.DevicesFromMounts() {
devices = append(devices, strings.Split(i.annotations[key], ",")...)
}
return devices
}
// visibleDevicesFromEnvVar returns the set of visible devices requested through environment variables.
// If any of the preferredVisibleDeviceEnvVars are present in the image, they
// are used to determine the visible devices. If this is not the case, the
// NVIDIA_VISIBLE_DEVICES environment variable is used.
func (i CUDA) visibleDevicesFromEnvVar() []string {
envVars := i.visibleEnvVars()
return i.devicesFromEnvvars(envVars...)
}
// visibleDevicesFromMounts returns the set of visible devices requested as mounts.
func (i CUDA) visibleDevicesFromMounts() []string {
var devices []string
for _, device := range i.requestsFromMounts() {
switch { switch {
case strings.HasPrefix(device, volumeMountDevicePrefixCDI):
continue
case strings.HasPrefix(device, volumeMountDevicePrefixImex): case strings.HasPrefix(device, volumeMountDevicePrefixImex):
continue continue
case strings.HasPrefix(device, volumeMountDevicePrefixCDI):
name, err := cdiDeviceMountRequest(device).qualifiedName()
if err != nil {
i.logger.Warningf("Ignoring invalid mount request for CDI device %v: %v", device, err)
continue
}
devices = append(devices, name)
default:
devices = append(devices, device)
} }
devices = append(devices, device)
} }
return devices return devices
} }
// requestsFromMounts returns a list of device specified as mounts. // DevicesFromMounts returns a list of device specified as mounts.
func (i CUDA) requestsFromMounts() []string { // TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
func (i CUDA) DevicesFromMounts() []string {
root := filepath.Clean(DeviceListAsVolumeMountsRoot) root := filepath.Clean(DeviceListAsVolumeMountsRoot)
seen := make(map[string]bool) seen := make(map[string]bool)
var devices []string var devices []string
@@ -386,35 +271,28 @@ func (i CUDA) requestsFromMounts() []string {
return devices return devices
} }
// a cdiDeviceMountRequest represents a CDI device requests as a mount. // CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
// Here the host path /dev/null is mounted to a particular path in the container. func (i CUDA) CDIDevicesFromMounts() []string {
// The container path has the form: var devices []string
// /var/run/nvidia-container-devices/cdi/<vendor>/<class>/<device> for _, mountDevice := range i.DevicesFromMounts() {
// or if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixCDI) {
// /var/run/nvidia-container-devices/cdi/<vendor>/<class>=<device> continue
type cdiDeviceMountRequest string }
parts := strings.SplitN(strings.TrimPrefix(mountDevice, volumeMountDevicePrefixCDI), "/", 3)
// qualifiedName returns the fully-qualified name of the CDI device. if len(parts) != 3 {
func (m cdiDeviceMountRequest) qualifiedName() (string, error) { continue
if !strings.HasPrefix(string(m), volumeMountDevicePrefixCDI) { }
return "", fmt.Errorf("invalid mount CDI device request: %s", m) vendor := parts[0]
class := parts[1]
device := parts[2]
devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device))
} }
return devices
requestedDevice := strings.TrimPrefix(string(m), volumeMountDevicePrefixCDI)
if parser.IsQualifiedName(requestedDevice) {
return requestedDevice, nil
}
parts := strings.SplitN(requestedDevice, "/", 3)
if len(parts) != 3 {
return "", fmt.Errorf("invalid mount CDI device request: %s", m)
}
return fmt.Sprintf("%s/%s=%s", parts[0], parts[1], parts[2]), nil
} }
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image. // ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image.
func (i CUDA) ImexChannelsFromEnvVar() []string { func (i CUDA) ImexChannelsFromEnvVar() []string {
imexChannels := i.devicesFromEnvvars(EnvVarNvidiaImexChannels) imexChannels := i.DevicesFromEnvvars(EnvVarNvidiaImexChannels).List()
if len(imexChannels) == 1 && imexChannels[0] == "all" { if len(imexChannels) == 1 && imexChannels[0] == "all" {
return nil return nil
} }
@@ -424,7 +302,7 @@ func (i CUDA) ImexChannelsFromEnvVar() []string {
// ImexChannelsFromMounts returns the list of IMEX channels requested for the image. // ImexChannelsFromMounts returns the list of IMEX channels requested for the image.
func (i CUDA) ImexChannelsFromMounts() []string { func (i CUDA) ImexChannelsFromMounts() []string {
var channels []string var channels []string
for _, mountDevice := range i.requestsFromMounts() { for _, mountDevice := range i.DevicesFromMounts() {
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixImex) { if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixImex) {
continue continue
} }

View File

@@ -21,91 +21,9 @@ import (
"testing" "testing"
"github.com/opencontainers/runtime-spec/specs-go" "github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestNewCUDAImageFromSpec(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct {
description string
spec *specs.Spec
options []Option
expected CUDA
}{
{
description: "no env vars",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{},
},
},
expected: CUDA{
logger: logger,
env: map[string]string{},
acceptEnvvarUnprivileged: true,
},
},
{
description: "NVIDIA_VISIBLE_DEVICES=all",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
},
},
expected: CUDA{
logger: logger,
env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
acceptEnvvarUnprivileged: true,
},
},
{
description: "Spec overrides options",
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
},
Mounts: []specs.Mount{
{
Source: "/spec-source",
Destination: "/spec-destination",
},
},
},
options: []Option{
WithEnvMap(map[string]string{"OTHER": "value"}),
WithMounts([]specs.Mount{
{
Source: "/option-source",
Destination: "/option-destination",
},
}),
},
expected: CUDA{
logger: logger,
env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
mounts: []specs.Mount{
{
Source: "/spec-source",
Destination: "/spec-destination",
},
},
acceptEnvvarUnprivileged: true,
},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
options := append([]Option{WithLogger(logger)}, tc.options...)
image, err := NewCUDAImageFromSpec(tc.spec, options...)
require.NoError(t, err)
require.EqualValues(t, tc.expected, image)
})
}
}
func TestParseMajorMinorVersionValid(t *testing.T) { func TestParseMajorMinorVersionValid(t *testing.T) {
var tests = []struct { var tests = []struct {
version string version string
@@ -204,7 +122,7 @@ func TestGetRequirements(t *testing.T) {
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
image, err := newCUDAImageFromEnv(tc.env) image, err := NewCUDAImageFromEnv(tc.env)
require.NoError(t, err) require.NoError(t, err)
requirements, err := image.GetRequirements() requirements, err := image.GetRequirements()
@@ -215,226 +133,6 @@ func TestGetRequirements(t *testing.T) {
} }
} }
func TestGetDevicesFromEnvvar(t *testing.T) {
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
gpuID := "GPU-12345"
anotherGPUID := "GPU-67890"
thirdGPUID := "MIG-12345"
var tests = []struct {
description string
preferredVisibleDeviceEnvVars []string
env map[string]string
expectedDevices []string
}{
{
description: "empty env returns nil for non-legacy image",
},
{
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: "",
},
},
{
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: "void",
},
},
{
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: "none",
},
expectedDevices: []string{""},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: gpuID,
},
expectedDevices: []string{gpuID},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: gpuID,
EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
{
description: "empty env returns all for legacy image",
env: map[string]string{
EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{"all"},
},
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
// not enabled
{
description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
envDockerResourceGPUs: anotherGPUID,
},
},
{
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: "",
envDockerResourceGPUs: anotherGPUID,
},
},
{
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: "void",
envDockerResourceGPUs: anotherGPUID,
},
},
{
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: "none",
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{""},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{gpuID},
},
{
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
env: map[string]string{
EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
{
description: "empty env returns all for legacy image",
env: map[string]string{
envDockerResourceGPUs: anotherGPUID,
EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{"all"},
},
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
// enabled
{
description: "empty env returns nil for non-legacy image",
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
},
{
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: "",
},
},
{
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: "void",
},
},
{
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: "none",
},
expectedDevices: []string{""},
},
{
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: gpuID,
},
expectedDevices: []string{gpuID},
},
{
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: gpuID,
EnvVarCudaVersion: "legacy",
},
expectedDevices: []string{gpuID},
},
{
description: "DOCKER_RESOURCE_GPUS is selected if present",
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
env: map[string]string{
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
{
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
env: map[string]string{
EnvVarNvidiaVisibleDevices: gpuID,
envDockerResourceGPUs: anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
{
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
{
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS": thirdGPUID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: []string{thirdGPUID, anotherGPUID},
},
{
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
EnvVarNvidiaVisibleDevices: gpuID,
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
},
expectedDevices: []string{anotherGPUID},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
image, err := New(
WithEnvMap(tc.env),
WithPrivileged(true),
WithAcceptDeviceListAsVolumeMounts(false),
WithAcceptEnvvarUnprivileged(false),
WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...),
)
require.NoError(t, err)
devices := image.visibleDevicesFromEnvVar()
require.EqualValues(t, tc.expectedDevices, devices)
})
}
}
func TestGetVisibleDevicesFromMounts(t *testing.T) { func TestGetVisibleDevicesFromMounts(t *testing.T) {
var tests = []struct { var tests = []struct {
description string description string
@@ -487,9 +185,9 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"}, expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"},
}, },
{ {
description: "cdi devices are included", description: "cdi devices are ignored",
mounts: makeTestMounts("GPU0", "nvidia.com/gpu=all", "GPU1"), mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"),
expectedDevices: []string{"GPU0", "nvidia.com/gpu=all", "GPU1"}, expectedDevices: []string{"GPU0", "GPU1"},
}, },
{ {
description: "imex devices are ignored", description: "imex devices are ignored",
@@ -499,195 +197,8 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
} }
for _, tc := range tests { for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
image, err := New(WithMounts(tc.mounts)) image, _ := New(WithMounts(tc.mounts))
require.NoError(t, err) require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts())
require.Equal(t, tc.expectedDevices, image.visibleDevicesFromMounts())
})
}
}
func TestVisibleDevices(t *testing.T) {
var tests = []struct {
description string
mountDevices []specs.Mount
envvarDevices string
privileged bool
acceptUnprivileged bool
acceptMounts bool
preferredVisibleDeviceEnvVars []string
env map[string]string
expectedDevices []string
}{
{
description: "Mount devices, unprivileged, no accept unprivileged",
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
privileged: false,
acceptUnprivileged: false,
acceptMounts: true,
expectedDevices: []string{"GPU0", "GPU1"},
},
{
description: "No mount devices, unprivileged, no accept unprivileged",
mountDevices: nil,
envvarDevices: "GPU0,GPU1",
privileged: false,
acceptUnprivileged: false,
acceptMounts: true,
expectedDevices: nil,
},
{
description: "No mount devices, privileged, no accept unprivileged",
mountDevices: nil,
envvarDevices: "GPU0,GPU1",
privileged: true,
acceptUnprivileged: false,
acceptMounts: true,
expectedDevices: []string{"GPU0", "GPU1"},
},
{
description: "No mount devices, unprivileged, accept unprivileged",
mountDevices: nil,
envvarDevices: "GPU0,GPU1",
privileged: false,
acceptUnprivileged: true,
acceptMounts: true,
expectedDevices: []string{"GPU0", "GPU1"},
},
{
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
privileged: false,
acceptUnprivileged: true,
acceptMounts: false,
expectedDevices: []string{"GPU2", "GPU3"},
},
{
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
mountDevices: []specs.Mount{
{
Source: "/dev/null",
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
},
{
Source: "/dev/null",
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
},
},
envvarDevices: "GPU2,GPU3",
privileged: false,
acceptUnprivileged: false,
acceptMounts: false,
expectedDevices: nil,
},
// New test cases for visibleEnvVars functionality
{
description: "preferred env var set and present in env, privileged",
mountDevices: nil,
envvarDevices: "",
privileged: true,
acceptUnprivileged: false,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
env: map[string]string{
"DOCKER_RESOURCE_GPUS": "GPU-12345",
},
expectedDevices: []string{"GPU-12345"},
},
{
description: "preferred env var set and present in env, unprivileged but accepted",
mountDevices: nil,
envvarDevices: "",
privileged: false,
acceptUnprivileged: true,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
env: map[string]string{
"DOCKER_RESOURCE_GPUS": "GPU-12345",
},
expectedDevices: []string{"GPU-12345"},
},
{
description: "preferred env var set and present in env, unprivileged and not accepted",
mountDevices: nil,
envvarDevices: "",
privileged: false,
acceptUnprivileged: false,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
env: map[string]string{
"DOCKER_RESOURCE_GPUS": "GPU-12345",
},
expectedDevices: nil,
},
{
description: "multiple preferred env vars, both present, privileged",
mountDevices: nil,
envvarDevices: "",
privileged: true,
acceptUnprivileged: false,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
env: map[string]string{
"DOCKER_RESOURCE_GPUS": "GPU-12345",
"DOCKER_RESOURCE_GPUS_ADDITIONAL": "GPU-67890",
},
expectedDevices: []string{"GPU-12345", "GPU-67890"},
},
{
description: "preferred env var not present, fallback to NVIDIA_VISIBLE_DEVICES, privileged",
mountDevices: nil,
envvarDevices: "GPU-12345",
privileged: true,
acceptUnprivileged: false,
acceptMounts: true,
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
env: map[string]string{
EnvVarNvidiaVisibleDevices: "GPU-12345",
},
expectedDevices: []string{"GPU-12345"},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
// Create env map with both NVIDIA_VISIBLE_DEVICES and any additional env vars
env := make(map[string]string)
if tc.envvarDevices != "" {
env[EnvVarNvidiaVisibleDevices] = tc.envvarDevices
}
for k, v := range tc.env {
env[k] = v
}
image, err := New(
WithEnvMap(env),
WithMounts(tc.mountDevices),
WithPrivileged(tc.privileged),
WithAcceptDeviceListAsVolumeMounts(tc.acceptMounts),
WithAcceptEnvvarUnprivileged(tc.acceptUnprivileged),
WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...),
)
require.NoError(t, err)
require.Equal(t, tc.expectedDevices, image.VisibleDevices())
}) })
} }
} }
@@ -713,7 +224,7 @@ func TestImexChannelsFromEnvVar(t *testing.T) {
for _, tc := range testCases { for _, tc := range testCases {
for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} { for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
t.Run(tc.description+id, func(t *testing.T) { t.Run(tc.description+id, func(t *testing.T) {
i, err := newCUDAImageFromEnv(append(baseEnvvars, tc.env...)) i, err := NewCUDAImageFromEnv(append(baseEnvvars, tc.env...))
require.NoError(t, err) require.NoError(t, err)
channels := i.ImexChannelsFromEnvVar() channels := i.ImexChannelsFromEnvVar()
@@ -723,73 +234,6 @@ func TestImexChannelsFromEnvVar(t *testing.T) {
} }
} }
func TestCDIDeviceRequestsFromAnnotations(t *testing.T) {
testCases := []struct {
description string
prefixes []string
annotations map[string]string
expectedDevices []string
}{
{
description: "no annotations",
},
{
description: "no matching annotations",
prefixes: []string{"not-prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device=bar",
},
},
{
description: "single matching annotation",
prefixes: []string{"prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device=bar",
},
expectedDevices: []string{"example.com/device=bar"},
},
{
description: "multiple matching annotations",
prefixes: []string{"prefix/", "another-prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device=bar",
"another-prefix/bar": "example.com/device=baz",
},
expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
},
{
description: "multiple matching annotations with duplicate devices",
prefixes: []string{"prefix/", "another-prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device=bar",
"another-prefix/bar": "example.com/device=bar",
},
expectedDevices: []string{"example.com/device=bar", "example.com/device=bar"},
},
{
description: "invalid devices are returned as is",
prefixes: []string{"prefix/"},
annotations: map[string]string{
"prefix/foo": "example.com/device",
},
expectedDevices: []string{"example.com/device"},
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
image, err := New(
WithAnnotationsPrefixes(tc.prefixes),
WithAnnotations(tc.annotations),
)
require.NoError(t, err)
devices := image.cdiDeviceRequestsFromAnnotations()
require.ElementsMatch(t, tc.expectedDevices, devices)
})
}
}
func makeTestMounts(paths ...string) []specs.Mount { func makeTestMounts(paths ...string) []specs.Mount {
var mounts []specs.Mount var mounts []specs.Mount
for _, path := range paths { for _, path := range paths {

View File

@@ -24,39 +24,20 @@ const (
capSysAdmin = "CAP_SYS_ADMIN" capSysAdmin = "CAP_SYS_ADMIN"
) )
type CapabilitiesGetter interface {
GetCapabilities() []string
}
type OCISpec specs.Spec
type OCISpecCapabilities specs.LinuxCapabilities
// IsPrivileged returns true if the container is a privileged container. // IsPrivileged returns true if the container is a privileged container.
func IsPrivileged(s CapabilitiesGetter) bool { func IsPrivileged(s *specs.Spec) bool {
if s == nil { if s.Process.Capabilities == nil {
return false return false
} }
for _, c := range s.GetCapabilities() {
// We only make sure that the bounding capabibility set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the privileged NVIDIA capabilities.
for _, c := range s.Process.Capabilities.Bounding {
if c == capSysAdmin { if c == capSysAdmin {
return true return true
} }
} }
return false return false
} }
func (s OCISpec) GetCapabilities() []string {
if s.Process == nil || s.Process.Capabilities == nil {
return nil
}
return (*OCISpecCapabilities)(s.Process.Capabilities).GetCapabilities()
}
func (c OCISpecCapabilities) GetCapabilities() []string {
// We only make sure that the bounding capability set has
// CAP_SYS_ADMIN. This allows us to make sure that the container was
// actually started as '--privileged', but also allow non-root users to
// access the privileged NVIDIA capabilities.
return c.Bounding
}

View File

@@ -1,57 +0,0 @@
/**
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package image
import (
"testing"
"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/require"
)
func TestIsPrivileged(t *testing.T) {
var tests = []struct {
spec specs.Spec
expected bool
}{
{
specs.Spec{
Process: &specs.Process{
Capabilities: &specs.LinuxCapabilities{
Bounding: []string{"CAP_SYS_ADMIN"},
},
},
},
true,
},
{
specs.Spec{
Process: &specs.Process{
Capabilities: &specs.LinuxCapabilities{
Bounding: []string{"CAP_SYS_FOO"},
},
},
},
false,
},
}
for i, tc := range tests {
privileged := IsPrivileged((*OCISpec)(&tc.spec))
require.Equal(t, tc.expected, privileged, "%d: %v", i, tc)
}
}

View File

@@ -62,7 +62,7 @@ load-kmods = true
#debug = "/var/log/nvidia-container-runtime.log" #debug = "/var/log/nvidia-container-runtime.log"
log-level = "info" log-level = "info"
mode = "auto" mode = "auto"
runtimes = ["runc", "crun"] runtimes = ["docker-runc", "runc", "crun"]
[nvidia-container-runtime.modes] [nvidia-container-runtime.modes]

View File

@@ -23,7 +23,6 @@ type cache struct {
sync.Mutex sync.Mutex
devices []Device devices []Device
envVars []EnvVar
hooks []Hook hooks []Hook
mounts []Mount mounts []Mount
} }
@@ -52,20 +51,6 @@ func (c *cache) Devices() ([]Device, error) {
return c.devices, nil return c.devices, nil
} }
func (c *cache) EnvVars() ([]EnvVar, error) {
c.Lock()
defer c.Unlock()
if c.envVars == nil {
envVars, err := c.d.EnvVars()
if err != nil {
return nil, err
}
c.envVars = envVars
}
return c.envVars, nil
}
func (c *cache) Hooks() ([]Hook, error) { func (c *cache) Hooks() ([]Hook, error) {
c.Lock() c.Lock()
defer c.Unlock() defer c.Unlock()

View File

@@ -22,12 +22,6 @@ type Device struct {
Path string Path string
} }
// EnvVar represents a discovered environment variable.
type EnvVar struct {
Name string
Value string
}
// Mount represents a discovered mount. // Mount represents a discovered mount.
type Mount struct { type Mount struct {
HostPath string HostPath string
@@ -48,7 +42,6 @@ type Hook struct {
//go:generate moq -rm -fmt=goimports -stub -out discover_mock.go . Discover //go:generate moq -rm -fmt=goimports -stub -out discover_mock.go . Discover
type Discover interface { type Discover interface {
Devices() ([]Device, error) Devices() ([]Device, error)
EnvVars() ([]EnvVar, error)
Mounts() ([]Mount, error) Mounts() ([]Mount, error)
Hooks() ([]Hook, error) Hooks() ([]Hook, error)
} }

View File

@@ -20,9 +20,6 @@ var _ Discover = &DiscoverMock{}
// DevicesFunc: func() ([]Device, error) { // DevicesFunc: func() ([]Device, error) {
// panic("mock out the Devices method") // panic("mock out the Devices method")
// }, // },
// EnvVarsFunc: func() ([]EnvVar, error) {
// panic("mock out the EnvVars method")
// },
// HooksFunc: func() ([]Hook, error) { // HooksFunc: func() ([]Hook, error) {
// panic("mock out the Hooks method") // panic("mock out the Hooks method")
// }, // },
@@ -39,9 +36,6 @@ type DiscoverMock struct {
// DevicesFunc mocks the Devices method. // DevicesFunc mocks the Devices method.
DevicesFunc func() ([]Device, error) DevicesFunc func() ([]Device, error)
// EnvVarsFunc mocks the EnvVars method.
EnvVarsFunc func() ([]EnvVar, error)
// HooksFunc mocks the Hooks method. // HooksFunc mocks the Hooks method.
HooksFunc func() ([]Hook, error) HooksFunc func() ([]Hook, error)
@@ -53,9 +47,6 @@ type DiscoverMock struct {
// Devices holds details about calls to the Devices method. // Devices holds details about calls to the Devices method.
Devices []struct { Devices []struct {
} }
// EnvVars holds details about calls to the EnvVars method.
EnvVars []struct {
}
// Hooks holds details about calls to the Hooks method. // Hooks holds details about calls to the Hooks method.
Hooks []struct { Hooks []struct {
} }
@@ -64,7 +55,6 @@ type DiscoverMock struct {
} }
} }
lockDevices sync.RWMutex lockDevices sync.RWMutex
lockEnvVars sync.RWMutex
lockHooks sync.RWMutex lockHooks sync.RWMutex
lockMounts sync.RWMutex lockMounts sync.RWMutex
} }
@@ -100,37 +90,6 @@ func (mock *DiscoverMock) DevicesCalls() []struct {
return calls return calls
} }
// EnvVars calls EnvVarsFunc.
func (mock *DiscoverMock) EnvVars() ([]EnvVar, error) {
callInfo := struct {
}{}
mock.lockEnvVars.Lock()
mock.calls.EnvVars = append(mock.calls.EnvVars, callInfo)
mock.lockEnvVars.Unlock()
if mock.EnvVarsFunc == nil {
var (
envVarsOut []EnvVar
errOut error
)
return envVarsOut, errOut
}
return mock.EnvVarsFunc()
}
// EnvVarsCalls gets all the calls that were made to EnvVars.
// Check the length with:
//
// len(mockedDiscover.EnvVarsCalls())
func (mock *DiscoverMock) EnvVarsCalls() []struct {
} {
var calls []struct {
}
mock.lockEnvVars.RLock()
calls = mock.calls.EnvVars
mock.lockEnvVars.RUnlock()
return calls
}
// Hooks calls HooksFunc. // Hooks calls HooksFunc.
func (mock *DiscoverMock) Hooks() ([]Hook, error) { func (mock *DiscoverMock) Hooks() ([]Hook, error) {
callInfo := struct { callInfo := struct {

View File

@@ -1,41 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package discover
var _ Discover = (*EnvVar)(nil)
// Devices returns an empty list of devices for a EnvVar discoverer.
func (e EnvVar) Devices() ([]Device, error) {
return nil, nil
}
// EnvVars returns an empty list of envs for a EnvVar discoverer.
func (e EnvVar) EnvVars() ([]EnvVar, error) {
return []EnvVar{e}, nil
}
// Mounts returns an empty list of mounts for a EnvVar discoverer.
func (e EnvVar) Mounts() ([]Mount, error) {
return nil, nil
}
// Hooks allows the Hook type to also implement the Discoverer interface.
// It returns a single hook
func (e EnvVar) Hooks() ([]Hook, error) {
return nil, nil
}

View File

@@ -45,19 +45,6 @@ func (f firstOf) Devices() ([]Device, error) {
return nil, errs return nil, errs
} }
func (f firstOf) EnvVars() ([]EnvVar, error) {
var errs error
for _, d := range f {
envs, err := d.EnvVars()
if err != nil {
errs = errors.Join(errs, err)
continue
}
return envs, nil
}
return nil, errs
}
func (f firstOf) Hooks() ([]Hook, error) { func (f firstOf) Hooks() ([]Hook, error) {
var errs error var errs error
for _, d := range f { for _, d := range f {

View File

@@ -20,7 +20,6 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"runtime"
"strings" "strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
@@ -82,25 +81,14 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, h
// vulkan ICD files are at {{ .driverRoot }}/vulkan instead of in /etc/vulkan. // vulkan ICD files are at {{ .driverRoot }}/vulkan instead of in /etc/vulkan.
func newVulkanConfigsDiscover(logger logger.Interface, driver *root.Driver) Discover { func newVulkanConfigsDiscover(logger logger.Interface, driver *root.Driver) Discover {
locator := lookup.First(driver.Configs(), driver.Files()) locator := lookup.First(driver.Configs(), driver.Files())
required := []string{
"vulkan/icd.d/nvidia_icd.json",
"vulkan/icd.d/nvidia_layers.json",
"vulkan/implicit_layer.d/nvidia_layers.json",
}
// For some RPM-based driver packages, the vulkan ICD files are installed to
// /usr/share/vulkan/icd.d/nvidia_icd.%{_target_cpu}.json
// We also include this in the list of candidates for the ICD file.
switch runtime.GOARCH {
case "amd64":
required = append(required, "vulkan/icd.d/nvidia_icd.x86_64.json")
case "arm64":
required = append(required, "vulkan/icd.d/nvidia_icd.aarch64.json")
}
return &mountsToContainerPath{ return &mountsToContainerPath{
logger: logger, logger: logger,
locator: locator, locator: locator,
required: required, required: []string{
"vulkan/icd.d/nvidia_icd.json",
"vulkan/icd.d/nvidia_layers.json",
"vulkan/implicit_layer.d/nvidia_layers.json",
},
containerRoot: "/etc", containerRoot: "/etc",
} }
} }

View File

@@ -25,7 +25,7 @@ import (
func TestGraphicsLibrariesDiscoverer(t *testing.T) { func TestGraphicsLibrariesDiscoverer(t *testing.T) {
logger, _ := testlog.NewNullLogger() logger, _ := testlog.NewNullLogger()
hookCreator := NewHookCreator() hookCreator := NewHookCreator("/usr/bin/nvidia-cdi-hook", false)
testCases := []struct { testCases := []struct {
description string description string

View File

@@ -23,36 +23,6 @@ import (
"tags.cncf.io/container-device-interface/pkg/cdi" "tags.cncf.io/container-device-interface/pkg/cdi"
) )
// A HookName represents a supported CDI hooks.
type HookName string
const (
// AllHooks is a special hook name that allows all hooks to be matched.
AllHooks = HookName("all")
// A ChmodHook is used to set the file mode of the specified paths.
// Deprecated: The chmod hook is deprecated and will be removed in a future release.
ChmodHook = HookName("chmod")
// A CreateSymlinksHook is used to create symlinks in the container.
CreateSymlinksHook = HookName("create-symlinks")
// DisableDeviceNodeModificationHook refers to the hook used to ensure that
// device nodes are not created by libnvidia-ml.so or nvidia-smi in a
// container.
// Added in v1.17.8
DisableDeviceNodeModificationHook = HookName("disable-device-node-modification")
// An EnableCudaCompatHook is used to enabled CUDA Forward Compatibility.
// Added in v1.17.5
EnableCudaCompatHook = HookName("enable-cuda-compat")
// An UpdateLDCacheHook is the hook used to update the ldcache in the
// container. This allows injected libraries to be discoverable.
UpdateLDCacheHook = HookName("update-ldcache")
// A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks
// for injected libraries exist in the container.
CreateSonameSymlinksHook = HookName("create-soname-symlinks")
defaultNvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook"
)
var _ Discover = (*Hook)(nil) var _ Discover = (*Hook)(nil)
// Devices returns an empty list of devices for a Hook discoverer. // Devices returns an empty list of devices for a Hook discoverer.
@@ -60,11 +30,6 @@ func (h *Hook) Devices() ([]Device, error) {
return nil, nil return nil, nil
} }
// EnvVars returns an empty list of envs for a Hook discoverer.
func (h *Hook) EnvVars() ([]EnvVar, error) {
return nil, nil
}
// Mounts returns an empty list of mounts for a Hook discoverer. // Mounts returns an empty list of mounts for a Hook discoverer.
func (h *Hook) Mounts() ([]Mount, error) { func (h *Hook) Mounts() ([]Mount, error) {
return nil, nil return nil, nil
@@ -80,130 +45,52 @@ func (h *Hook) Hooks() ([]Hook, error) {
return []Hook{*h}, nil return []Hook{*h}, nil
} }
type Option func(*cdiHookCreator) // Option is a function that configures the nvcdilib
type Option func(*CDIHook)
type cdiHookCreator struct { type CDIHook struct {
nvidiaCDIHookPath string nvidiaCDIHookPath string
disabledHooks map[HookName]bool debugLogging bool
fixedArgs []string
debugLogging bool
} }
// An allDisabledHookCreator is a HookCreator that does not create any hooks.
type allDisabledHookCreator struct{}
// Create returns nil for all hooks for an allDisabledHookCreator.
func (a *allDisabledHookCreator) Create(name HookName, args ...string) *Hook {
return nil
}
// A HookCreator defines an interface for creating discover hooks.
type HookCreator interface { type HookCreator interface {
Create(HookName, ...string) *Hook Create(string, ...string) *Hook
} }
// WithDisabledHooks sets the set of hooks that are disabled for the CDI hook creator. func NewHookCreator(nvidiaCDIHookPath string, debugLogging bool) HookCreator {
// This can be specified multiple times. CDIHook := &CDIHook{
func WithDisabledHooks(hooks ...HookName) Option { nvidiaCDIHookPath: nvidiaCDIHookPath,
return func(c *cdiHookCreator) { debugLogging: debugLogging,
for _, hook := range hooks { }
c.disabledHooks[hook] = true
return CDIHook
}
func (c CDIHook) Create(name string, args ...string) *Hook {
if name == "create-symlinks" {
if len(args) == 0 {
return nil
} }
}
}
// WithNVIDIACDIHookPath sets the path to the nvidia-cdi-hook binary. links := []string{}
func WithNVIDIACDIHookPath(nvidiaCDIHookPath string) Option { for _, arg := range args {
return func(c *cdiHookCreator) { links = append(links, "--link", arg)
c.nvidiaCDIHookPath = nvidiaCDIHookPath }
} args = links
}
func NewHookCreator(opts ...Option) HookCreator {
cdiHookCreator := &cdiHookCreator{
nvidiaCDIHookPath: defaultNvidiaCDIHookPath,
disabledHooks: make(map[HookName]bool),
}
for _, opt := range opts {
opt(cdiHookCreator)
}
if cdiHookCreator.disabledHooks[AllHooks] {
return &allDisabledHookCreator{}
}
cdiHookCreator.fixedArgs = getFixedArgsForCDIHookCLI(cdiHookCreator.nvidiaCDIHookPath)
return cdiHookCreator
}
// Create creates a new hook with the given name and arguments.
// If a hook is disabled, a nil hook is returned.
func (c cdiHookCreator) Create(name HookName, args ...string) *Hook {
if c.isDisabled(name, args...) {
return nil
} }
return &Hook{ return &Hook{
Lifecycle: cdi.CreateContainerHook, Lifecycle: cdi.CreateContainerHook,
Path: c.nvidiaCDIHookPath, Path: c.nvidiaCDIHookPath,
Args: append(c.requiredArgs(name), c.transformArgs(name, args...)...), Args: append(c.requiredArgs(name), args...),
Env: []string{fmt.Sprintf("NVIDIA_CTK_DEBUG=%v", c.debugLogging)}, Env: []string{fmt.Sprintf("NVIDIA_CTK_DEBUG=%v", c.debugLogging)},
} }
} }
// isDisabled checks if the specified hook name is disabled. func (c CDIHook) requiredArgs(name string) []string {
func (c cdiHookCreator) isDisabled(name HookName, args ...string) bool { base := filepath.Base(c.nvidiaCDIHookPath)
if c.disabledHooks[name] {
return true
}
switch name {
case CreateSymlinksHook:
if len(args) == 0 {
return true
}
case ChmodHook:
if len(args) == 0 {
return true
}
}
return false
}
func (c cdiHookCreator) requiredArgs(name HookName) []string {
return append(c.fixedArgs, string(name))
}
func (c cdiHookCreator) transformArgs(name HookName, args ...string) []string {
switch name {
case CreateSymlinksHook:
var transformedArgs []string
for _, arg := range args {
transformedArgs = append(transformedArgs, "--link", arg)
}
return transformedArgs
case ChmodHook:
var transformedArgs = []string{"--mode", "755"}
for _, arg := range args {
transformedArgs = append(transformedArgs, "--path", arg)
}
return transformedArgs
default:
return args
}
}
// getFixedArgsForCDIHookCLI returns the fixed arguments for the hook CLI.
// If the nvidia-ctk binary is used, hooks are implemented under the hook
// subcommand.
// For the nvidia-cdi-hook binary, the hooks are implemented as subcommands of
// the top-level CLI.
func getFixedArgsForCDIHookCLI(nvidiaCDIHookPath string) []string {
base := filepath.Base(nvidiaCDIHookPath)
if base == "nvidia-ctk" { if base == "nvidia-ctk" {
return []string{base, "hook"} return []string{base, "hook", name}
} }
return []string{base} return []string{base, name}
} }

View File

@@ -51,24 +51,30 @@ func (d ldconfig) Hooks() ([]Hook, error) {
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err) return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
} }
var args []string h := createLDCacheUpdateHook(
d.hookCreator,
if d.ldconfigPath != "" { d.ldconfigPath,
args = append(args, "--ldconfig-path", d.ldconfigPath) getLibraryPaths(mounts),
}
for _, f := range uniqueFolders(getLibraryPaths(mounts)) {
args = append(args, "--folder", f)
}
h := Merge(
d.hookCreator.Create(CreateSonameSymlinksHook, args...),
d.hookCreator.Create(UpdateLDCacheHook, args...),
) )
return h.Hooks() return h.Hooks()
} }
// createLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache
func createLDCacheUpdateHook(hookCreator HookCreator, ldconfig string, libraries []string) *Hook {
var args []string
if ldconfig != "" {
args = append(args, "--ldconfig-path", ldconfig)
}
for _, f := range uniqueFolders(libraries) {
args = append(args, "--folder", f)
}
return hookCreator.Create("update-ldcache", args...)
}
// getLibraryPaths extracts the library dirs from the specified mounts // getLibraryPaths extracts the library dirs from the specified mounts
func getLibraryPaths(mounts []Mount) []string { func getLibraryPaths(mounts []Mount) []string {
var paths []string var paths []string

View File

@@ -31,7 +31,7 @@ const (
func TestLDCacheUpdateHook(t *testing.T) { func TestLDCacheUpdateHook(t *testing.T) {
logger, _ := testlog.NewNullLogger() logger, _ := testlog.NewNullLogger()
hookCreator := NewHookCreator(WithNVIDIACDIHookPath(testNvidiaCDIHookPath)) hookCreator := NewHookCreator(testNvidiaCDIHookPath, false)
testCases := []struct { testCases := []struct {
description string description string
@@ -39,24 +39,11 @@ func TestLDCacheUpdateHook(t *testing.T) {
mounts []Mount mounts []Mount
mountError error mountError error
expectedError error expectedError error
expectedHooks []Hook expectedArgs []string
}{ }{
{ {
description: "empty mounts", description: "empty mounts",
expectedHooks: []Hook{ expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache"},
{
Lifecycle: "createContainer",
Path: testNvidiaCDIHookPath,
Args: []string{"nvidia-cdi-hook", "create-soname-symlinks"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
{
Lifecycle: "createContainer",
Path: testNvidiaCDIHookPath,
Args: []string{"nvidia-cdi-hook", "update-ldcache"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
},
}, },
{ {
description: "mount error", description: "mount error",
@@ -79,20 +66,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
Path: "/usr/local/lib/libbar.so", Path: "/usr/local/lib/libbar.so",
}, },
}, },
expectedHooks: []Hook{ expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
{
Lifecycle: "createContainer",
Path: testNvidiaCDIHookPath,
Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
{
Lifecycle: "createContainer",
Path: testNvidiaCDIHookPath,
Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
},
}, },
{ {
description: "host paths are ignored", description: "host paths are ignored",
@@ -102,38 +76,12 @@ func TestLDCacheUpdateHook(t *testing.T) {
Path: "/usr/local/lib/libfoo.so", Path: "/usr/local/lib/libfoo.so",
}, },
}, },
expectedHooks: []Hook{ expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"},
{
Lifecycle: "createContainer",
Path: testNvidiaCDIHookPath,
Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
{
Lifecycle: "createContainer",
Path: testNvidiaCDIHookPath,
Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
},
}, },
{ {
description: "explicit ldconfig path is passed", description: "explicit ldconfig path is passed",
ldconfigPath: testLdconfigPath, ldconfigPath: testLdconfigPath,
expectedHooks: []Hook{ expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
{
Lifecycle: "createContainer",
Path: testNvidiaCDIHookPath,
Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--ldconfig-path", testLdconfigPath},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
{
Lifecycle: "createContainer",
Path: testNvidiaCDIHookPath,
Args: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
Env: []string{"NVIDIA_CTK_DEBUG=false"},
},
},
}, },
} }
@@ -144,6 +92,13 @@ func TestLDCacheUpdateHook(t *testing.T) {
return tc.mounts, tc.mountError return tc.mounts, tc.mountError
}, },
} }
expectedHook := Hook{
Path: testNvidiaCDIHookPath,
Args: tc.expectedArgs,
Lifecycle: "createContainer",
Env: []string{"NVIDIA_CTK_DEBUG=false"},
}
d, err := NewLDCacheUpdateHook(logger, mountMock, hookCreator, tc.ldconfigPath) d, err := NewLDCacheUpdateHook(logger, mountMock, hookCreator, tc.ldconfigPath)
require.NoError(t, err) require.NoError(t, err)
@@ -157,7 +112,9 @@ func TestLDCacheUpdateHook(t *testing.T) {
} }
require.NoError(t, err) require.NoError(t, err)
require.EqualValues(t, tc.expectedHooks, hooks) require.Len(t, hooks, 1)
require.EqualValues(t, hooks[0], expectedHook)
devices, err := d.Devices() devices, err := d.Devices()
require.NoError(t, err) require.NoError(t, err)

View File

@@ -53,21 +53,6 @@ func (d list) Devices() ([]Device, error) {
return allDevices, nil return allDevices, nil
} }
// EnvVars returns all environment variables from the included discoverers.
func (d list) EnvVars() ([]EnvVar, error) {
var allEnvs []EnvVar
for i, di := range d {
envs, err := di.EnvVars()
if err != nil {
return nil, fmt.Errorf("error discovering envs for discoverer %v: %w", i, err)
}
allEnvs = append(allEnvs, envs...)
}
return allEnvs, nil
}
// Mounts returns all mounts from the included discoverers // Mounts returns all mounts from the included discoverers
func (d list) Mounts() ([]Mount, error) { func (d list) Mounts() ([]Mount, error) {
var allMounts []Mount var allMounts []Mount

View File

@@ -27,11 +27,6 @@ func (e None) Devices() ([]Device, error) {
return nil, nil return nil, nil
} }
// EnvVars returns an empty list of devices
func (e None) EnvVars() ([]EnvVar, error) {
return nil, nil
}
// Mounts returns an empty list of mounts // Mounts returns an empty list of mounts
func (e None) Mounts() ([]Mount, error) { func (e None) Mounts() ([]Mount, error) {
return nil, nil return nil, nil

View File

@@ -113,7 +113,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
expectedHooks: []Hook{ expectedHooks: []Hook{
{ {
Lifecycle: "createContainer", Lifecycle: "createContainer",
Path: "/usr/bin/nvidia-cdi-hook", Path: "/path/to/nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
Env: []string{"NVIDIA_CTK_DEBUG=false"}, Env: []string{"NVIDIA_CTK_DEBUG=false"},
}, },
@@ -146,7 +146,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
expectedHooks: []Hook{ expectedHooks: []Hook{
{ {
Lifecycle: "createContainer", Lifecycle: "createContainer",
Path: "/usr/bin/nvidia-cdi-hook", Path: "/path/to/nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
Env: []string{"NVIDIA_CTK_DEBUG=false"}, Env: []string{"NVIDIA_CTK_DEBUG=false"},
}, },
@@ -178,7 +178,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
expectedHooks: []Hook{ expectedHooks: []Hook{
{ {
Lifecycle: "createContainer", Lifecycle: "createContainer",
Path: "/usr/bin/nvidia-cdi-hook", Path: "/path/to/nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
Env: []string{"NVIDIA_CTK_DEBUG=false"}, Env: []string{"NVIDIA_CTK_DEBUG=false"},
}, },
@@ -248,7 +248,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
}, },
{ {
Lifecycle: "createContainer", Lifecycle: "createContainer",
Path: "/usr/bin/nvidia-cdi-hook", Path: "/path/to/nvidia-cdi-hook",
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"}, Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
Env: []string{"NVIDIA_CTK_DEBUG=false"}, Env: []string{"NVIDIA_CTK_DEBUG=false"},
}, },
@@ -298,7 +298,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
expectedHooks: []Hook{ expectedHooks: []Hook{
{ {
Lifecycle: "createContainer", Lifecycle: "createContainer",
Path: "/usr/bin/nvidia-cdi-hook", Path: "/path/to/nvidia-cdi-hook",
Args: []string{ Args: []string{
"nvidia-cdi-hook", "create-symlinks", "nvidia-cdi-hook", "create-symlinks",
"--link", "libcuda.so.1::/usr/lib/libcuda.so", "--link", "libcuda.so.1::/usr/lib/libcuda.so",
@@ -311,7 +311,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
}, },
} }
hookCreator := NewHookCreator() hookCreator := NewHookCreator("/path/to/nvidia-cdi-hook", false)
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
d := WithDriverDotSoSymlinks( d := WithDriverDotSoSymlinks(

View File

@@ -55,11 +55,6 @@ func FromDiscoverer(d discover.Discover) (*cdi.ContainerEdits, error) {
return nil, fmt.Errorf("failed to discover devices: %v", err) return nil, fmt.Errorf("failed to discover devices: %v", err)
} }
envs, err := d.EnvVars()
if err != nil {
return nil, fmt.Errorf("failed to discover environment variables: %w", err)
}
mounts, err := d.Mounts() mounts, err := d.Mounts()
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to discover mounts: %v", err) return nil, fmt.Errorf("failed to discover mounts: %v", err)
@@ -79,10 +74,6 @@ func FromDiscoverer(d discover.Discover) (*cdi.ContainerEdits, error) {
c.Append(edits) c.Append(edits)
} }
for _, e := range envs {
c.Append(envvar(e).toEdits())
}
for _, m := range mounts { for _, m := range mounts {
c.Append(mount(m).toEdits()) c.Append(mount(m).toEdits())
} }

View File

@@ -1,39 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package edits
import (
"fmt"
"tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
)
type envvar discover.EnvVar
// toEdits converts a discovered envvar to CDI Container Edits.
func (d envvar) toEdits() *cdi.ContainerEdits {
e := cdi.ContainerEdits{
ContainerEdits: &specs.ContainerEdits{
Env: []string{fmt.Sprintf("%s=%s", d.Name, d.Value)},
},
}
return &e
}

View File

@@ -23,114 +23,34 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger" "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
) )
// A RuntimeMode is used to select a specific mode of operation for the NVIDIA Container Runtime.
type RuntimeMode string
const (
// In LegacyRuntimeMode the nvidia-container-runtime injects the
// nvidia-container-runtime-hook as a prestart hook into the incoming
// container config. This hook invokes the nvidia-container-cli to perform
// the required modifications to the container.
LegacyRuntimeMode = RuntimeMode("legacy")
// In CSVRuntimeMode the nvidia-container-runtime processes a set of CSV
// files to determine which container modification are required. The
// contents of these CSV files are used to generate an in-memory CDI
// specification which is used to modify the container config.
CSVRuntimeMode = RuntimeMode("csv")
// In CDIRuntimeMode the nvidia-container-runtime applies the modifications
// to the container config required for the requested CDI devices in the
// same way that other CDI clients would.
CDIRuntimeMode = RuntimeMode("cdi")
// In JitCDIRuntimeMode the nvidia-container-runtime generates in-memory CDI
// specifications for requested NVIDIA devices.
JitCDIRuntimeMode = RuntimeMode("jit-cdi")
)
type RuntimeModeResolver interface {
ResolveRuntimeMode(string) RuntimeMode
}
type modeResolver struct {
logger logger.Interface
// TODO: This only needs to consider the requested devices.
image *image.CUDA
propertyExtractor info.PropertyExtractor
defaultMode RuntimeMode
}
type Option func(*modeResolver)
func WithDefaultMode(defaultMode RuntimeMode) Option {
return func(mr *modeResolver) {
mr.defaultMode = defaultMode
}
}
func WithLogger(logger logger.Interface) Option {
return func(mr *modeResolver) {
mr.logger = logger
}
}
func WithImage(image *image.CUDA) Option {
return func(mr *modeResolver) {
mr.image = image
}
}
func WithPropertyExtractor(propertyExtractor info.PropertyExtractor) Option {
return func(mr *modeResolver) {
mr.propertyExtractor = propertyExtractor
}
}
func NewRuntimeModeResolver(opts ...Option) RuntimeModeResolver {
r := &modeResolver{
defaultMode: JitCDIRuntimeMode,
}
for _, opt := range opts {
opt(r)
}
if r.logger == nil {
r.logger = &logger.NullLogger{}
}
return r
}
// ResolveAutoMode determines the correct mode for the platform if set to "auto" // ResolveAutoMode determines the correct mode for the platform if set to "auto"
func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rmode RuntimeMode) { func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rmode string) {
r := modeResolver{ return resolveMode(logger, mode, image, nil)
logger: logger,
image: &image,
propertyExtractor: nil,
}
return r.ResolveRuntimeMode(mode)
} }
func (m *modeResolver) ResolveRuntimeMode(mode string) (rmode RuntimeMode) { func resolveMode(logger logger.Interface, mode string, image image.CUDA, propertyExtractor info.PropertyExtractor) (rmode string) {
if mode != "auto" { if mode != "auto" {
m.logger.Infof("Using requested mode '%s'", mode) logger.Infof("Using requested mode '%s'", mode)
return RuntimeMode(mode) return mode
} }
defer func() { defer func() {
m.logger.Infof("Auto-detected mode as '%v'", rmode) logger.Infof("Auto-detected mode as '%v'", rmode)
}() }()
if m.image.OnlyFullyQualifiedCDIDevices() { if image.OnlyFullyQualifiedCDIDevices() {
return CDIRuntimeMode return "cdi"
} }
nvinfo := info.New( nvinfo := info.New(
info.WithLogger(m.logger), info.WithLogger(logger),
info.WithPropertyExtractor(m.propertyExtractor), info.WithPropertyExtractor(propertyExtractor),
) )
switch nvinfo.ResolvePlatform() { switch nvinfo.ResolvePlatform() {
case info.PlatformNVML, info.PlatformWSL: case info.PlatformNVML, info.PlatformWSL:
return m.defaultMode return "legacy"
case info.PlatformTegra: case info.PlatformTegra:
return CSVRuntimeMode return "csv"
} }
return m.defaultMode return "legacy"
} }

View File

@@ -43,16 +43,11 @@ func TestResolveAutoMode(t *testing.T) {
mode: "not-auto", mode: "not-auto",
expectedMode: "not-auto", expectedMode: "not-auto",
}, },
{
description: "legacy resolves to legacy",
mode: "legacy",
expectedMode: "legacy",
},
{ {
description: "no info defaults to legacy", description: "no info defaults to legacy",
mode: "auto", mode: "auto",
info: map[string]bool{}, info: map[string]bool{},
expectedMode: "jit-cdi", expectedMode: "legacy",
}, },
{ {
description: "non-nvml, non-tegra, nvgpu resolves to csv", description: "non-nvml, non-tegra, nvgpu resolves to csv",
@@ -85,14 +80,14 @@ func TestResolveAutoMode(t *testing.T) {
expectedMode: "csv", expectedMode: "csv",
}, },
{ {
description: "nvml, non-tegra, non-nvgpu resolves to jit-cdi", description: "nvml, non-tegra, non-nvgpu resolves to legacy",
mode: "auto", mode: "auto",
info: map[string]bool{ info: map[string]bool{
"nvml": true, "nvml": true,
"tegra": false, "tegra": false,
"nvgpu": false, "nvgpu": false,
}, },
expectedMode: "jit-cdi", expectedMode: "legacy",
}, },
{ {
description: "nvml, non-tegra, nvgpu resolves to csv", description: "nvml, non-tegra, nvgpu resolves to csv",
@@ -105,14 +100,14 @@ func TestResolveAutoMode(t *testing.T) {
expectedMode: "csv", expectedMode: "csv",
}, },
{ {
description: "nvml, tegra, non-nvgpu resolves to jit-cdi", description: "nvml, tegra, non-nvgpu resolves to legacy",
mode: "auto", mode: "auto",
info: map[string]bool{ info: map[string]bool{
"nvml": true, "nvml": true,
"tegra": true, "tegra": true,
"nvgpu": false, "nvgpu": false,
}, },
expectedMode: "jit-cdi", expectedMode: "legacy",
}, },
{ {
description: "nvml, tegra, nvgpu resolves to csv", description: "nvml, tegra, nvgpu resolves to csv",
@@ -141,7 +136,7 @@ func TestResolveAutoMode(t *testing.T) {
}, },
}, },
{ {
description: "at least one non-cdi device resolves to jit-cdi", description: "at least one non-cdi device resolves to legacy",
mode: "auto", mode: "auto",
envmap: map[string]string{ envmap: map[string]string{
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0", "NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
@@ -151,7 +146,7 @@ func TestResolveAutoMode(t *testing.T) {
"tegra": false, "tegra": false,
"nvgpu": false, "nvgpu": false,
}, },
expectedMode: "jit-cdi", expectedMode: "legacy",
}, },
{ {
description: "at least one non-cdi device resolves to csv", description: "at least one non-cdi device resolves to csv",
@@ -175,7 +170,7 @@ func TestResolveAutoMode(t *testing.T) {
expectedMode: "cdi", expectedMode: "cdi",
}, },
{ {
description: "cdi mount and non-CDI devices resolves to jit-cdi", description: "cdi mount and non-CDI devices resolves to legacy",
mode: "auto", mode: "auto",
mounts: []string{ mounts: []string{
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0", "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
@@ -186,10 +181,10 @@ func TestResolveAutoMode(t *testing.T) {
"tegra": false, "tegra": false,
"nvgpu": false, "nvgpu": false,
}, },
expectedMode: "jit-cdi", expectedMode: "legacy",
}, },
{ {
description: "cdi mount and non-CDI envvar resolves to cdi", description: "cdi mount and non-CDI envvar resolves to legacy",
mode: "auto", mode: "auto",
envmap: map[string]string{ envmap: map[string]string{
"NVIDIA_VISIBLE_DEVICES": "0", "NVIDIA_VISIBLE_DEVICES": "0",
@@ -202,7 +197,7 @@ func TestResolveAutoMode(t *testing.T) {
"tegra": false, "tegra": false,
"nvgpu": false, "nvgpu": false,
}, },
expectedMode: "cdi", expectedMode: "legacy",
}, },
} }
@@ -237,15 +232,8 @@ func TestResolveAutoMode(t *testing.T) {
image, _ := image.New( image, _ := image.New(
image.WithEnvMap(tc.envmap), image.WithEnvMap(tc.envmap),
image.WithMounts(mounts), image.WithMounts(mounts),
image.WithAcceptDeviceListAsVolumeMounts(true),
image.WithAcceptEnvvarUnprivileged(true),
) )
mr := NewRuntimeModeResolver( mode := resolveMode(logger, tc.mode, image, properties)
WithLogger(logger),
WithImage(&image),
WithPropertyExtractor(properties),
)
mode := mr.ResolveRuntimeMode(tc.mode)
require.EqualValues(t, tc.expectedMode, mode) require.EqualValues(t, tc.expectedMode, mode)
}) })
} }

View File

@@ -1,206 +0,0 @@
/**
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
**/
package ldconfig
import (
"fmt"
"os"
"os/exec"
"path/filepath"
"strings"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
)
const (
// ldsoconfdFilenamePattern specifies the pattern for the filename
// in ld.so.conf.d that includes references to the specified directories.
// The 00-nvcr prefix is chosen to ensure that these libraries have a
// higher precedence than other libraries on the system, but lower than
// the 00-cuda-compat that is included in some containers.
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
)
type Ldconfig struct {
ldconfigPath string
inRoot string
}
// NewRunner creates an exec.Cmd that can be used to run ldconfig.
func NewRunner(id string, ldconfigPath string, containerRoot string, additionalargs ...string) (*exec.Cmd, error) {
args := []string{
id,
strings.TrimPrefix(config.NormalizeLDConfigPath("@"+ldconfigPath), "@"),
containerRoot,
}
args = append(args, additionalargs...)
return createReexecCommand(args)
}
// New creates an Ldconfig struct that is used to perform operations on the
// ldcache and libraries in a particular root (e.g. a container).
func New(ldconfigPath string, inRoot string) (*Ldconfig, error) {
l := &Ldconfig{
ldconfigPath: ldconfigPath,
inRoot: inRoot,
}
if ldconfigPath == "" {
return nil, fmt.Errorf("an ldconfig path must be specified")
}
if inRoot == "" || inRoot == "/" {
return nil, fmt.Errorf("ldconfig must be run in the non-system root")
}
return l, nil
}
// CreateSonameSymlinks uses ldconfig to create the soname symlinks in the
// specified directories.
func (l *Ldconfig) CreateSonameSymlinks(directories ...string) error {
if len(directories) == 0 {
return nil
}
ldconfigPath, err := l.prepareRoot()
if err != nil {
return err
}
args := []string{
filepath.Base(ldconfigPath),
// Explicitly disable updating the LDCache.
"-N",
// Specify -n to only process the specified directories.
"-n",
}
args = append(args, directories...)
return SafeExec(ldconfigPath, args, nil)
}
func (l *Ldconfig) UpdateLDCache(directories ...string) error {
ldconfigPath, err := l.prepareRoot()
if err != nil {
return err
}
args := []string{
filepath.Base(ldconfigPath),
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
// be configured to use a different config file by default.
"-f", "/etc/ld.so.conf",
}
if l.ldcacheExists() {
args = append(args, "-C", "/etc/ld.so.cache")
} else {
args = append(args, "-N")
}
// If the ld.so.conf.d directory exists, we create a config file there
// containing the required directories, otherwise we add the specified
// directories to the ldconfig command directly.
if l.ldsoconfdDirectoryExists() {
err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...)
if err != nil {
return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
}
} else {
args = append(args, directories...)
}
return SafeExec(ldconfigPath, args, nil)
}
func (l *Ldconfig) prepareRoot() (string, error) {
// To prevent leaking the parent proc filesystem, we create a new proc mount
// in the specified root.
if err := mountProc(l.inRoot); err != nil {
return "", fmt.Errorf("error mounting /proc: %w", err)
}
// We mount the host ldconfig before we pivot root since host paths are not
// visible after the pivot root operation.
ldconfigPath, err := mountLdConfig(l.ldconfigPath, l.inRoot)
if err != nil {
return "", fmt.Errorf("error mounting host ldconfig: %w", err)
}
// We pivot to the container root for the new process, this further limits
// access to the host.
if err := pivotRoot(l.inRoot); err != nil {
return "", fmt.Errorf("error running pivot_root: %w", err)
}
return ldconfigPath, nil
}
func (l *Ldconfig) ldcacheExists() bool {
if _, err := os.Stat("/etc/ld.so.cache"); err != nil && os.IsNotExist(err) {
return false
}
return true
}
func (l *Ldconfig) ldsoconfdDirectoryExists() bool {
info, err := os.Stat("/etc/ld.so.conf.d")
if os.IsNotExist(err) {
return false
}
return info.IsDir()
}
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/.
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
// contains the specified directories on each line.
func createLdsoconfdFile(pattern string, dirs ...string) error {
if len(dirs) == 0 {
return nil
}
ldsoconfdDir := "/etc/ld.so.conf.d"
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
}
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
if err != nil {
return fmt.Errorf("failed to create config file: %w", err)
}
defer func() {
_ = configFile.Close()
}()
added := make(map[string]bool)
for _, dir := range dirs {
if added[dir] {
continue
}
_, err = fmt.Fprintf(configFile, "%s\n", dir)
if err != nil {
return fmt.Errorf("failed to update config file: %w", err)
}
added[dir] = true
}
// The created file needs to be world readable for the cases where the container is run as a non-root user.
if err := configFile.Chmod(0644); err != nil {
return fmt.Errorf("failed to chmod config file: %w", err)
}
return nil
}

View File

@@ -28,29 +28,17 @@ import (
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi" "github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci" "github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
) "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
const (
automaticDeviceVendor = "runtime.nvidia.com"
automaticDeviceClass = "gpu"
automaticDeviceKind = automaticDeviceVendor + "/" + automaticDeviceClass
automaticDevicePrefix = automaticDeviceKind + "="
) )
// NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the // NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is // CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is
// used to select the devices to include. // used to select the devices to include.
func NewCDIModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, isJitCDI bool) (oci.SpecModifier, error) { func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
defaultKind := cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind devices, err := getDevicesFromSpec(logger, ociSpec, cfg)
if isJitCDI { if err != nil {
defaultKind = automaticDeviceKind return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
} }
deviceRequestor := newCDIDeviceRequestor(
logger,
image,
defaultKind,
)
devices := deviceRequestor.DeviceRequests()
if len(devices) == 0 { if len(devices) == 0 {
logger.Debugf("No devices requested; no modification required.") logger.Debugf("No devices requested; no modification required.")
return nil, nil return nil, nil
@@ -77,38 +65,87 @@ func NewCDIModifier(logger logger.Interface, cfg *config.Config, image image.CUD
) )
} }
type deviceRequestor interface { func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.Config) ([]string, error) {
DeviceRequests() []string rawSpec, err := ociSpec.Load()
} if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
type cdiDeviceRequestor struct {
image image.CUDA
logger logger.Interface
defaultKind string
}
func newCDIDeviceRequestor(logger logger.Interface, image image.CUDA, defaultKind string) deviceRequestor {
c := &cdiDeviceRequestor{
logger: logger,
image: image,
defaultKind: defaultKind,
} }
return withUniqueDevices(c)
}
func (c *cdiDeviceRequestor) DeviceRequests() []string { annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations)
if c == nil { if err != nil {
return nil return nil, fmt.Errorf("failed to parse container annotations: %v", err)
} }
if len(annotationDevices) > 0 {
return annotationDevices, nil
}
container, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil {
return nil, err
}
if cfg.AcceptDeviceListAsVolumeMounts {
mountDevices := container.CDIDevicesFromMounts()
if len(mountDevices) > 0 {
return mountDevices, nil
}
}
var devices []string var devices []string
for _, name := range c.image.VisibleDevices() { seen := make(map[string]bool)
for _, name := range container.VisibleDevicesFromEnvVar() {
if !parser.IsQualifiedName(name) { if !parser.IsQualifiedName(name) {
name = fmt.Sprintf("%s=%s", c.defaultKind, name) name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name)
}
if seen[name] {
logger.Debugf("Ignoring duplicate device %q", name)
continue
} }
devices = append(devices, name) devices = append(devices, name)
} }
return devices if len(devices) == 0 {
return nil, nil
}
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
return devices, nil
}
logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES: %v", devices)
return nil, nil
}
// getAnnotationDevices returns a list of devices specified in the annotations.
// Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of
// fully-qualified CDI devices names. If any device name is not fully-quality an error is returned.
// The list of returned devices is deduplicated.
func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]string, error) {
devicesByKey := make(map[string][]string)
for key, value := range annotations {
for _, prefix := range prefixes {
if strings.HasPrefix(key, prefix) {
devicesByKey[key] = strings.Split(value, ",")
}
}
}
seen := make(map[string]bool)
var annotationDevices []string
for key, devices := range devicesByKey {
for _, device := range devices {
if !parser.IsQualifiedName(device) {
return nil, fmt.Errorf("invalid device name %q in annotation %q", device, key)
}
if seen[device] {
continue
}
annotationDevices = append(annotationDevices, device)
seen[device] = true
}
}
return annotationDevices, nil
} }
// filterAutomaticDevices searches for "automatic" device names in the input slice. // filterAutomaticDevices searches for "automatic" device names in the input slice.
@@ -118,38 +155,21 @@ func (c *cdiDeviceRequestor) DeviceRequests() []string {
func filterAutomaticDevices(devices []string) []string { func filterAutomaticDevices(devices []string) []string {
var automatic []string var automatic []string
for _, device := range devices { for _, device := range devices {
if !strings.HasPrefix(device, automaticDevicePrefix) { vendor, class, _ := parser.ParseDevice(device)
continue if vendor == "runtime.nvidia.com" && class == "gpu" {
automatic = append(automatic, device)
} }
automatic = append(automatic, device)
} }
return automatic return automatic
} }
func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) { func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) {
logger.Debugf("Generating in-memory CDI specs for devices %v", devices) logger.Debugf("Generating in-memory CDI specs for devices %v", devices)
spec, err := generateAutomaticCDISpec(logger, cfg, devices)
var identifiers []string
for _, device := range devices {
identifiers = append(identifiers, strings.TrimPrefix(device, automaticDevicePrefix))
}
cdilib, err := nvcdi.New(
nvcdi.WithLogger(logger),
nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
nvcdi.WithVendor(automaticDeviceVendor),
nvcdi.WithClass(automaticDeviceClass),
)
if err != nil {
return nil, fmt.Errorf("failed to construct CDI library: %w", err)
}
spec, err := cdilib.GetSpec(identifiers...)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to generate CDI spec: %w", err) return nil, fmt.Errorf("failed to generate CDI spec: %w", err)
} }
cdiDeviceRequestor, err := cdi.New( cdiModifier, err := cdi.New(
cdi.WithLogger(logger), cdi.WithLogger(logger),
cdi.WithSpec(spec.Raw()), cdi.WithSpec(spec.Raw()),
) )
@@ -157,29 +177,41 @@ func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, de
return nil, fmt.Errorf("failed to construct CDI modifier: %w", err) return nil, fmt.Errorf("failed to construct CDI modifier: %w", err)
} }
return cdiDeviceRequestor, nil return cdiModifier, nil
} }
type deduplicatedDeviceRequestor struct { func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devices []string) (spec.Interface, error) {
deviceRequestor cdilib, err := nvcdi.New(
} nvcdi.WithLogger(logger),
nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
func withUniqueDevices(deviceRequestor deviceRequestor) deviceRequestor { nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
return &deduplicatedDeviceRequestor{deviceRequestor: deviceRequestor} nvcdi.WithVendor("runtime.nvidia.com"),
} nvcdi.WithClass("gpu"),
)
func (d *deduplicatedDeviceRequestor) DeviceRequests() []string { if err != nil {
if d == nil { return nil, fmt.Errorf("failed to construct CDI library: %w", err)
return nil
} }
seen := make(map[string]bool)
var devices []string identifiers := []string{}
for _, device := range d.deviceRequestor.DeviceRequests() { for _, device := range devices {
if seen[device] { _, _, id := parser.ParseDevice(device)
continue identifiers = append(identifiers, id)
}
seen[device] = true
devices = append(devices, device)
} }
return devices
deviceSpecs, err := cdilib.GetDeviceSpecsByID(identifiers...)
if err != nil {
return nil, fmt.Errorf("failed to get CDI device specs: %w", err)
}
commonEdits, err := cdilib.GetCommonEdits()
if err != nil {
return nil, fmt.Errorf("failed to get common CDI spec edits: %w", err)
}
return spec.New(
spec.WithDeviceSpecs(deviceSpecs),
spec.WithEdits(*commonEdits.ContainerEdits),
spec.WithVendor("runtime.nvidia.com"),
spec.WithClass("gpu"),
)
} }

View File

@@ -17,156 +17,76 @@
package modifier package modifier
import ( import (
"fmt"
"testing" "testing"
"github.com/opencontainers/runtime-spec/specs-go"
testlog "github.com/sirupsen/logrus/hooks/test"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
) )
func TestDeviceRequests(t *testing.T) { func TestGetAnnotationDevices(t *testing.T) {
logger, _ := testlog.NewNullLogger()
testCases := []struct { testCases := []struct {
description string description string
input cdiDeviceRequestor
spec *specs.Spec
prefixes []string prefixes []string
annotations map[string]string
expectedDevices []string expectedDevices []string
expectedError error
}{ }{
{ {
description: "empty spec yields no devices", description: "no annotations",
},
{
description: "cdi devices from mounts",
input: cdiDeviceRequestor{
defaultKind: "nvidia.com/gpu",
},
spec: &specs.Spec{
Mounts: []specs.Mount{
{
Destination: "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
Source: "/dev/null",
},
{
Destination: "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/1",
Source: "/dev/null",
},
},
},
expectedDevices: []string{"nvidia.com/gpu=0", "nvidia.com/gpu=1"},
},
{
description: "cdi devices from envvar",
input: cdiDeviceRequestor{
defaultKind: "nvidia.com/gpu",
},
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=0,example.com/class=device"},
},
},
expectedDevices: []string{"nvidia.com/gpu=0", "example.com/class=device"},
},
{
description: "cdi devices from envvar with default kind",
input: cdiDeviceRequestor{
defaultKind: "runtime.nvidia.com/gpu",
},
spec: &specs.Spec{
Process: &specs.Process{
Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
},
},
expectedDevices: []string{"runtime.nvidia.com/gpu=all"},
}, },
{ {
description: "no matching annotations", description: "no matching annotations",
prefixes: []string{"not-prefix/"}, prefixes: []string{"not-prefix/"},
spec: &specs.Spec{ annotations: map[string]string{
Annotations: map[string]string{ "prefix/foo": "example.com/device=bar",
"prefix/foo": "example.com/device=bar",
},
}, },
}, },
{ {
description: "single matching annotation", description: "single matching annotation",
prefixes: []string{"prefix/"}, prefixes: []string{"prefix/"},
spec: &specs.Spec{ annotations: map[string]string{
Annotations: map[string]string{ "prefix/foo": "example.com/device=bar",
"prefix/foo": "example.com/device=bar",
},
}, },
expectedDevices: []string{"example.com/device=bar"}, expectedDevices: []string{"example.com/device=bar"},
}, },
{ {
description: "multiple matching annotations", description: "multiple matching annotations",
prefixes: []string{"prefix/", "another-prefix/"}, prefixes: []string{"prefix/", "another-prefix/"},
spec: &specs.Spec{ annotations: map[string]string{
Annotations: map[string]string{ "prefix/foo": "example.com/device=bar",
"prefix/foo": "example.com/device=bar", "another-prefix/bar": "example.com/device=baz",
"another-prefix/bar": "example.com/device=baz",
},
}, },
expectedDevices: []string{"example.com/device=baz", "example.com/device=bar"}, expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
}, },
{ {
description: "multiple matching annotations with duplicate devices", description: "multiple matching annotations with duplicate devices",
prefixes: []string{"prefix/", "another-prefix/"}, prefixes: []string{"prefix/", "another-prefix/"},
spec: &specs.Spec{ annotations: map[string]string{
Annotations: map[string]string{ "prefix/foo": "example.com/device=bar",
"prefix/foo": "example.com/device=bar", "another-prefix/bar": "example.com/device=bar",
"another-prefix/bar": "example.com/device=bar",
},
}, },
expectedDevices: []string{"example.com/device=bar", "example.com/device=bar"}, expectedDevices: []string{"example.com/device=bar"},
}, },
{ {
description: "devices in annotations are expanded", description: "invalid devices",
input: cdiDeviceRequestor{ prefixes: []string{"prefix/"},
defaultKind: "nvidia.com/gpu", annotations: map[string]string{
"prefix/foo": "example.com/device",
}, },
prefixes: []string{"prefix/"}, expectedError: fmt.Errorf("invalid device %q", "example.com/device"),
spec: &specs.Spec{
Annotations: map[string]string{
"prefix/foo": "device",
},
},
expectedDevices: []string{"nvidia.com/gpu=device"},
},
{
description: "invalid devices in annotations are treated as strings",
input: cdiDeviceRequestor{
defaultKind: "nvidia.com/gpu",
},
prefixes: []string{"prefix/"},
spec: &specs.Spec{
Annotations: map[string]string{
"prefix/foo": "example.com/device",
},
},
expectedDevices: []string{"nvidia.com/gpu=example.com/device"},
}, },
} }
for _, tc := range testCases { for _, tc := range testCases {
tc.input.logger = logger
image, err := image.NewCUDAImageFromSpec(
tc.spec,
image.WithAcceptDeviceListAsVolumeMounts(true),
image.WithAcceptEnvvarUnprivileged(true),
image.WithAnnotationsPrefixes(tc.prefixes),
)
require.NoError(t, err)
tc.input.image = image
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
devices := tc.input.DeviceRequests() devices, err := getAnnotationDevices(tc.prefixes, tc.annotations)
if tc.expectedError != nil {
require.Error(t, err)
return
}
require.NoError(t, err) require.NoError(t, err)
require.EqualValues(t, tc.expectedDevices, devices) require.ElementsMatch(t, tc.expectedDevices, devices)
}) })
} }
} }

View File

@@ -33,7 +33,7 @@ import (
// NewCSVModifier creates a modifier that applies modications to an OCI spec if required by the runtime wrapper. // NewCSVModifier creates a modifier that applies modications to an OCI spec if required by the runtime wrapper.
// The modifications are defined by CSV MountSpecs. // The modifications are defined by CSV MountSpecs.
func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image.CUDA) (oci.SpecModifier, error) { func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image.CUDA) (oci.SpecModifier, error) {
if devices := container.VisibleDevices(); len(devices) == 0 { if devices := container.VisibleDevicesFromEnvVar(); len(devices) == 0 {
logger.Infof("No modification required; no devices requested") logger.Infof("No modification required; no devices requested")
return nil, nil return nil, nil
} }

View File

@@ -37,7 +37,7 @@ import (
// //
// If not devices are selected, no changes are made. // If not devices are selected, no changes are made.
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) { func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) {
if devices := image.VisibleDevices(); len(devices) == 0 { if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 {
logger.Infof("No modification required; no devices requested") logger.Infof("No modification required; no devices requested")
return nil, nil return nil, nil
} }

View File

@@ -29,10 +29,9 @@ import (
// NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification. // NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification.
// The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made. // The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, container image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) { func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerImage image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) {
devices, reason := requiresGraphicsModifier(container) if required, reason := requiresGraphicsModifier(containerImage); !required {
if len(devices) == 0 { logger.Infof("No graphics modifier required: %v", reason)
logger.Infof("No graphics modifier required; %v", reason)
return nil, nil return nil, nil
} }
@@ -49,7 +48,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, container
devRoot := driver.Root devRoot := driver.Root
drmNodes, err := discover.NewDRMNodesDiscoverer( drmNodes, err := discover.NewDRMNodesDiscoverer(
logger, logger,
image.NewVisibleDevices(devices...), containerImage.DevicesFromEnvvars(image.EnvVarNvidiaVisibleDevices),
devRoot, devRoot,
hookCreator, hookCreator,
) )
@@ -65,15 +64,14 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, container
} }
// requiresGraphicsModifier determines whether a graphics modifier is required. // requiresGraphicsModifier determines whether a graphics modifier is required.
func requiresGraphicsModifier(cudaImage image.CUDA) ([]string, string) { func requiresGraphicsModifier(cudaImage image.CUDA) (bool, string) {
devices := cudaImage.VisibleDevices() if devices := cudaImage.VisibleDevicesFromEnvVar(); len(devices) == 0 {
if len(devices) == 0 { return false, "no devices requested"
return nil, "no devices requested"
} }
if !cudaImage.GetDriverCapabilities().Any(image.DriverCapabilityGraphics, image.DriverCapabilityDisplay) { if !cudaImage.GetDriverCapabilities().Any(image.DriverCapabilityGraphics, image.DriverCapabilityDisplay) {
return nil, "no required capabilities requested" return false, "no required capabilities requested"
} }
return devices, "" return true, ""
} }

View File

@@ -26,9 +26,9 @@ import (
func TestGraphicsModifier(t *testing.T) { func TestGraphicsModifier(t *testing.T) {
testCases := []struct { testCases := []struct {
description string description string
envmap map[string]string envmap map[string]string
expectedDevices []string expectedRequired bool
}{ }{
{ {
description: "empty image does not create modifier", description: "empty image does not create modifier",
@@ -52,7 +52,7 @@ func TestGraphicsModifier(t *testing.T) {
"NVIDIA_VISIBLE_DEVICES": "all", "NVIDIA_VISIBLE_DEVICES": "all",
"NVIDIA_DRIVER_CAPABILITIES": "all", "NVIDIA_DRIVER_CAPABILITIES": "all",
}, },
expectedDevices: []string{"all"}, expectedRequired: true,
}, },
{ {
description: "devices with graphics capability creates modifier", description: "devices with graphics capability creates modifier",
@@ -60,7 +60,7 @@ func TestGraphicsModifier(t *testing.T) {
"NVIDIA_VISIBLE_DEVICES": "all", "NVIDIA_VISIBLE_DEVICES": "all",
"NVIDIA_DRIVER_CAPABILITIES": "graphics", "NVIDIA_DRIVER_CAPABILITIES": "graphics",
}, },
expectedDevices: []string{"all"}, expectedRequired: true,
}, },
{ {
description: "devices with compute,graphics capability creates modifier", description: "devices with compute,graphics capability creates modifier",
@@ -68,7 +68,7 @@ func TestGraphicsModifier(t *testing.T) {
"NVIDIA_VISIBLE_DEVICES": "all", "NVIDIA_VISIBLE_DEVICES": "all",
"NVIDIA_DRIVER_CAPABILITIES": "compute,graphics", "NVIDIA_DRIVER_CAPABILITIES": "compute,graphics",
}, },
expectedDevices: []string{"all"}, expectedRequired: true,
}, },
{ {
description: "devices with display capability creates modifier", description: "devices with display capability creates modifier",
@@ -76,7 +76,7 @@ func TestGraphicsModifier(t *testing.T) {
"NVIDIA_VISIBLE_DEVICES": "all", "NVIDIA_VISIBLE_DEVICES": "all",
"NVIDIA_DRIVER_CAPABILITIES": "display", "NVIDIA_DRIVER_CAPABILITIES": "display",
}, },
expectedDevices: []string{"all"}, expectedRequired: true,
}, },
{ {
description: "devices with display,graphics capability creates modifier", description: "devices with display,graphics capability creates modifier",
@@ -84,7 +84,7 @@ func TestGraphicsModifier(t *testing.T) {
"NVIDIA_VISIBLE_DEVICES": "all", "NVIDIA_VISIBLE_DEVICES": "all",
"NVIDIA_DRIVER_CAPABILITIES": "display,graphics", "NVIDIA_DRIVER_CAPABILITIES": "display,graphics",
}, },
expectedDevices: []string{"all"}, expectedRequired: true,
}, },
} }
@@ -94,7 +94,7 @@ func TestGraphicsModifier(t *testing.T) {
image.WithEnvMap(tc.envmap), image.WithEnvMap(tc.envmap),
) )
required, _ := requiresGraphicsModifier(image) required, _ := requiresGraphicsModifier(image)
require.EqualValues(t, tc.expectedDevices, required) require.EqualValues(t, tc.expectedRequired, required)
}) })
} }
} }

View File

@@ -41,11 +41,6 @@ func (d *byPathHookDiscoverer) Devices() ([]discover.Device, error) {
return nil, nil return nil, nil
} }
// EnvVars returns the empty list for the by-path hook discoverer
func (d *byPathHookDiscoverer) EnvVars() ([]discover.EnvVar, error) {
return nil, nil
}
// Hooks returns the hooks for the GPU device. // Hooks returns the hooks for the GPU device.
// The following hooks are detected: // The following hooks are detected:
// 1. A hook to create /dev/dri/by-path symlinks // 1. A hook to create /dev/dri/by-path symlinks

View File

@@ -106,10 +106,6 @@ func (d *nvsandboxutilsDGPU) Devices() ([]discover.Device, error) {
return devices, nil return devices, nil
} }
func (d *nvsandboxutilsDGPU) EnvVars() ([]discover.EnvVar, error) {
return nil, nil
}
// Hooks returns a hook to create the by-path symlinks for the discovered devices. // Hooks returns a hook to create the by-path symlinks for the discovered devices.
func (d *nvsandboxutilsDGPU) Hooks() ([]discover.Hook, error) { func (d *nvsandboxutilsDGPU) Hooks() ([]discover.Hook, error) {
if len(d.deviceLinks) == 0 { if len(d.deviceLinks) == 0 {

View File

@@ -183,7 +183,7 @@ func TestDiscovererFromCSVFiles(t *testing.T) {
}, },
} }
hookCreator := discover.NewHookCreator() hookCreator := discover.NewHookCreator("/usr/bin/nvidia-cdi-hook", false)
for _, tc := range testCases { for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) { t.Run(tc.description, func(t *testing.T) {
defer setGetTargetsFromCSVFiles(tc.moutSpecs)() defer setGetTargetsFromCSVFiles(tc.moutSpecs)()

View File

@@ -18,6 +18,7 @@ package runtime
import ( import (
"fmt" "fmt"
"os"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config" "github.com/NVIDIA/nvidia-container-toolkit/internal/config"
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image" "github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
@@ -65,17 +66,29 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
// newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config. // newSpecModifier is a factory method that creates constructs an OCI spec modifer based on the provided config.
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) { func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
mode, image, err := initRuntimeModeAndImage(logger, cfg, ociSpec) rawSpec, err := ociSpec.Load()
if err != nil {
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
}
image, err := image.NewCUDAImageFromSpec(rawSpec)
if err != nil { if err != nil {
return nil, err return nil, err
} }
modeModifier, err := newModeModifier(logger, mode, cfg, *image) hookCreator := discover.NewHookCreator(
cfg.NVIDIACTKConfig.Path,
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath == "" || cfg.NVIDIAContainerRuntimeConfig.DebugFilePath == os.DevNull,
)
mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image)
// We update the mode here so that we can continue passing just the config to other functions.
cfg.NVIDIAContainerRuntimeConfig.Mode = mode
modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, image)
if err != nil { if err != nil {
return nil, err return nil, err
} }
hookCreator := discover.NewHookCreator(discover.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path))
var modifiers modifier.List var modifiers modifier.List
for _, modifierType := range supportedModifierTypes(mode) { for _, modifierType := range supportedModifierTypes(mode) {
switch modifierType { switch modifierType {
@@ -84,13 +97,13 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
case "nvidia-hook-remover": case "nvidia-hook-remover":
modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger)) modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger))
case "graphics": case "graphics":
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, *image, driver, hookCreator) graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver, hookCreator)
if err != nil { if err != nil {
return nil, err return nil, err
} }
modifiers = append(modifiers, graphicsModifier) modifiers = append(modifiers, graphicsModifier)
case "feature-gated": case "feature-gated":
featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, *image, driver, hookCreator) featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image, driver, hookCreator)
if err != nil { if err != nil {
return nil, err return nil, err
} }
@@ -101,69 +114,26 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
return modifiers, nil return modifiers, nil
} }
func newModeModifier(logger logger.Interface, mode info.RuntimeMode, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) { func newModeModifier(logger logger.Interface, mode string, cfg *config.Config, ociSpec oci.Spec, image image.CUDA) (oci.SpecModifier, error) {
switch mode { switch mode {
case info.LegacyRuntimeMode: case "legacy":
return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil
case info.CSVRuntimeMode: case "csv":
return modifier.NewCSVModifier(logger, cfg, image) return modifier.NewCSVModifier(logger, cfg, image)
case info.CDIRuntimeMode, info.JitCDIRuntimeMode: case "cdi":
return modifier.NewCDIModifier(logger, cfg, image, mode == info.JitCDIRuntimeMode) return modifier.NewCDIModifier(logger, cfg, ociSpec)
} }
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode) return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
} }
// initRuntimeModeAndImage constructs an image from the specified OCI runtime
// specification and runtime config.
// The image is also used to determine the runtime mode to apply.
// If a non-CDI mode is detected we ensure that the image does not process
// annotation devices.
func initRuntimeModeAndImage(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (info.RuntimeMode, *image.CUDA, error) {
rawSpec, err := ociSpec.Load()
if err != nil {
return "", nil, fmt.Errorf("failed to load OCI spec: %v", err)
}
image, err := image.NewCUDAImageFromSpec(
rawSpec,
image.WithLogger(logger),
image.WithAcceptDeviceListAsVolumeMounts(cfg.AcceptDeviceListAsVolumeMounts),
image.WithAcceptEnvvarUnprivileged(cfg.AcceptEnvvarUnprivileged),
image.WithAnnotationsPrefixes(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes),
)
if err != nil {
return "", nil, err
}
modeResolver := info.NewRuntimeModeResolver(
info.WithLogger(logger),
info.WithImage(&image),
)
mode := modeResolver.ResolveRuntimeMode(cfg.NVIDIAContainerRuntimeConfig.Mode)
// We update the mode here so that we can continue passing just the config to other functions.
cfg.NVIDIAContainerRuntimeConfig.Mode = string(mode)
if mode == "cdi" || len(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes) == 0 {
return mode, &image, nil
}
// For non-cdi modes we explicitly set the annotation prefixes to nil and
// call this function again to force a reconstruction of the image.
// Note that since the mode is now explicitly set, we will effectively skip
// the mode resolution.
cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes = nil
return initRuntimeModeAndImage(logger, cfg, ociSpec)
}
// supportedModifierTypes returns the modifiers supported for a specific runtime mode. // supportedModifierTypes returns the modifiers supported for a specific runtime mode.
func supportedModifierTypes(mode info.RuntimeMode) []string { func supportedModifierTypes(mode string) []string {
switch mode { switch mode {
case info.CDIRuntimeMode, info.JitCDIRuntimeMode: case "cdi":
// For CDI mode we make no additional modifications. // For CDI mode we make no additional modifications.
return []string{"nvidia-hook-remover", "mode"} return []string{"nvidia-hook-remover", "mode"}
case info.CSVRuntimeMode: case "csv":
// For CSV mode we support mode and feature-gated modification. // For CSV mode we support mode and feature-gated modification.
return []string{"nvidia-hook-remover", "feature-gated", "mode"} return []string{"nvidia-hook-remover", "feature-gated", "mode"}
default: default:

View File

@@ -10,7 +10,7 @@ Build-Depends: debhelper (>= 9)
Package: nvidia-container-toolkit Package: nvidia-container-toolkit
Architecture: any Architecture: any
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (= @VERSION@), libnvidia-container-tools (<< 2.0.0) Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0)
Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
Description: NVIDIA Container toolkit Description: NVIDIA Container toolkit

View File

@@ -1,5 +1,3 @@
nvidia-container-runtime /usr/bin nvidia-container-runtime /usr/bin
nvidia-ctk /usr/bin nvidia-ctk /usr/bin
nvidia-cdi-hook /usr/bin nvidia-cdi-hook /usr/bin
nvidia-cdi-refresh.service /etc/systemd/system/
nvidia-cdi-refresh.path /etc/systemd/system/

View File

@@ -5,15 +5,6 @@ set -e
case "$1" in case "$1" in
configure) configure)
/usr/bin/nvidia-ctk --quiet config --config-file=/etc/nvidia-container-runtime/config.toml --in-place /usr/bin/nvidia-ctk --quiet config --config-file=/etc/nvidia-container-runtime/config.toml --in-place
# Enable nvidia-cdi-refresh services on both install and upgrade
if command -v systemctl >/dev/null 2>&1 \
&& systemctl --quiet is-system-running 2>/dev/null; then
systemctl daemon-reload || { echo "Warning: Failed to reload systemd daemon" >&2; true; }
systemctl enable --now nvidia-cdi-refresh.path || { echo "Warning: Failed to enable nvidia-cdi-refresh.path" >&2; true; }
systemctl enable --now nvidia-cdi-refresh.service || { echo "Warning: Failed to enable nvidia-cdi-refresh.service" >&2; true; }
fi
;; ;;
abort-upgrade|abort-remove|abort-deconfigure) abort-upgrade|abort-remove|abort-deconfigure)

View File

@@ -3,6 +3,7 @@
set -e set -e
sed -i "s;@SECTION@;${SECTION:+$SECTION/};g" debian/control sed -i "s;@SECTION@;${SECTION:+$SECTION/};g" debian/control
sed -i "s;@LIBNVIDIA_CONTAINER_TOOLS_VERSION@;${LIBNVIDIA_CONTAINER_TOOLS_VERSION:+$LIBNVIDIA_CONTAINER_TOOLS_VERSION};g" debian/control
sed -i "s;@VERSION@;${VERSION:+$VERSION};g" debian/control sed -i "s;@VERSION@;${VERSION:+$VERSION};g" debian/control
if [ -n "$DISTRIB" ]; then if [ -n "$DISTRIB" ]; then

View File

@@ -5,14 +5,3 @@
%: %:
dh $@ dh $@
override_dh_fixperms:
dh_fixperms
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime-hook || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime.cdi || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime.legacy || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-ctk || true
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-cdi-hook || true
chmod 644 debian/$(shell dh_listpackages)/etc/systemd/system/nvidia-cdi-refresh.service || true
chmod 644 debian/$(shell dh_listpackages)/etc/systemd/system/nvidia-cdi-refresh.path || true

View File

@@ -17,33 +17,27 @@ Source3: nvidia-container-runtime
Source4: nvidia-container-runtime.cdi Source4: nvidia-container-runtime.cdi
Source5: nvidia-container-runtime.legacy Source5: nvidia-container-runtime.legacy
Source6: nvidia-cdi-hook Source6: nvidia-cdi-hook
Source7: nvidia-cdi-refresh.service
Source8: nvidia-cdi-refresh.path
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2 Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
Provides: nvidia-container-runtime Provides: nvidia-container-runtime
Provides: nvidia-container-runtime-hook Provides: nvidia-container-runtime-hook
Requires: libnvidia-container-tools == %{version}-%{release}, libnvidia-container-tools < 2.0.0 Requires: libnvidia-container-tools >= %{libnvidia_container_tools_version}, libnvidia-container-tools < 2.0.0
Requires: nvidia-container-toolkit-base == %{version}-%{release} Requires: nvidia-container-toolkit-base == %{version}-%{release}
%description %description
Provides tools and utilities to enable GPU support in containers. Provides tools and utilities to enable GPU support in containers.
%prep %prep
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} . cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
%install %install
mkdir -p %{buildroot}%{_bindir} mkdir -p %{buildroot}%{_bindir}
mkdir -p %{buildroot}%{_sysconfdir}/systemd/system/
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
install -m 644 -t %{buildroot}%{_sysconfdir}/systemd/system nvidia-cdi-refresh.service
install -m 644 -t %{buildroot}%{_sysconfdir}/systemd/system nvidia-cdi-refresh.path
%post %post
if [ $1 -gt 1 ]; then # only on package upgrade if [ $1 -gt 1 ]; then # only on package upgrade
@@ -51,14 +45,6 @@ if [ $1 -gt 1 ]; then # only on package upgrade
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
fi fi
# Reload systemd unit cache and enable nvidia-cdi-refresh services on both install and upgrade
if command -v systemctl >/dev/null 2>&1 \
&& systemctl --quiet is-system-running 2>/dev/null; then
systemctl daemon-reload || { echo "Warning: Failed to reload systemd daemon" >&2; true; }
systemctl enable --now nvidia-cdi-refresh.path || { echo "Warning: Failed to enable nvidia-cdi-refresh.path" >&2; true; }
systemctl enable --now nvidia-cdi-refresh.service || { echo "Warning: Failed to enable nvidia-cdi-refresh.service" >&2; true; }
fi
%posttrans %posttrans
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
# repairing lost file nvidia-container-runtime-hook # repairing lost file nvidia-container-runtime-hook
@@ -83,7 +69,7 @@ fi
# As of 1.10.0-1 we generate the release information automatically # As of 1.10.0-1 we generate the release information automatically
* %{release_date} NVIDIA CORPORATION <cudatools@nvidia.com> %{version}-%{release} * %{release_date} NVIDIA CORPORATION <cudatools@nvidia.com> %{version}-%{release}
- See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/%{git_commit}/CHANGELOG.md - See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/%{git_commit}/CHANGELOG.md
- Bump libnvidia-container dependency to libnvidia-container-tools == %{version}-%{release} - Bump libnvidia-container dependency to libnvidia-container-tools >= %{libnvidia_container_tools_version}
# The BASE package consists of the NVIDIA Container Runtime and the NVIDIA Container Toolkit CLI. # The BASE package consists of the NVIDIA Container Runtime and the NVIDIA Container Toolkit CLI.
# This allows the package to be installed on systems where no NVIDIA Container CLI is available. # This allows the package to be installed on systems where no NVIDIA Container CLI is available.
@@ -103,8 +89,6 @@ Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit
%{_bindir}/nvidia-container-runtime %{_bindir}/nvidia-container-runtime
%{_bindir}/nvidia-ctk %{_bindir}/nvidia-ctk
%{_bindir}/nvidia-cdi-hook %{_bindir}/nvidia-cdi-hook
%{_sysconfdir}/systemd/system/nvidia-cdi-refresh.service
%{_sysconfdir}/systemd/system/nvidia-cdi-refresh.path
# The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes. # The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes.
# This package is not distributed as part of the NVIDIA Container Toolkit RPMs. # This package is not distributed as part of the NVIDIA Container Toolkit RPMs.

View File

@@ -21,13 +21,12 @@ import (
"tags.cncf.io/container-device-interface/pkg/cdi" "tags.cncf.io/container-device-interface/pkg/cdi"
"tags.cncf.io/container-device-interface/specs-go" "tags.cncf.io/container-device-interface/specs-go"
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec" "github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
) )
// Interface defines the API for the nvcdi package // Interface defines the API for the nvcdi package
type Interface interface { type Interface interface {
GetSpec(...string) (spec.Interface, error) GetSpec() (spec.Interface, error)
GetCommonEdits() (*cdi.ContainerEdits, error) GetCommonEdits() (*cdi.ContainerEdits, error)
GetAllDeviceSpecs() ([]specs.Device, error) GetAllDeviceSpecs() ([]specs.Device, error)
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error) GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
@@ -37,35 +36,14 @@ type Interface interface {
GetDeviceSpecsByID(...string) ([]specs.Device, error) GetDeviceSpecsByID(...string) ([]specs.Device, error)
} }
// A HookName represents one of the predefined NVIDIA CDI hooks. // A HookName refers to one of the predefined set of CDI hooks that may be
type HookName = discover.HookName // included in the generated CDI specification.
type HookName string
const ( const (
// AllHooks is a special hook name that allows all hooks to be matched. // HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility.
AllHooks = discover.AllHooks // This was added with v1.17.5 of the NVIDIA Container Toolkit.
HookEnableCudaCompat = HookName("enable-cuda-compat")
// A CreateSymlinksHook is used to create symlinks in the container.
CreateSymlinksHook = discover.CreateSymlinksHook
// DisableDeviceNodeModificationHook refers to the hook used to ensure that
// device nodes are not created by libnvidia-ml.so or nvidia-smi in a
// container.
// Added in v1.17.8
DisableDeviceNodeModificationHook = discover.DisableDeviceNodeModificationHook
// An EnableCudaCompatHook is used to enabled CUDA Forward Compatibility.
// Added in v1.17.5
EnableCudaCompatHook = discover.EnableCudaCompatHook
// An UpdateLDCacheHook is used to update the ldcache in the container.
UpdateLDCacheHook = discover.UpdateLDCacheHook
// A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks
// for injected libraries exist in the container.
CreateSonameSymlinksHook = discover.CreateSonameSymlinksHook
// Deprecated: Use CreateSymlinksHook instead.
HookCreateSymlinks = CreateSymlinksHook
// Deprecated: Use EnableCudaCompatHook instead.
HookEnableCudaCompat = EnableCudaCompatHook
// Deprecated: Use UpdateLDCacheHook instead.
HookUpdateLDCache = UpdateLDCacheHook
) )
// A FeatureFlag refers to a specific feature that can be toggled in the CDI api. // A FeatureFlag refers to a specific feature that can be toggled in the CDI api.

View File

@@ -82,7 +82,7 @@ func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version. // NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover, error) { func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover, error) {
libraryPaths, libCudaDirectoryPath, err := getVersionLibs(l.logger, l.driver, version) libraryPaths, err := getVersionLibs(l.logger, l.driver, version)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err) return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
} }
@@ -106,22 +106,15 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover
) )
discoverers = append(discoverers, driverDotSoSymlinksDiscoverer) discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
// TODO: The following should use the version directly. if l.HookIsSupported(HookEnableCudaCompat) {
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver) // TODO: The following should use the version directly.
discoverers = append(discoverers, cudaCompatLibHookDiscoverer) cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver)
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
}
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath) updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)
discoverers = append(discoverers, updateLDCache) discoverers = append(discoverers, updateLDCache)
disableDeviceNodeModification := l.hookCreator.Create(DisableDeviceNodeModificationHook)
discoverers = append(discoverers, disableDeviceNodeModification)
environmentVariable := &discover.EnvVar{
Name: "NVIDIA_CTK_LIBCUDA_DIR",
Value: libCudaDirectoryPath,
}
discoverers = append(discoverers, environmentVariable)
d := discover.Merge(discoverers...) d := discover.Merge(discoverers...)
return d, nil return d, nil
@@ -209,41 +202,39 @@ func NewDriverBinariesDiscoverer(logger logger.Interface, driverRoot string) dis
// getVersionLibs checks the LDCache for libraries ending in the specified driver version. // getVersionLibs checks the LDCache for libraries ending in the specified driver version.
// Although the ldcache at the specified driverRoot is queried, the paths are returned relative to this driverRoot. // Although the ldcache at the specified driverRoot is queried, the paths are returned relative to this driverRoot.
// This allows the standard mount location logic to be used for resolving the mounts. // This allows the standard mount location logic to be used for resolving the mounts.
func getVersionLibs(logger logger.Interface, driver *root.Driver, version string) ([]string, string, error) { func getVersionLibs(logger logger.Interface, driver *root.Driver, version string) ([]string, error) {
logger.Infof("Using driver version %v", version) logger.Infof("Using driver version %v", version)
libCudaPaths, err := cuda.New( libCudaPaths, err := cuda.New(
driver.Libraries(), driver.Libraries(),
).Locate("." + version) ).Locate("." + version)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err) return nil, fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err)
} }
libCudaDirectoryPath := filepath.Dir(libCudaPaths[0]) libRoot := filepath.Dir(libCudaPaths[0])
libraries := lookup.NewFileLocator( libraries := lookup.NewFileLocator(
lookup.WithLogger(logger), lookup.WithLogger(logger),
lookup.WithSearchPaths( lookup.WithSearchPaths(
libCudaDirectoryPath, libRoot,
filepath.Join(libCudaDirectoryPath, "vdpau"), filepath.Join(libRoot, "vdpau"),
), ),
lookup.WithOptional(true), lookup.WithOptional(true),
) )
libs, err := libraries.Locate("*.so." + version) libs, err := libraries.Locate("*.so." + version)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("failed to locate libraries for driver version %v: %v", version, err) return nil, fmt.Errorf("failed to locate libraries for driver version %v: %v", version, err)
} }
if driver.Root == "/" || driver.Root == "" { if driver.Root == "/" || driver.Root == "" {
return libs, libCudaDirectoryPath, nil return libs, nil
} }
libCudaDirectoryPath = driver.RelativeToRoot(libCudaDirectoryPath)
var relative []string var relative []string
for _, l := range libs { for _, l := range libs {
relative = append(relative, strings.TrimPrefix(l, driver.Root)) relative = append(relative, strings.TrimPrefix(l, driver.Root))
} }
return relative, libCudaDirectoryPath, nil return relative, nil
} }

View File

@@ -40,12 +40,13 @@ var requiredDriverStoreFiles = []string{
// newWSLDriverDiscoverer returns a Discoverer for WSL2 drivers. // newWSLDriverDiscoverer returns a Discoverer for WSL2 drivers.
func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, hookCreator discover.HookCreator, ldconfigPath string) (discover.Discover, error) { func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, hookCreator discover.HookCreator, ldconfigPath string) (discover.Discover, error) {
if err := dxcore.Init(); err != nil { err := dxcore.Init()
return nil, fmt.Errorf("failed to initialize dxcore: %w", err) if err != nil {
return nil, fmt.Errorf("failed to initialize dxcore: %v", err)
} }
defer func() { defer func() {
if err := dxcore.Shutdown(); err != nil { if err := dxcore.Shutdown(); err != nil {
logger.Warningf("failed to shutdown dxcore: %w", err) logger.Warningf("failed to shutdown dxcore: %v", err)
} }
}() }()
@@ -53,19 +54,32 @@ func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, hookCrea
if len(driverStorePaths) == 0 { if len(driverStorePaths) == 0 {
return nil, fmt.Errorf("no driver store paths found") return nil, fmt.Errorf("no driver store paths found")
} }
if len(driverStorePaths) > 1 {
logger.Warningf("Found multiple driver store paths: %v", driverStorePaths)
}
logger.Infof("Using WSL driver store paths: %v", driverStorePaths) logger.Infof("Using WSL driver store paths: %v", driverStorePaths)
driverStorePaths = append(driverStorePaths, "/usr/lib/wsl/lib") return newWSLDriverStoreDiscoverer(logger, driverRoot, hookCreator, ldconfigPath, driverStorePaths)
}
driverStoreMounts := discover.NewMounts( // newWSLDriverStoreDiscoverer returns a Discoverer for WSL2 drivers in the driver store associated with a dxcore adapter.
func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, hookCreator discover.HookCreator, ldconfigPath string, driverStorePaths []string) (discover.Discover, error) {
var searchPaths []string
seen := make(map[string]bool)
for _, path := range driverStorePaths {
if seen[path] {
continue
}
searchPaths = append(searchPaths, path)
}
if len(searchPaths) > 1 {
logger.Warningf("Found multiple driver store paths: %v", searchPaths)
}
searchPaths = append(searchPaths, "/usr/lib/wsl/lib")
libraries := discover.NewMounts(
logger, logger,
lookup.NewFileLocator( lookup.NewFileLocator(
lookup.WithLogger(logger), lookup.WithLogger(logger),
lookup.WithSearchPaths( lookup.WithSearchPaths(
driverStorePaths..., searchPaths...,
), ),
lookup.WithCount(1), lookup.WithCount(1),
), ),
@@ -75,14 +89,14 @@ func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, hookCrea
symlinkHook := nvidiaSMISimlinkHook{ symlinkHook := nvidiaSMISimlinkHook{
logger: logger, logger: logger,
mountsFrom: driverStoreMounts, mountsFrom: libraries,
hookCreator: hookCreator, hookCreator: hookCreator,
} }
ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, driverStoreMounts, hookCreator, ldconfigPath) ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, hookCreator, ldconfigPath)
d := discover.Merge( d := discover.Merge(
driverStoreMounts, libraries,
symlinkHook, symlinkHook,
ldcacheHook, ldcacheHook,
) )
@@ -121,7 +135,7 @@ func (m nvidiaSMISimlinkHook) Hooks() ([]discover.Hook, error) {
} }
link := "/usr/bin/nvidia-smi" link := "/usr/bin/nvidia-smi"
links := []string{fmt.Sprintf("%s::%s", target, link)} links := []string{fmt.Sprintf("%s::%s", target, link)}
symlinkHook := m.hookCreator.Create(CreateSymlinksHook, links...) symlinkHook := m.hookCreator.Create("create-symlinks", links...)
return symlinkHook.Hooks() return symlinkHook.Hooks()
} }

Some files were not shown because too many files have changed in this diff Show More