mirror of
https://github.com/NVIDIA/nvidia-container-toolkit
synced 2025-06-26 18:18:24 +00:00
Compare commits
1 Commits
pull-reque
...
pull-reque
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9674787e7e |
193
.common-ci.yml
193
.common-ci.yml
@@ -22,7 +22,15 @@ variables:
|
|||||||
BUILD_MULTI_ARCH_IMAGES: "true"
|
BUILD_MULTI_ARCH_IMAGES: "true"
|
||||||
|
|
||||||
stages:
|
stages:
|
||||||
- pull
|
- trigger
|
||||||
|
- image
|
||||||
|
- lint
|
||||||
|
- go-checks
|
||||||
|
- go-build
|
||||||
|
- unit-tests
|
||||||
|
- package-build
|
||||||
|
- image-build
|
||||||
|
- test
|
||||||
- scan
|
- scan
|
||||||
- release
|
- release
|
||||||
- sign
|
- sign
|
||||||
@@ -45,6 +53,108 @@ workflow:
|
|||||||
# We then add all the regular triggers
|
# We then add all the regular triggers
|
||||||
- !reference [.pipeline-trigger-rules, rules]
|
- !reference [.pipeline-trigger-rules, rules]
|
||||||
|
|
||||||
|
# The main or manual job is used to filter out distributions or architectures that are not required on
|
||||||
|
# every build.
|
||||||
|
.main-or-manual:
|
||||||
|
rules:
|
||||||
|
- !reference [.pipeline-trigger-rules, rules]
|
||||||
|
- if: $CI_PIPELINE_SOURCE == "schedule"
|
||||||
|
when: manual
|
||||||
|
|
||||||
|
# The trigger-pipeline job adds a manually triggered job to the pipeline on merge requests.
|
||||||
|
trigger-pipeline:
|
||||||
|
stage: trigger
|
||||||
|
script:
|
||||||
|
- echo "starting pipeline"
|
||||||
|
rules:
|
||||||
|
- !reference [.main-or-manual, rules]
|
||||||
|
- if: $CI_PIPELINE_SOURCE == "merge_request_event"
|
||||||
|
when: manual
|
||||||
|
allow_failure: false
|
||||||
|
- when: always
|
||||||
|
|
||||||
|
# Define the distribution targets
|
||||||
|
.dist-centos7:
|
||||||
|
rules:
|
||||||
|
- !reference [.main-or-manual, rules]
|
||||||
|
variables:
|
||||||
|
DIST: centos7
|
||||||
|
|
||||||
|
.dist-centos8:
|
||||||
|
variables:
|
||||||
|
DIST: centos8
|
||||||
|
|
||||||
|
.dist-ubi8:
|
||||||
|
rules:
|
||||||
|
- !reference [.main-or-manual, rules]
|
||||||
|
variables:
|
||||||
|
DIST: ubi8
|
||||||
|
|
||||||
|
.dist-ubuntu18.04:
|
||||||
|
variables:
|
||||||
|
DIST: ubuntu18.04
|
||||||
|
|
||||||
|
.dist-ubuntu20.04:
|
||||||
|
variables:
|
||||||
|
DIST: ubuntu20.04
|
||||||
|
|
||||||
|
.dist-packaging:
|
||||||
|
variables:
|
||||||
|
DIST: packaging
|
||||||
|
|
||||||
|
# Define architecture targets
|
||||||
|
.arch-aarch64:
|
||||||
|
variables:
|
||||||
|
ARCH: aarch64
|
||||||
|
|
||||||
|
.arch-amd64:
|
||||||
|
variables:
|
||||||
|
ARCH: amd64
|
||||||
|
|
||||||
|
.arch-arm64:
|
||||||
|
variables:
|
||||||
|
ARCH: arm64
|
||||||
|
|
||||||
|
.arch-ppc64le:
|
||||||
|
rules:
|
||||||
|
- !reference [.main-or-manual, rules]
|
||||||
|
variables:
|
||||||
|
ARCH: ppc64le
|
||||||
|
|
||||||
|
.arch-x86_64:
|
||||||
|
variables:
|
||||||
|
ARCH: x86_64
|
||||||
|
|
||||||
|
# Define the platform targets
|
||||||
|
.platform-amd64:
|
||||||
|
variables:
|
||||||
|
PLATFORM: linux/amd64
|
||||||
|
|
||||||
|
.platform-arm64:
|
||||||
|
variables:
|
||||||
|
PLATFORM: linux/arm64
|
||||||
|
|
||||||
|
# Define test helpers
|
||||||
|
.integration:
|
||||||
|
stage: test
|
||||||
|
variables:
|
||||||
|
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||||
|
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||||
|
before_script:
|
||||||
|
- apk add --no-cache make bash jq
|
||||||
|
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||||
|
- docker pull "${IMAGE_NAME}:${VERSION}-${DIST}"
|
||||||
|
script:
|
||||||
|
- make -f deployments/container/Makefile test-${DIST}
|
||||||
|
|
||||||
|
# Define the test targets
|
||||||
|
test-packaging:
|
||||||
|
extends:
|
||||||
|
- .integration
|
||||||
|
- .dist-packaging
|
||||||
|
needs:
|
||||||
|
- image-packaging
|
||||||
|
|
||||||
# Download the regctl binary for use in the release steps
|
# Download the regctl binary for use in the release steps
|
||||||
.regctl-setup:
|
.regctl-setup:
|
||||||
before_script:
|
before_script:
|
||||||
@@ -54,3 +164,84 @@ workflow:
|
|||||||
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
|
- curl -sSLo bin/regctl https://github.com/regclient/regclient/releases/download/${REGCTL_VERSION}/regctl-linux-amd64
|
||||||
- chmod a+x bin/regctl
|
- chmod a+x bin/regctl
|
||||||
- export PATH=$(pwd)/bin:${PATH}
|
- export PATH=$(pwd)/bin:${PATH}
|
||||||
|
|
||||||
|
# .release forms the base of the deployment jobs which push images to the CI registry.
|
||||||
|
# This is extended with the version to be deployed (e.g. the SHA or TAG) and the
|
||||||
|
# target os.
|
||||||
|
.release:
|
||||||
|
stage: release
|
||||||
|
variables:
|
||||||
|
# Define the source image for the release
|
||||||
|
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||||
|
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||||
|
# OUT_IMAGE_VERSION is overridden for external releases
|
||||||
|
OUT_IMAGE_VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||||
|
before_script:
|
||||||
|
- !reference [.regctl-setup, before_script]
|
||||||
|
# We ensure that the components of the output image are set:
|
||||||
|
- 'echo Image Name: ${OUT_IMAGE_NAME} ; [[ -n "${OUT_IMAGE_NAME}" ]] || exit 1'
|
||||||
|
- 'echo Version: ${OUT_IMAGE_VERSION} ; [[ -n "${OUT_IMAGE_VERSION}" ]] || exit 1'
|
||||||
|
|
||||||
|
- apk add --no-cache make bash
|
||||||
|
script:
|
||||||
|
# Log in to the "output" registry, tag the image and push the image
|
||||||
|
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||||
|
- regctl registry login "${CI_REGISTRY}" -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}"
|
||||||
|
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || echo "Logging in to output registry ${OUT_REGISTRY}"'
|
||||||
|
- '[ ${CI_REGISTRY} = ${OUT_REGISTRY} ] || regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"'
|
||||||
|
|
||||||
|
# Since OUT_IMAGE_NAME and OUT_IMAGE_VERSION are set, this will push the CI image to the
|
||||||
|
# Target
|
||||||
|
- make -f deployments/container/Makefile push-${DIST}
|
||||||
|
|
||||||
|
# Define a staging release step that pushes an image to an internal "staging" repository
|
||||||
|
# This is triggered for all pipelines (i.e. not only tags) to test the pipeline steps
|
||||||
|
# outside of the release process.
|
||||||
|
.release:staging:
|
||||||
|
extends:
|
||||||
|
- .release
|
||||||
|
variables:
|
||||||
|
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
|
||||||
|
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
|
||||||
|
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||||
|
OUT_IMAGE_NAME: "${NGC_REGISTRY_STAGING_IMAGE_NAME}"
|
||||||
|
|
||||||
|
# Define an external release step that pushes an image to an external repository.
|
||||||
|
# This includes a development image off main.
|
||||||
|
.release:external:
|
||||||
|
extends:
|
||||||
|
- .release
|
||||||
|
variables:
|
||||||
|
FORCE_PUBLISH_IMAGES: "yes"
|
||||||
|
rules:
|
||||||
|
- if: $CI_COMMIT_TAG
|
||||||
|
variables:
|
||||||
|
OUT_IMAGE_VERSION: "${CI_COMMIT_TAG}"
|
||||||
|
- if: $CI_COMMIT_BRANCH == $RELEASE_DEVEL_BRANCH
|
||||||
|
variables:
|
||||||
|
OUT_IMAGE_VERSION: "${DEVEL_RELEASE_IMAGE_VERSION}"
|
||||||
|
|
||||||
|
# Define the release jobs
|
||||||
|
release:staging-ubi8:
|
||||||
|
extends:
|
||||||
|
- .release:staging
|
||||||
|
- .dist-ubi8
|
||||||
|
needs:
|
||||||
|
- image-ubi8
|
||||||
|
|
||||||
|
release:staging-ubuntu20.04:
|
||||||
|
extends:
|
||||||
|
- .release:staging
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
needs:
|
||||||
|
- test-toolkit-ubuntu20.04
|
||||||
|
- test-containerd-ubuntu20.04
|
||||||
|
- test-crio-ubuntu20.04
|
||||||
|
- test-docker-ubuntu20.04
|
||||||
|
|
||||||
|
release:staging-packaging:
|
||||||
|
extends:
|
||||||
|
- .release:staging
|
||||||
|
- .dist-packaging
|
||||||
|
needs:
|
||||||
|
- test-packaging
|
||||||
|
|||||||
2
.github/workflows/e2e.yaml
vendored
2
.github/workflows/e2e.yaml
vendored
@@ -72,7 +72,7 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
E2E_INSTALL_CTK: "true"
|
E2E_INSTALL_CTK: "true"
|
||||||
E2E_IMAGE_NAME: ghcr.io/nvidia/container-toolkit
|
E2E_IMAGE_NAME: ghcr.io/nvidia/container-toolkit
|
||||||
E2E_IMAGE_TAG: ${{ inputs.version }}
|
E2E_IMAGE_TAG: ${{ inputs.version }}-ubuntu20.04
|
||||||
E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
|
E2E_SSH_USER: ${{ secrets.E2E_SSH_USER }}
|
||||||
E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
|
E2E_SSH_HOST: ${{ steps.holodeck_public_dns_name.outputs.result }}
|
||||||
run: |
|
run: |
|
||||||
|
|||||||
12
.github/workflows/image.yaml
vendored
12
.github/workflows/image.yaml
vendored
@@ -79,9 +79,15 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
strategy:
|
||||||
matrix:
|
matrix:
|
||||||
target:
|
dist:
|
||||||
- application
|
- ubuntu20.04
|
||||||
|
- ubi8
|
||||||
- packaging
|
- packaging
|
||||||
|
ispr:
|
||||||
|
- ${{ github.ref_name != 'main' && !startsWith( github.ref_name, 'release-' ) }}
|
||||||
|
exclude:
|
||||||
|
- ispr: true
|
||||||
|
dist: ubi8
|
||||||
needs: packages
|
needs: packages
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
@@ -117,4 +123,4 @@ jobs:
|
|||||||
BUILD_MULTI_ARCH_IMAGES: ${{ inputs.build_multi_arch_images }}
|
BUILD_MULTI_ARCH_IMAGES: ${{ inputs.build_multi_arch_images }}
|
||||||
run: |
|
run: |
|
||||||
echo "${VERSION}"
|
echo "${VERSION}"
|
||||||
make -f deployments/container/Makefile build-${{ matrix.target }}
|
make -f deployments/container/Makefile build-${{ matrix.dist }}
|
||||||
|
|||||||
228
.gitlab-ci.yml
Normal file
228
.gitlab-ci.yml
Normal file
@@ -0,0 +1,228 @@
|
|||||||
|
# Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
include:
|
||||||
|
- .common-ci.yml
|
||||||
|
|
||||||
|
# Define the package build helpers
|
||||||
|
.multi-arch-build:
|
||||||
|
before_script:
|
||||||
|
- apk add --no-cache coreutils build-base sed git bash make
|
||||||
|
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes -c yes'
|
||||||
|
|
||||||
|
.package-artifacts:
|
||||||
|
variables:
|
||||||
|
ARTIFACTS_NAME: "toolkit-container-${CI_PIPELINE_ID}"
|
||||||
|
ARTIFACTS_ROOT: "toolkit-container-${CI_PIPELINE_ID}"
|
||||||
|
DIST_DIR: ${CI_PROJECT_DIR}/${ARTIFACTS_ROOT}
|
||||||
|
|
||||||
|
.package-build:
|
||||||
|
extends:
|
||||||
|
- .multi-arch-build
|
||||||
|
- .package-artifacts
|
||||||
|
stage: package-build
|
||||||
|
timeout: 3h
|
||||||
|
script:
|
||||||
|
- ./scripts/build-packages.sh ${DIST}-${ARCH}
|
||||||
|
|
||||||
|
artifacts:
|
||||||
|
name: ${ARTIFACTS_NAME}
|
||||||
|
paths:
|
||||||
|
- ${ARTIFACTS_ROOT}
|
||||||
|
needs:
|
||||||
|
- job: package-meta-packages
|
||||||
|
artifacts: true
|
||||||
|
|
||||||
|
# Define the package build targets
|
||||||
|
package-meta-packages:
|
||||||
|
extends:
|
||||||
|
- .package-artifacts
|
||||||
|
stage: package-build
|
||||||
|
variables:
|
||||||
|
SKIP_LIBNVIDIA_CONTAINER: "yes"
|
||||||
|
SKIP_NVIDIA_CONTAINER_TOOLKIT: "yes"
|
||||||
|
parallel:
|
||||||
|
matrix:
|
||||||
|
- PACKAGING: [deb, rpm]
|
||||||
|
before_script:
|
||||||
|
- apk add --no-cache coreutils build-base sed git bash make
|
||||||
|
script:
|
||||||
|
- ./scripts/build-packages.sh ${PACKAGING}
|
||||||
|
artifacts:
|
||||||
|
name: ${ARTIFACTS_NAME}
|
||||||
|
paths:
|
||||||
|
- ${ARTIFACTS_ROOT}
|
||||||
|
|
||||||
|
package-centos7-aarch64:
|
||||||
|
extends:
|
||||||
|
- .package-build
|
||||||
|
- .dist-centos7
|
||||||
|
- .arch-aarch64
|
||||||
|
|
||||||
|
package-centos7-x86_64:
|
||||||
|
extends:
|
||||||
|
- .package-build
|
||||||
|
- .dist-centos7
|
||||||
|
- .arch-x86_64
|
||||||
|
|
||||||
|
package-centos8-ppc64le:
|
||||||
|
extends:
|
||||||
|
- .package-build
|
||||||
|
- .dist-centos8
|
||||||
|
- .arch-ppc64le
|
||||||
|
|
||||||
|
package-ubuntu18.04-amd64:
|
||||||
|
extends:
|
||||||
|
- .package-build
|
||||||
|
- .dist-ubuntu18.04
|
||||||
|
- .arch-amd64
|
||||||
|
|
||||||
|
package-ubuntu18.04-arm64:
|
||||||
|
extends:
|
||||||
|
- .package-build
|
||||||
|
- .dist-ubuntu18.04
|
||||||
|
- .arch-arm64
|
||||||
|
|
||||||
|
package-ubuntu18.04-ppc64le:
|
||||||
|
extends:
|
||||||
|
- .package-build
|
||||||
|
- .dist-ubuntu18.04
|
||||||
|
- .arch-ppc64le
|
||||||
|
|
||||||
|
.buildx-setup:
|
||||||
|
before_script:
|
||||||
|
- export BUILDX_VERSION=v0.6.3
|
||||||
|
- apk add --no-cache curl
|
||||||
|
- mkdir -p ~/.docker/cli-plugins
|
||||||
|
- curl -sSLo ~/.docker/cli-plugins/docker-buildx "https://github.com/docker/buildx/releases/download/${BUILDX_VERSION}/buildx-${BUILDX_VERSION}.linux-amd64"
|
||||||
|
- chmod a+x ~/.docker/cli-plugins/docker-buildx
|
||||||
|
|
||||||
|
- docker buildx create --use --platform=linux/amd64,linux/arm64
|
||||||
|
|
||||||
|
- '[[ -n "${SKIP_QEMU_SETUP}" ]] || docker run --rm --privileged multiarch/qemu-user-static --reset -p yes'
|
||||||
|
|
||||||
|
# Define the image build targets
|
||||||
|
.image-build:
|
||||||
|
stage: image-build
|
||||||
|
variables:
|
||||||
|
IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||||
|
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||||
|
PUSH_ON_BUILD: "true"
|
||||||
|
before_script:
|
||||||
|
- !reference [.buildx-setup, before_script]
|
||||||
|
|
||||||
|
- apk add --no-cache bash make git
|
||||||
|
- 'echo "Logging in to CI registry ${CI_REGISTRY}"'
|
||||||
|
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||||
|
script:
|
||||||
|
- make -f deployments/container/Makefile build-${DIST}
|
||||||
|
|
||||||
|
image-ubi8:
|
||||||
|
extends:
|
||||||
|
- .image-build
|
||||||
|
- .package-artifacts
|
||||||
|
- .dist-ubi8
|
||||||
|
needs:
|
||||||
|
# Note: The ubi8 image uses the centos7 packages
|
||||||
|
- package-centos7-aarch64
|
||||||
|
- package-centos7-x86_64
|
||||||
|
|
||||||
|
image-ubuntu20.04:
|
||||||
|
extends:
|
||||||
|
- .image-build
|
||||||
|
- .package-artifacts
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
needs:
|
||||||
|
- package-ubuntu18.04-amd64
|
||||||
|
- package-ubuntu18.04-arm64
|
||||||
|
- job: package-ubuntu18.04-ppc64le
|
||||||
|
optional: true
|
||||||
|
|
||||||
|
# The DIST=packaging target creates an image containing all built packages
|
||||||
|
image-packaging:
|
||||||
|
extends:
|
||||||
|
- .image-build
|
||||||
|
- .package-artifacts
|
||||||
|
- .dist-packaging
|
||||||
|
needs:
|
||||||
|
- job: package-ubuntu18.04-amd64
|
||||||
|
- job: package-ubuntu18.04-arm64
|
||||||
|
- job: package-amazonlinux2-aarch64
|
||||||
|
optional: true
|
||||||
|
- job: package-amazonlinux2-x86_64
|
||||||
|
optional: true
|
||||||
|
- job: package-centos7-aarch64
|
||||||
|
optional: true
|
||||||
|
- job: package-centos7-x86_64
|
||||||
|
optional: true
|
||||||
|
- job: package-centos8-ppc64le
|
||||||
|
optional: true
|
||||||
|
- job: package-debian10-amd64
|
||||||
|
optional: true
|
||||||
|
- job: package-opensuse-leap15.1-x86_64
|
||||||
|
optional: true
|
||||||
|
- job: package-ubuntu18.04-ppc64le
|
||||||
|
optional: true
|
||||||
|
|
||||||
|
# Define publish test helpers
|
||||||
|
.test:docker:
|
||||||
|
extends:
|
||||||
|
- .integration
|
||||||
|
variables:
|
||||||
|
TEST_CASES: "docker"
|
||||||
|
|
||||||
|
.test:containerd:
|
||||||
|
# TODO: The containerd tests fail due to issues with SIGHUP.
|
||||||
|
# Until this is resolved, we retry up to twice and allow failure here.
|
||||||
|
retry: 2
|
||||||
|
allow_failure: true
|
||||||
|
extends:
|
||||||
|
- .integration
|
||||||
|
variables:
|
||||||
|
TEST_CASES: "containerd"
|
||||||
|
|
||||||
|
.test:crio:
|
||||||
|
extends:
|
||||||
|
- .integration
|
||||||
|
variables:
|
||||||
|
TEST_CASES: "crio"
|
||||||
|
|
||||||
|
# Define the test targets
|
||||||
|
test-toolkit-ubuntu20.04:
|
||||||
|
extends:
|
||||||
|
- .test:toolkit
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
needs:
|
||||||
|
- image-ubuntu20.04
|
||||||
|
|
||||||
|
test-containerd-ubuntu20.04:
|
||||||
|
extends:
|
||||||
|
- .test:containerd
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
needs:
|
||||||
|
- image-ubuntu20.04
|
||||||
|
|
||||||
|
test-crio-ubuntu20.04:
|
||||||
|
extends:
|
||||||
|
- .test:crio
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
needs:
|
||||||
|
- image-ubuntu20.04
|
||||||
|
|
||||||
|
test-docker-ubuntu20.04:
|
||||||
|
extends:
|
||||||
|
- .test:docker
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
needs:
|
||||||
|
- image-ubuntu20.04
|
||||||
309
.nvidia-ci.yml
309
.nvidia-ci.yml
@@ -39,62 +39,19 @@ variables:
|
|||||||
KITMAKER_RELEASE_FOLDER: "kitmaker"
|
KITMAKER_RELEASE_FOLDER: "kitmaker"
|
||||||
PACKAGE_ARCHIVE_RELEASE_FOLDER: "releases"
|
PACKAGE_ARCHIVE_RELEASE_FOLDER: "releases"
|
||||||
|
|
||||||
# .copy-images copies the required application and packaging images from the
|
.image-pull:
|
||||||
# IN_IMAGE="${IN_IMAGE_NAME}:${IN_IMAGE_TAG}${TAG_SUFFIX}"
|
stage: image-build
|
||||||
# to
|
|
||||||
# OUT_IMAGE="${OUT_IMAGE_NAME}:${OUT_IMAGE_TAG}${TAG_SUFFIX}"
|
|
||||||
# The script also logs into IN_REGISTRY and OUT_REGISTRY using the supplied
|
|
||||||
# username and tokens.
|
|
||||||
.copy-images:
|
|
||||||
parallel:
|
|
||||||
matrix:
|
|
||||||
- TAG_SUFFIX: ["", "-packaging"]
|
|
||||||
before_script:
|
|
||||||
- !reference [.regctl-setup, before_script]
|
|
||||||
- apk add --no-cache make bash
|
|
||||||
variables:
|
|
||||||
REGCTL: regctl
|
|
||||||
script:
|
|
||||||
- |
|
|
||||||
if [ -n ${IN_REGISTRY} ] && [ -n ${IN_REGISTRY_USER} ]; then
|
|
||||||
echo "Logging in to ${IN_REGISTRY}"
|
|
||||||
${REGCTL} registry login "${IN_REGISTRY}" -u "${IN_REGISTRY_USER}" -p "${IN_REGISTRY_TOKEN}" || exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
if [ -n ${OUT_REGISTRY} ] && [ -n ${OUT_REGISTRY_USER} ] && [ "${IN_REGISTRY}" != "${OUT_REGISTRY}" ]; then
|
|
||||||
echo "Logging in to ${OUT_REGISTRY}"
|
|
||||||
${REGCTL} registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}" || exit 1
|
|
||||||
fi
|
|
||||||
|
|
||||||
export IN_IMAGE="${IN_IMAGE_NAME}:${IN_IMAGE_TAG}${TAG_SUFFIX}"
|
|
||||||
export OUT_IMAGE="${OUT_IMAGE_NAME}:${OUT_IMAGE_TAG}${TAG_SUFFIX}"
|
|
||||||
|
|
||||||
echo "Copying ${IN_IMAGE} to ${OUT_IMAGE}"
|
|
||||||
${REGCTL} image copy ${IN_IMAGE} ${OUT_IMAGE}
|
|
||||||
|
|
||||||
# pull-images pulls images from the public CI registry to the internal CI registry.
|
|
||||||
pull-images:
|
|
||||||
extends:
|
|
||||||
- .copy-images
|
|
||||||
stage: pull
|
|
||||||
variables:
|
variables:
|
||||||
IN_REGISTRY: "${STAGING_REGISTRY}"
|
IN_REGISTRY: "${STAGING_REGISTRY}"
|
||||||
IN_IMAGE_NAME: ${STAGING_REGISTRY}/container-toolkit
|
IN_IMAGE_NAME: container-toolkit
|
||||||
IN_IMAGE_TAG: "${STAGING_VERSION}"
|
IN_VERSION: "${STAGING_VERSION}"
|
||||||
|
|
||||||
OUT_REGISTRY: "${CI_REGISTRY}"
|
|
||||||
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
OUT_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
||||||
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
OUT_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
||||||
|
OUT_REGISTRY: "${CI_REGISTRY}"
|
||||||
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
OUT_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
||||||
OUT_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
|
PUSH_MULTIPLE_TAGS: "false"
|
||||||
# We delay the job start to allow the public pipeline to generate the required images.
|
# We delay the job start to allow the public pipeline to generate the required images.
|
||||||
rules:
|
rules:
|
||||||
# If the pipeline is triggered from a tag or the WEB UI we don't delay the
|
|
||||||
# start of the pipeline.
|
|
||||||
- if: $CI_COMMIT_TAG || $CI_PIPELINE_SOURCE == "web"
|
|
||||||
# If the pipeline is triggered through other means (i.e. a branch or MR)
|
|
||||||
# we add a 30 minute delay to ensure that the images are available in the
|
|
||||||
# public CI registry.
|
|
||||||
- when: delayed
|
- when: delayed
|
||||||
start_in: 30 minutes
|
start_in: 30 minutes
|
||||||
timeout: 30 minutes
|
timeout: 30 minutes
|
||||||
@@ -103,6 +60,30 @@ pull-images:
|
|||||||
when:
|
when:
|
||||||
- job_execution_timeout
|
- job_execution_timeout
|
||||||
- stuck_or_timeout_failure
|
- stuck_or_timeout_failure
|
||||||
|
before_script:
|
||||||
|
- !reference [.regctl-setup, before_script]
|
||||||
|
- apk add --no-cache make bash
|
||||||
|
- >
|
||||||
|
regctl manifest get ${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} --list > /dev/null && echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST}" || ( echo "${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} does not exist" && sleep infinity )
|
||||||
|
script:
|
||||||
|
- regctl registry login "${OUT_REGISTRY}" -u "${OUT_REGISTRY_USER}" -p "${OUT_REGISTRY_TOKEN}"
|
||||||
|
- make -f deployments/container/Makefile IMAGE=${IN_REGISTRY}/${IN_IMAGE_NAME}:${IN_VERSION}-${DIST} OUT_IMAGE=${OUT_IMAGE_NAME}:${CI_COMMIT_SHORT_SHA}-${DIST} push-${DIST}
|
||||||
|
|
||||||
|
image-ubi8:
|
||||||
|
extends:
|
||||||
|
- .dist-ubi8
|
||||||
|
- .image-pull
|
||||||
|
|
||||||
|
image-ubuntu20.04:
|
||||||
|
extends:
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
- .image-pull
|
||||||
|
|
||||||
|
# The DIST=packaging target creates an image containing all built packages
|
||||||
|
image-packaging:
|
||||||
|
extends:
|
||||||
|
- .dist-packaging
|
||||||
|
- .image-pull
|
||||||
|
|
||||||
# We skip the integration tests for the internal CI:
|
# We skip the integration tests for the internal CI:
|
||||||
.integration:
|
.integration:
|
||||||
@@ -114,37 +95,27 @@ pull-images:
|
|||||||
|
|
||||||
# The .scan step forms the base of the image scan operation performed before releasing
|
# The .scan step forms the base of the image scan operation performed before releasing
|
||||||
# images.
|
# images.
|
||||||
scan-images:
|
.scan:
|
||||||
stage: scan
|
stage: scan
|
||||||
needs:
|
|
||||||
- pull-images
|
|
||||||
image: "${PULSE_IMAGE}"
|
image: "${PULSE_IMAGE}"
|
||||||
parallel:
|
|
||||||
matrix:
|
|
||||||
- TAG_SUFFIX: [""]
|
|
||||||
PLATFORM: ["linux/amd64", "linux/arm64"]
|
|
||||||
- TAG_SUFFIX: "-packaging"
|
|
||||||
PLATFORM: "linux/amd64"
|
|
||||||
variables:
|
variables:
|
||||||
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}"
|
IMAGE: "${CI_REGISTRY_IMAGE}/container-toolkit:${CI_COMMIT_SHORT_SHA}-${DIST}"
|
||||||
IMAGE_ARCHIVE: "container-toolkit-${CI_JOB_ID}.tar"
|
IMAGE_ARCHIVE: "container-toolkit-${DIST}-${ARCH}-${CI_JOB_ID}.tar"
|
||||||
rules:
|
rules:
|
||||||
- if: $IGNORE_SCANS == "yes"
|
- if: $SKIP_SCANS != "yes"
|
||||||
allow_failure: true
|
- when: manual
|
||||||
- when: on_success
|
before_script:
|
||||||
script:
|
- docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
||||||
- |
|
# TODO: We should specify the architecture here and scan all architectures
|
||||||
docker login -u "${CI_REGISTRY_USER}" -p "${CI_REGISTRY_PASSWORD}" "${CI_REGISTRY}"
|
- docker pull --platform="${PLATFORM}" "${IMAGE}"
|
||||||
export SCAN_IMAGE=${IMAGE}${TAG_SUFFIX}
|
- docker save "${IMAGE}" -o "${IMAGE_ARCHIVE}"
|
||||||
echo "Scanning image ${SCAN_IMAGE} for ${PLATFORM}"
|
- AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
|
||||||
docker pull --platform="${PLATFORM}" "${SCAN_IMAGE}"
|
- >
|
||||||
docker save "${SCAN_IMAGE}" -o "${IMAGE_ARCHIVE}"
|
|
||||||
AuthHeader=$(echo -n $SSA_CLIENT_ID:$SSA_CLIENT_SECRET | base64 -w0)
|
|
||||||
export SSA_TOKEN=$(curl --request POST --header "Authorization: Basic $AuthHeader" --header "Content-Type: application/x-www-form-urlencoded" ${SSA_ISSUER_URL} | jq ".access_token" | tr -d '"')
|
export SSA_TOKEN=$(curl --request POST --header "Authorization: Basic $AuthHeader" --header "Content-Type: application/x-www-form-urlencoded" ${SSA_ISSUER_URL} | jq ".access_token" | tr -d '"')
|
||||||
if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi
|
- if [ -z "$SSA_TOKEN" ]; then exit 1; else echo "SSA_TOKEN set!"; fi
|
||||||
|
script:
|
||||||
pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
|
- pulse-cli -n $NSPECT_ID --ssa $SSA_TOKEN scan -i $IMAGE_ARCHIVE -p $CONTAINER_POLICY -o
|
||||||
rm -f "${IMAGE_ARCHIVE}"
|
- rm -f "${IMAGE_ARCHIVE}"
|
||||||
artifacts:
|
artifacts:
|
||||||
when: always
|
when: always
|
||||||
expire_in: 1 week
|
expire_in: 1 week
|
||||||
@@ -155,10 +126,62 @@ scan-images:
|
|||||||
- vulns.json
|
- vulns.json
|
||||||
- policy_evaluation.json
|
- policy_evaluation.json
|
||||||
|
|
||||||
upload-kitmaker-packages:
|
# Define the scan targets
|
||||||
|
scan-ubuntu20.04-amd64:
|
||||||
|
extends:
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
- .platform-amd64
|
||||||
|
- .scan
|
||||||
|
needs:
|
||||||
|
- image-ubuntu20.04
|
||||||
|
|
||||||
|
scan-ubuntu20.04-arm64:
|
||||||
|
extends:
|
||||||
|
- .dist-ubuntu20.04
|
||||||
|
- .platform-arm64
|
||||||
|
- .scan
|
||||||
|
needs:
|
||||||
|
- image-ubuntu20.04
|
||||||
|
- scan-ubuntu20.04-amd64
|
||||||
|
|
||||||
|
scan-ubi8-amd64:
|
||||||
|
extends:
|
||||||
|
- .dist-ubi8
|
||||||
|
- .platform-amd64
|
||||||
|
- .scan
|
||||||
|
needs:
|
||||||
|
- image-ubi8
|
||||||
|
|
||||||
|
scan-ubi8-arm64:
|
||||||
|
extends:
|
||||||
|
- .dist-ubi8
|
||||||
|
- .platform-arm64
|
||||||
|
- .scan
|
||||||
|
needs:
|
||||||
|
- image-ubi8
|
||||||
|
- scan-ubi8-amd64
|
||||||
|
|
||||||
|
scan-packaging:
|
||||||
|
extends:
|
||||||
|
- .dist-packaging
|
||||||
|
- .scan
|
||||||
|
needs:
|
||||||
|
- image-packaging
|
||||||
|
|
||||||
|
# Define external release helpers
|
||||||
|
.release:ngc:
|
||||||
|
extends:
|
||||||
|
- .release:external
|
||||||
|
variables:
|
||||||
|
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
|
||||||
|
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
|
||||||
|
OUT_REGISTRY: "${NGC_REGISTRY}"
|
||||||
|
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||||
|
|
||||||
|
.release:packages:
|
||||||
stage: release
|
stage: release
|
||||||
needs:
|
needs:
|
||||||
- pull-images
|
- image-packaging
|
||||||
variables:
|
variables:
|
||||||
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
VERSION: "${CI_COMMIT_SHORT_SHA}"
|
||||||
PACKAGE_REGISTRY: "${CI_REGISTRY}"
|
PACKAGE_REGISTRY: "${CI_REGISTRY}"
|
||||||
@@ -176,81 +199,34 @@ upload-kitmaker-packages:
|
|||||||
- ./scripts/release-kitmaker-artifactory.sh "${KITMAKER_ARTIFACTORY_REPO}"
|
- ./scripts/release-kitmaker-artifactory.sh "${KITMAKER_ARTIFACTORY_REPO}"
|
||||||
- rm -rf ${ARTIFACTS_DIR}
|
- rm -rf ${ARTIFACTS_DIR}
|
||||||
|
|
||||||
push-images-to-staging:
|
# Define the package release targets
|
||||||
|
release:packages:kitmaker:
|
||||||
extends:
|
extends:
|
||||||
- .copy-images
|
- .release:packages
|
||||||
stage: release
|
|
||||||
|
release:staging-ubuntu20.04:
|
||||||
|
extends:
|
||||||
|
- .release:staging
|
||||||
|
- .dist-ubuntu20.04
|
||||||
needs:
|
needs:
|
||||||
- scan-images
|
- image-ubuntu20.04
|
||||||
variables:
|
|
||||||
IN_REGISTRY: "${CI_REGISTRY}"
|
|
||||||
IN_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
|
||||||
IN_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
|
||||||
IN_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
|
||||||
IN_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
|
|
||||||
|
|
||||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
# Define the external release targets
|
||||||
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
|
# Release to NGC
|
||||||
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
|
release:ngc-ubuntu20.04:
|
||||||
OUT_IMAGE_NAME: "${NGC_STAGING_REGISTRY}/container-toolkit"
|
|
||||||
OUT_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
|
|
||||||
|
|
||||||
.release-images:
|
|
||||||
extends:
|
extends:
|
||||||
- .copy-images
|
- .dist-ubuntu20.04
|
||||||
stage: release
|
- .release:ngc
|
||||||
needs:
|
|
||||||
- scan-images
|
|
||||||
- push-images-to-staging
|
|
||||||
variables:
|
|
||||||
IN_REGISTRY: "${CI_REGISTRY}"
|
|
||||||
IN_REGISTRY_USER: "${CI_REGISTRY_USER}"
|
|
||||||
IN_REGISTRY_TOKEN: "${CI_REGISTRY_PASSWORD}"
|
|
||||||
IN_IMAGE_NAME: "${CI_REGISTRY_IMAGE}/container-toolkit"
|
|
||||||
IN_IMAGE_TAG: "${CI_COMMIT_SHORT_SHA}"
|
|
||||||
|
|
||||||
OUT_REGISTRY: "${NGC_REGISTRY}"
|
release:ngc-ubi8:
|
||||||
OUT_REGISTRY_USER: "${NGC_REGISTRY_USER}"
|
|
||||||
OUT_REGISTRY_TOKEN: "${NGC_REGISTRY_TOKEN}"
|
|
||||||
OUT_IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
|
||||||
OUT_IMAGE_TAG: "${CI_COMMIT_TAG}"
|
|
||||||
|
|
||||||
release-images-to-ngc:
|
|
||||||
extends:
|
extends:
|
||||||
- .release-images
|
- .dist-ubi8
|
||||||
rules:
|
- .release:ngc
|
||||||
- if: $CI_COMMIT_TAG
|
|
||||||
|
|
||||||
release-images-dummy:
|
release:ngc-packaging:
|
||||||
extends:
|
extends:
|
||||||
- .release-images
|
- .dist-packaging
|
||||||
variables:
|
- .release:ngc
|
||||||
REGCTL: "echo [DUMMY] regctl"
|
|
||||||
rules:
|
|
||||||
- if: $CI_COMMIT_TAG == null || $CI_COMMIT_TAG == ""
|
|
||||||
|
|
||||||
# .sign-images forms the base of the jobs which sign images in the NGC registry.
|
|
||||||
.sign-images:
|
|
||||||
stage: sign
|
|
||||||
image: ubuntu:latest
|
|
||||||
parallel:
|
|
||||||
matrix:
|
|
||||||
- TAG_SUFFIX: ["", "-packaging"]
|
|
||||||
variables:
|
|
||||||
IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
|
||||||
IMAGE_TAG: "${CI_COMMIT_TAG}"
|
|
||||||
NGC_CLI: "ngc-cli/ngc"
|
|
||||||
before_script:
|
|
||||||
- !reference [.ngccli-setup, before_script]
|
|
||||||
script:
|
|
||||||
- |
|
|
||||||
# We ensure that the IMAGE_NAME and IMAGE_TAG are set
|
|
||||||
echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1
|
|
||||||
echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1
|
|
||||||
|
|
||||||
export IMAGE=${IMAGE_NAME}:${IMAGE_TAG}${TAG_SUFFIX}
|
|
||||||
echo "Signing the image ${IMAGE}"
|
|
||||||
${NGC_CLI} registry image publish --source ${IMAGE} ${IMAGE} --public --discoverable --allow-guest --sign --org nvidia
|
|
||||||
|
|
||||||
# Define the external image signing steps for NGC
|
# Define the external image signing steps for NGC
|
||||||
# Download the ngc cli binary for use in the sign steps
|
# Download the ngc cli binary for use in the sign steps
|
||||||
@@ -268,24 +244,45 @@ release-images-dummy:
|
|||||||
- unzip ngccli_linux.zip
|
- unzip ngccli_linux.zip
|
||||||
- chmod u+x ngc-cli/ngc
|
- chmod u+x ngc-cli/ngc
|
||||||
|
|
||||||
sign-ngc-images:
|
# .sign forms the base of the deployment jobs which signs images in the CI registry.
|
||||||
extends:
|
# This is extended with the image name and version to be deployed.
|
||||||
- .sign-images
|
.sign:ngc:
|
||||||
needs:
|
image: ubuntu:latest
|
||||||
- release-images-to-ngc
|
stage: sign
|
||||||
rules:
|
rules:
|
||||||
- if: $CI_COMMIT_TAG
|
- if: $CI_COMMIT_TAG
|
||||||
variables:
|
variables:
|
||||||
NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}"
|
NGC_CLI_API_KEY: "${NGC_REGISTRY_TOKEN}"
|
||||||
|
IMAGE_NAME: "${NGC_REGISTRY_IMAGE}"
|
||||||
|
IMAGE_TAG: "${CI_COMMIT_TAG}-${DIST}"
|
||||||
retry:
|
retry:
|
||||||
max: 2
|
max: 2
|
||||||
|
before_script:
|
||||||
|
- !reference [.ngccli-setup, before_script]
|
||||||
|
# We ensure that the IMAGE_NAME and IMAGE_TAG is set
|
||||||
|
- 'echo Image Name: ${IMAGE_NAME} && [[ -n "${IMAGE_NAME}" ]] || exit 1'
|
||||||
|
- 'echo Image Tag: ${IMAGE_TAG} && [[ -n "${IMAGE_TAG}" ]] || exit 1'
|
||||||
|
script:
|
||||||
|
- 'echo "Signing the image ${IMAGE_NAME}:${IMAGE_TAG}"'
|
||||||
|
- ngc-cli/ngc registry image publish --source ${IMAGE_NAME}:${IMAGE_TAG} ${IMAGE_NAME}:${IMAGE_TAG} --public --discoverable --allow-guest --sign --org nvidia
|
||||||
|
|
||||||
sign-images-dummy:
|
sign:ngc-ubuntu20.04:
|
||||||
extends:
|
extends:
|
||||||
- .sign-images
|
- .dist-ubuntu20.04
|
||||||
|
- .sign:ngc
|
||||||
needs:
|
needs:
|
||||||
- release-images-dummy
|
- release:ngc-ubuntu20.04
|
||||||
variables:
|
|
||||||
NGC_CLI: "echo [DUMMY] ngc-cli/ngc"
|
sign:ngc-ubi8:
|
||||||
rules:
|
extends:
|
||||||
- if: $CI_COMMIT_TAG == null || $CI_COMMIT_TAG == ""
|
- .dist-ubi8
|
||||||
|
- .sign:ngc
|
||||||
|
needs:
|
||||||
|
- release:ngc-ubi8
|
||||||
|
|
||||||
|
sign:ngc-packaging:
|
||||||
|
extends:
|
||||||
|
- .dist-packaging
|
||||||
|
- .sign:ngc
|
||||||
|
needs:
|
||||||
|
- release:ngc-packaging
|
||||||
|
|||||||
147
CHANGELOG.md
147
CHANGELOG.md
@@ -1,139 +1,34 @@
|
|||||||
# NVIDIA Container Toolkit Changelog
|
# NVIDIA Container Toolkit Changelog
|
||||||
|
|
||||||
## v1.18.0-rc.1
|
## v1.17.4
|
||||||
|
- Disable mounting of compat libs from container by default
|
||||||
- Add create-soname-symlinks hook
|
|
||||||
- Require matching version of libnvidia-container-tools
|
|
||||||
- Add envvar for libcuda.so parent dir to CDI spec
|
|
||||||
- Add EnvVar to Discover interface
|
|
||||||
- Resolve to legacy by default in nvidia-container-runtime-hook
|
|
||||||
- Default to jit-cdi mode in the nvidia runtime
|
|
||||||
- Use functional options to construct runtime mode resolver
|
|
||||||
- Add NVIDIA_CTK_CONFIG_FILE_PATH envvar
|
|
||||||
- Switch to cuda ubi9 base image
|
|
||||||
- Use single version tag for image
|
|
||||||
- BUGFIX: modifier: respect GPU volume-mount device requests
|
|
||||||
- Ensure consistent sorting of annotation devices
|
|
||||||
- Extract deb and rpm packages to single image
|
|
||||||
- Remove docker-run as default runtime candidate
|
|
||||||
- Return annotation devices from VisibleDevices
|
|
||||||
- Make CDI device requests consistent with other methods
|
|
||||||
- Construct container info once
|
|
||||||
- Add logic to extract annotation device requests to image type
|
|
||||||
- Add IsPrivileged function to CUDA container type
|
|
||||||
- Add device IDs to nvcdi.GetSpec API
|
|
||||||
- Refactor extracting requested devices from the container image
|
|
||||||
- Add EnvVars option for all nvidia-ctk cdi commands
|
|
||||||
- Add nvidia-cdi-refresh service
|
|
||||||
- Add discovery of arch-specific vulkan ICD
|
|
||||||
- Add disabled-device-node-modification hook to CDI spec
|
|
||||||
- Add a hook to disable device node creation in a container
|
|
||||||
- Remove redundant deduplication of search paths for WSL
|
|
||||||
- Added ability to disable specific (or all) CDI hooks
|
|
||||||
- Consolidate HookName functionality on internal/discover pkg
|
|
||||||
- Add envvar to control debug logging in CDI hooks
|
|
||||||
- Add FeatureFlags to the nvcdi API
|
|
||||||
- Reenable nvsandboxutils for driver discovery
|
|
||||||
- Edit discover.mounts to have a deterministic output
|
|
||||||
- Refactor the way we create CDI Hooks
|
|
||||||
- Issue warning on unsupported CDI hook
|
|
||||||
- Run update-ldcache in isolated namespaces
|
|
||||||
- Add cuda-compat-mode config option
|
|
||||||
- Fix mode detection on Thor-based systems
|
|
||||||
- Add rprivate to CDI mount options
|
|
||||||
- Skip nil discoverers in merge
|
|
||||||
- bump runc go dep to v1.3.0
|
|
||||||
- Fix resolution of libs in LDCache on ARM
|
|
||||||
- Updated .release:staging to stage images in nvstaging
|
|
||||||
- Refactor toolkit installer
|
|
||||||
- Allow container runtime executable path to be specified
|
|
||||||
- Add support for building ubuntu22.04 on arm64
|
|
||||||
- Fix race condition in mounts cache
|
|
||||||
- Add support for building ubuntu22.04 on amd64
|
|
||||||
- Fix update-ldcache arguments
|
|
||||||
- Remove positional arguments from nvidia-ctk-installer
|
|
||||||
- Remove deprecated --runtime-args from nvidia-ctk-installer
|
|
||||||
- Add version info to nvidia-ctk-installer
|
|
||||||
- Update nvidia-ctk-installer app name to match binary name
|
|
||||||
- Allow nvidia-ctk config --set to accept comma-separated lists
|
|
||||||
- Disable enable-cuda-compat hook for management containers
|
|
||||||
- Allow enable-cuda-compat hook to be disabled in CDI spec generation
|
|
||||||
- Add disable-cuda-compat-lib-hook feature flag
|
|
||||||
- Add basic integration tests for forward compat
|
|
||||||
- Ensure that mode hook is executed last
|
|
||||||
- Add enable-cuda-compat hook to CDI spec generation
|
|
||||||
- Add ldconfig hook in legacy mode
|
|
||||||
- Add enable-cuda-compat hook if required
|
|
||||||
- Add enable-cuda-compat hook to allow compat libs to be discovered
|
|
||||||
- Use libcontainer execseal to run ldconfig
|
|
||||||
- Add ignore-imex-channel-requests feature flag
|
|
||||||
- Disable nvsandboxutils in nvcdi API
|
|
||||||
- Allow cdi mode to work with --gpus flag
|
|
||||||
- Add E2E GitHub Action for Container Toolkit
|
|
||||||
- Add remote-test option for E2E
|
|
||||||
- Enable CDI in runtime if CDI_ENABLED is set
|
|
||||||
- Fix overwriting docker feature flags
|
|
||||||
- Add option in toolkit container to enable CDI in runtime
|
|
||||||
- Remove Set from engine config API
|
|
||||||
- Add EnableCDI() method to engine.Interface
|
|
||||||
- Add IMEX binaries to CDI discovery
|
|
||||||
- Rename test folder to tests
|
|
||||||
- Add allow-cuda-compat-libs-from-container feature flag
|
- Add allow-cuda-compat-libs-from-container feature flag
|
||||||
- Disable mounting of compat libs from container
|
|
||||||
- Skip graphics modifier in CSV mode
|
- Skip graphics modifier in CSV mode
|
||||||
- Move nvidia-toolkit to nvidia-ctk-installer
|
|
||||||
- Automated regression testing for the NVIDIA Container Toolkit
|
|
||||||
- Add support for containerd version 3 config
|
|
||||||
- Remove watch option from create-dev-char-symlinks
|
|
||||||
- Add string TOML source
|
|
||||||
- Improve the implementation for UseLegacyConfig
|
|
||||||
- Properly pass configSearchPaths to a Driver constructor
|
- Properly pass configSearchPaths to a Driver constructor
|
||||||
- Fix create-device-node test when devices exist
|
|
||||||
- Add imex mode to CDI spec generation
|
|
||||||
- Only allow host-relative LDConfig paths
|
|
||||||
- Fix NVIDIA_IMEX_CHANNELS handling on legacy images
|
|
||||||
- Fix bug in default config file path
|
|
||||||
- Fix fsnotify.Remove logic function.
|
|
||||||
- Force symlink creation in create-symlink hook
|
|
||||||
|
|
||||||
### Changes in the Toolkit Container
|
|
||||||
|
|
||||||
- Create /work/nvidia-toolkit symlink
|
|
||||||
- Use Apache license for images
|
|
||||||
- Switch to golang distroless image
|
|
||||||
- Switch to cuda ubi9 base image
|
|
||||||
- Use single version tag for image
|
|
||||||
- Extract deb and rpm packages to single image
|
|
||||||
- Bump nvidia/cuda in /deployments/container
|
|
||||||
- Bump nvidia/cuda in /deployments/container
|
|
||||||
- Add E2E GitHub Action for Container Toolkit
|
|
||||||
- Bump nvidia/cuda in /deployments/container
|
|
||||||
- Move nvidia-toolkit to nvidia-ctk-installer
|
|
||||||
- Add support for containerd version 3 config
|
- Add support for containerd version 3 config
|
||||||
- Improve the implementation for UseLegacyConfig
|
- Add string TOML source
|
||||||
- Bump nvidia/cuda in /deployments/container
|
|
||||||
- Add imex mode to CDI spec generation
|
|
||||||
- Only allow host-relative LDConfig paths
|
|
||||||
- Fallback to file for runtime config
|
|
||||||
|
|
||||||
### Changes in libnvidia-container
|
### Changes in libnvidia-container
|
||||||
|
|
||||||
- Fix pointer accessing local variable out of scope
|
|
||||||
- Require version match between libnvidia-container-tools and libnvidia-container1
|
|
||||||
- Add libnvidia-gpucomp.so to the list of compute libs
|
|
||||||
- Use VERSION_ prefix for version parts in makefiles
|
|
||||||
- Add additional logging
|
|
||||||
- Do not discard container flags when --cuda-compat-mode is not specified
|
|
||||||
- Remove unneeded --no-cntlibs argument from list command
|
|
||||||
- Add cuda-compat-mode flag to configure command
|
|
||||||
- Skip files when user has insufficient permissions
|
|
||||||
- Fix building with Go 1.24
|
|
||||||
- Add no-cntlibs CLI option to nvidia-container-cli
|
- Add no-cntlibs CLI option to nvidia-container-cli
|
||||||
- Fix always using fallback
|
|
||||||
- Add fallback for systems without memfd_create()
|
|
||||||
- Create virtual copy of host ldconfig binary before calling fexecve()
|
|
||||||
- Fix some typos in text.
|
|
||||||
|
|
||||||
|
### Changes in the Toolkit Container
|
||||||
|
- Bump CUDA base image version to 12.6.3
|
||||||
|
|
||||||
|
## v1.17.3
|
||||||
|
- Only allow host-relative LDConfig paths by default.
|
||||||
|
### Changes in libnvidia-container
|
||||||
|
- Create virtual copy of host ldconfig binary before calling fexecve()
|
||||||
|
|
||||||
|
## v1.17.2
|
||||||
|
- Fixed a bug where legacy images would set imex channels as `all`.
|
||||||
|
|
||||||
|
## v1.17.1
|
||||||
|
- Fixed a bug where specific symlinks existing in a container image could cause a container to fail to start.
|
||||||
|
- Fixed a bug on Tegra-based systems where a container would fail to start.
|
||||||
|
- Fixed a bug where the default container runtime config path was not properly set.
|
||||||
|
|
||||||
|
### Changes in the Toolkit Container
|
||||||
|
- Fallback to using a config file if the current runtime config can not be determined from the command line.
|
||||||
|
|
||||||
## v1.17.0
|
## v1.17.0
|
||||||
- Promote v1.17.0-rc.2 to v1.17.0
|
- Promote v1.17.0-rc.2 to v1.17.0
|
||||||
|
|||||||
@@ -20,10 +20,8 @@ import (
|
|||||||
"github.com/urfave/cli/v2"
|
"github.com/urfave/cli/v2"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
|
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/chmod"
|
||||||
createsonamesymlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-soname-symlinks"
|
|
||||||
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
|
symlinks "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/create-symlinks"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat"
|
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/cudacompat"
|
||||||
disabledevicenodemodification "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/disable-device-node-modification"
|
|
||||||
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
|
ldcache "github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-cdi-hook/update-ldcache"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
)
|
)
|
||||||
@@ -36,8 +34,6 @@ func New(logger logger.Interface) []*cli.Command {
|
|||||||
symlinks.NewCommand(logger),
|
symlinks.NewCommand(logger),
|
||||||
chmod.NewCommand(logger),
|
chmod.NewCommand(logger),
|
||||||
cudacompat.NewCommand(logger),
|
cudacompat.NewCommand(logger),
|
||||||
createsonamesymlinks.NewCommand(logger),
|
|
||||||
disabledevicenodemodification.NewCommand(logger),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,166 +0,0 @@
|
|||||||
/**
|
|
||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package create_soname_symlinks
|
|
||||||
|
|
||||||
import (
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"log"
|
|
||||||
"os"
|
|
||||||
|
|
||||||
"github.com/moby/sys/reexec"
|
|
||||||
"github.com/urfave/cli/v2"
|
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig"
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
reexecUpdateLdCacheCommandName = "reexec-create-soname-symlinks"
|
|
||||||
)
|
|
||||||
|
|
||||||
type command struct {
|
|
||||||
logger logger.Interface
|
|
||||||
}
|
|
||||||
|
|
||||||
type options struct {
|
|
||||||
folders cli.StringSlice
|
|
||||||
ldconfigPath string
|
|
||||||
containerSpec string
|
|
||||||
}
|
|
||||||
|
|
||||||
func init() {
|
|
||||||
reexec.Register(reexecUpdateLdCacheCommandName, createSonameSymlinksHandler)
|
|
||||||
if reexec.Init() {
|
|
||||||
os.Exit(0)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewCommand constructs an create-soname-symlinks command with the specified logger
|
|
||||||
func NewCommand(logger logger.Interface) *cli.Command {
|
|
||||||
c := command{
|
|
||||||
logger: logger,
|
|
||||||
}
|
|
||||||
return c.build()
|
|
||||||
}
|
|
||||||
|
|
||||||
// build the create-soname-symlinks command
|
|
||||||
func (m command) build() *cli.Command {
|
|
||||||
cfg := options{}
|
|
||||||
|
|
||||||
// Create the 'create-soname-symlinks' command
|
|
||||||
c := cli.Command{
|
|
||||||
Name: "create-soname-symlinks",
|
|
||||||
Usage: "Create soname symlinks libraries in specified directories",
|
|
||||||
Before: func(c *cli.Context) error {
|
|
||||||
return m.validateFlags(c, &cfg)
|
|
||||||
},
|
|
||||||
Action: func(c *cli.Context) error {
|
|
||||||
return m.run(c, &cfg)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
c.Flags = []cli.Flag{
|
|
||||||
&cli.StringSliceFlag{
|
|
||||||
Name: "folder",
|
|
||||||
Usage: "Specify a directory to generate soname symlinks in. Can be specified multiple times",
|
|
||||||
Destination: &cfg.folders,
|
|
||||||
},
|
|
||||||
&cli.StringFlag{
|
|
||||||
Name: "ldconfig-path",
|
|
||||||
Usage: "Specify the path to ldconfig on the host",
|
|
||||||
Destination: &cfg.ldconfigPath,
|
|
||||||
Value: "/sbin/ldconfig",
|
|
||||||
},
|
|
||||||
&cli.StringFlag{
|
|
||||||
Name: "container-spec",
|
|
||||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
|
||||||
Destination: &cfg.containerSpec,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
return &c
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m command) validateFlags(c *cli.Context, cfg *options) error {
|
|
||||||
if cfg.ldconfigPath == "" {
|
|
||||||
return errors.New("ldconfig-path must be specified")
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (m command) run(c *cli.Context, cfg *options) error {
|
|
||||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to load container state: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
containerRootDir, err := s.GetContainerRoot()
|
|
||||||
if err != nil || containerRootDir == "" || containerRootDir == "/" {
|
|
||||||
return fmt.Errorf("failed to determined container root: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
cmd, err := ldconfig.NewRunner(
|
|
||||||
reexecUpdateLdCacheCommandName,
|
|
||||||
cfg.ldconfigPath,
|
|
||||||
containerRootDir,
|
|
||||||
cfg.folders.Value()...,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
return cmd.Run()
|
|
||||||
}
|
|
||||||
|
|
||||||
// createSonameSymlinksHandler wraps createSonameSymlinks with error handling.
|
|
||||||
func createSonameSymlinksHandler() {
|
|
||||||
if err := createSonameSymlinks(os.Args); err != nil {
|
|
||||||
log.Printf("Error updating ldcache: %v", err)
|
|
||||||
os.Exit(1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// createSonameSymlinks ensures that soname symlinks are created in the
|
|
||||||
// specified directories.
|
|
||||||
// It is invoked from a reexec'd handler and provides namespace isolation for
|
|
||||||
// the operations performed by this hook. At the point where this is invoked,
|
|
||||||
// we are in a new mount namespace that is cloned from the parent.
|
|
||||||
//
|
|
||||||
// args[0] is the reexec initializer function name
|
|
||||||
// args[1] is the path of the ldconfig binary on the host
|
|
||||||
// args[2] is the container root directory
|
|
||||||
// The remaining args are directories where soname symlinks need to be created.
|
|
||||||
func createSonameSymlinks(args []string) error {
|
|
||||||
if len(args) < 3 {
|
|
||||||
return fmt.Errorf("incorrect arguments: %v", args)
|
|
||||||
}
|
|
||||||
hostLdconfigPath := args[1]
|
|
||||||
containerRootDirPath := args[2]
|
|
||||||
|
|
||||||
ldconfig, err := ldconfig.New(
|
|
||||||
hostLdconfigPath,
|
|
||||||
containerRootDirPath,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to construct ldconfig runner: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return ldconfig.CreateSonameSymlinks(args[3:]...)
|
|
||||||
}
|
|
||||||
@@ -1,144 +0,0 @@
|
|||||||
/**
|
|
||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package disabledevicenodemodification
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bufio"
|
|
||||||
"bytes"
|
|
||||||
"errors"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/urfave/cli/v2"
|
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
nvidiaDriverParamsPath = "/proc/driver/nvidia/params"
|
|
||||||
)
|
|
||||||
|
|
||||||
type options struct {
|
|
||||||
containerSpec string
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewCommand constructs an disable-device-node-modification subcommand with the specified logger
|
|
||||||
func NewCommand(logger logger.Interface) *cli.Command {
|
|
||||||
cfg := options{}
|
|
||||||
|
|
||||||
c := cli.Command{
|
|
||||||
Name: "disable-device-node-modification",
|
|
||||||
Usage: "Ensure that the /proc/driver/nvidia/params file present in the container does not allow device node modifications.",
|
|
||||||
Before: func(c *cli.Context) error {
|
|
||||||
return validateFlags(c, &cfg)
|
|
||||||
},
|
|
||||||
Action: func(c *cli.Context) error {
|
|
||||||
return run(c, &cfg)
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
c.Flags = []cli.Flag{
|
|
||||||
&cli.StringFlag{
|
|
||||||
Name: "container-spec",
|
|
||||||
Hidden: true,
|
|
||||||
Usage: "Specify the path to the OCI container spec. If empty or '-' the spec will be read from STDIN",
|
|
||||||
Destination: &cfg.containerSpec,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
return &c
|
|
||||||
}
|
|
||||||
|
|
||||||
func validateFlags(c *cli.Context, cfg *options) error {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func run(_ *cli.Context, cfg *options) error {
|
|
||||||
modifiedParamsFileContents, err := getModifiedNVIDIAParamsContents()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to get modified params file contents: %w", err)
|
|
||||||
}
|
|
||||||
if len(modifiedParamsFileContents) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
s, err := oci.LoadContainerState(cfg.containerSpec)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to load container state: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
containerRootDirPath, err := s.GetContainerRoot()
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to determined container root: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return createParamsFileInContainer(containerRootDirPath, modifiedParamsFileContents)
|
|
||||||
}
|
|
||||||
|
|
||||||
func getModifiedNVIDIAParamsContents() ([]byte, error) {
|
|
||||||
hostNvidiaParamsFile, err := os.Open(nvidiaDriverParamsPath)
|
|
||||||
if errors.Is(err, os.ErrNotExist) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to load params file: %w", err)
|
|
||||||
}
|
|
||||||
defer hostNvidiaParamsFile.Close()
|
|
||||||
|
|
||||||
modifiedContents, err := getModifiedParamsFileContentsFromReader(hostNvidiaParamsFile)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to get modfied params file contents: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return modifiedContents, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// getModifiedParamsFileContentsFromReader returns the contents of a modified params file from the specified reader.
|
|
||||||
func getModifiedParamsFileContentsFromReader(r io.Reader) ([]byte, error) {
|
|
||||||
var modified bytes.Buffer
|
|
||||||
scanner := bufio.NewScanner(r)
|
|
||||||
|
|
||||||
var requiresModification bool
|
|
||||||
for scanner.Scan() {
|
|
||||||
line := scanner.Text()
|
|
||||||
if strings.HasPrefix(line, "ModifyDeviceFiles: ") {
|
|
||||||
if line == "ModifyDeviceFiles: 0" {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
if line == "ModifyDeviceFiles: 1" {
|
|
||||||
line = "ModifyDeviceFiles: 0"
|
|
||||||
requiresModification = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if _, err := modified.WriteString(line + "\n"); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to create output buffer: %w", err)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if err := scanner.Err(); err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to read params file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if !requiresModification {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return modified.Bytes(), nil
|
|
||||||
}
|
|
||||||
@@ -1,91 +0,0 @@
|
|||||||
/**
|
|
||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package disabledevicenodemodification
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestGetModifiedParamsFileContentsFromReader(t *testing.T) {
|
|
||||||
testCases := map[string]struct {
|
|
||||||
contents []byte
|
|
||||||
expectedError error
|
|
||||||
expectedContents []byte
|
|
||||||
}{
|
|
||||||
"no contents": {
|
|
||||||
contents: nil,
|
|
||||||
expectedError: nil,
|
|
||||||
expectedContents: nil,
|
|
||||||
},
|
|
||||||
"other contents are ignored": {
|
|
||||||
contents: []byte(`# Some other content
|
|
||||||
that we don't care about
|
|
||||||
`),
|
|
||||||
expectedError: nil,
|
|
||||||
expectedContents: nil,
|
|
||||||
},
|
|
||||||
"already zero requires no modification": {
|
|
||||||
contents: []byte("ModifyDeviceFiles: 0"),
|
|
||||||
expectedError: nil,
|
|
||||||
expectedContents: nil,
|
|
||||||
},
|
|
||||||
"leading spaces require no modification": {
|
|
||||||
contents: []byte(" ModifyDeviceFiles: 1"),
|
|
||||||
},
|
|
||||||
"Trailing spaces require no modification": {
|
|
||||||
contents: []byte("ModifyDeviceFiles: 1 "),
|
|
||||||
},
|
|
||||||
"Not 1 require no modification": {
|
|
||||||
contents: []byte("ModifyDeviceFiles: 11"),
|
|
||||||
},
|
|
||||||
"single line requires modification": {
|
|
||||||
contents: []byte("ModifyDeviceFiles: 1"),
|
|
||||||
expectedError: nil,
|
|
||||||
expectedContents: []byte("ModifyDeviceFiles: 0\n"),
|
|
||||||
},
|
|
||||||
"single line with trailing newline requires modification": {
|
|
||||||
contents: []byte("ModifyDeviceFiles: 1\n"),
|
|
||||||
expectedError: nil,
|
|
||||||
expectedContents: []byte("ModifyDeviceFiles: 0\n"),
|
|
||||||
},
|
|
||||||
"other content is maintained": {
|
|
||||||
contents: []byte(`ModifyDeviceFiles: 1
|
|
||||||
other content
|
|
||||||
that
|
|
||||||
is maintained`),
|
|
||||||
expectedError: nil,
|
|
||||||
expectedContents: []byte(`ModifyDeviceFiles: 0
|
|
||||||
other content
|
|
||||||
that
|
|
||||||
is maintained
|
|
||||||
`),
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for description, tc := range testCases {
|
|
||||||
t.Run(description, func(t *testing.T) {
|
|
||||||
contents, err := getModifiedParamsFileContentsFromReader(bytes.NewReader(tc.contents))
|
|
||||||
require.EqualValues(t, tc.expectedError, err)
|
|
||||||
require.EqualValues(t, string(tc.expectedContents), string(contents))
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
@@ -1,63 +0,0 @@
|
|||||||
//go:build linux
|
|
||||||
|
|
||||||
/**
|
|
||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package disabledevicenodemodification
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runc/libcontainer/utils"
|
|
||||||
"golang.org/x/sys/unix"
|
|
||||||
)
|
|
||||||
|
|
||||||
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
|
|
||||||
tmpRoot, err := os.MkdirTemp("", "nvct-empty-dir*")
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create temp root: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
if err := createTmpFs(tmpRoot, len(contents)); err != nil {
|
|
||||||
return fmt.Errorf("failed to create tmpfs mount for params file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
modifiedParamsFile, err := os.OpenFile(filepath.Join(tmpRoot, "nvct-params"), os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0444)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to open modified params file: %w", err)
|
|
||||||
}
|
|
||||||
defer modifiedParamsFile.Close()
|
|
||||||
|
|
||||||
if _, err := modifiedParamsFile.Write(contents); err != nil {
|
|
||||||
return fmt.Errorf("failed to write temporary params file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = utils.WithProcfd(containerRootDirPath, nvidiaDriverParamsPath, func(nvidiaDriverParamsFdPath string) error {
|
|
||||||
return unix.Mount(modifiedParamsFile.Name(), nvidiaDriverParamsFdPath, "", unix.MS_BIND|unix.MS_RDONLY|unix.MS_NODEV|unix.MS_PRIVATE|unix.MS_NOSYMFOLLOW, "")
|
|
||||||
})
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to mount modified params file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func createTmpFs(target string, size int) error {
|
|
||||||
return unix.Mount("tmpfs", target, "tmpfs", 0, fmt.Sprintf("size=%d", size))
|
|
||||||
}
|
|
||||||
@@ -1,27 +0,0 @@
|
|||||||
//go:build !linux
|
|
||||||
// +build !linux
|
|
||||||
|
|
||||||
/**
|
|
||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package disabledevicenodemodification
|
|
||||||
|
|
||||||
import "fmt"
|
|
||||||
|
|
||||||
func createParamsFileInContainer(containerRootDirPath string, contents []byte) error {
|
|
||||||
return fmt.Errorf("not supported")
|
|
||||||
}
|
|
||||||
46
cmd/nvidia-cdi-hook/update-ldcache/container-root.go
Normal file
46
cmd/nvidia-cdi-hook/update-ldcache/container-root.go
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
/**
|
||||||
|
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
**/
|
||||||
|
|
||||||
|
package ldcache
|
||||||
|
|
||||||
|
import (
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/moby/sys/symlink"
|
||||||
|
)
|
||||||
|
|
||||||
|
// A containerRoot represents the root filesystem of a container.
|
||||||
|
type containerRoot string
|
||||||
|
|
||||||
|
// hasPath checks whether the specified path exists in the root.
|
||||||
|
func (r containerRoot) hasPath(path string) bool {
|
||||||
|
// Resolve the path inside the root first; a failure to resolve is reported
// as the path not being present.
resolved, err := r.resolve(path)
|
||||||
|
if err != nil {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
// Only a not-exist error from Stat counts as "missing". Any other Stat
// error (for example a permission error) falls through and the path is
// reported as present.
if _, err := os.Stat(resolved); err != nil && os.IsNotExist(err) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// resolve returns the absolute path including root path.
|
||||||
|
// Symlinks are resolved, but are guaranteed to resolve in the root.
|
||||||
|
func (r containerRoot) resolve(path string) (string, error) {
|
||||||
|
// Prefix the requested path with the root to get the candidate location.
absolute := filepath.Clean(filepath.Join(string(r), path))
|
||||||
|
// Symlink resolution is delegated to FollowSymlinkInScope, with the root
// passed as the scope; its result and error are returned unchanged.
return symlink.FollowSymlinkInScope(absolute, string(r))
|
||||||
|
}
|
||||||
@@ -17,7 +17,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
**/
|
**/
|
||||||
|
|
||||||
package ldconfig
|
package ldcache
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"errors"
|
"errors"
|
||||||
@@ -29,8 +29,8 @@ import (
|
|||||||
"syscall"
|
"syscall"
|
||||||
|
|
||||||
securejoin "github.com/cyphar/filepath-securejoin"
|
securejoin "github.com/cyphar/filepath-securejoin"
|
||||||
"github.com/moby/sys/reexec"
|
|
||||||
|
|
||||||
|
"github.com/moby/sys/reexec"
|
||||||
"github.com/opencontainers/runc/libcontainer/utils"
|
"github.com/opencontainers/runc/libcontainer/utils"
|
||||||
"golang.org/x/sys/unix"
|
"golang.org/x/sys/unix"
|
||||||
)
|
)
|
||||||
@@ -182,7 +182,7 @@ func createTmpFs(target string, size int) error {
|
|||||||
// createReexecCommand creates a command that can be used to trigger the reexec
|
// createReexecCommand creates a command that can be used to trigger the reexec
|
||||||
// initializer.
|
// initializer.
|
||||||
// On linux this command runs in new namespaces.
|
// On linux this command runs in new namespaces.
|
||||||
func createReexecCommand(args []string) (*exec.Cmd, error) {
|
func createReexecCommand(args []string) *exec.Cmd {
|
||||||
cmd := reexec.Command(args...)
|
cmd := reexec.Command(args...)
|
||||||
cmd.Stdin = os.Stdin
|
cmd.Stdin = os.Stdin
|
||||||
cmd.Stdout = os.Stdout
|
cmd.Stdout = os.Stdout
|
||||||
@@ -196,5 +196,5 @@ func createReexecCommand(args []string) (*exec.Cmd, error) {
|
|||||||
syscall.CLONE_NEWNET,
|
syscall.CLONE_NEWNET,
|
||||||
}
|
}
|
||||||
|
|
||||||
return cmd, nil
|
return cmd
|
||||||
}
|
}
|
||||||
@@ -17,11 +17,14 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
**/
|
**/
|
||||||
|
|
||||||
package ldconfig
|
package ldcache
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"os/exec"
|
"os/exec"
|
||||||
|
|
||||||
|
"github.com/moby/sys/reexec"
|
||||||
)
|
)
|
||||||
|
|
||||||
func pivotRoot(newroot string) error {
|
func pivotRoot(newroot string) error {
|
||||||
@@ -36,6 +39,13 @@ func mountProc(newroot string) error {
|
|||||||
return fmt.Errorf("not supported")
|
return fmt.Errorf("not supported")
|
||||||
}
|
}
|
||||||
|
|
||||||
func createReexecCommand(args []string) (*exec.Cmd, error) {
|
// createReexecCommand creates a command that can be used to trigger the reexec
|
||||||
return nil, fmt.Errorf("not supported")
|
// initializer.
|
||||||
|
func createReexecCommand(args []string) *exec.Cmd {
|
||||||
|
cmd := reexec.Command(args...)
|
||||||
|
cmd.Stdin = os.Stdin
|
||||||
|
cmd.Stdout = os.Stdout
|
||||||
|
cmd.Stderr = os.Stderr
|
||||||
|
|
||||||
|
return cmd
|
||||||
}
|
}
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
**/
|
**/
|
||||||
|
|
||||||
package ldconfig
|
package ldcache
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
@@ -16,7 +16,7 @@
|
|||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
**/
|
**/
|
||||||
|
|
||||||
package ldconfig
|
package ldcache
|
||||||
|
|
||||||
import "syscall"
|
import "syscall"
|
||||||
|
|
||||||
@@ -21,16 +21,24 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
"strings"
|
||||||
|
|
||||||
"github.com/moby/sys/reexec"
|
"github.com/moby/sys/reexec"
|
||||||
"github.com/urfave/cli/v2"
|
"github.com/urfave/cli/v2"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/ldconfig"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
|
// ldsoconfdFilenamePattern specifies the pattern for the filename
|
||||||
|
// in ld.so.conf.d that includes references to the specified directories.
|
||||||
|
// The 00-nvcr prefix is chosen to ensure that these libraries have a
|
||||||
|
// higher precedence than other libraries on the system, but lower than
|
||||||
|
// the 00-cuda-compat that is included in some containers.
|
||||||
|
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
|
||||||
|
|
||||||
reexecUpdateLdCacheCommandName = "reexec-update-ldcache"
|
reexecUpdateLdCacheCommandName = "reexec-update-ldcache"
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -115,15 +123,15 @@ func (m command) run(c *cli.Context, cfg *options) error {
|
|||||||
return fmt.Errorf("failed to determined container root: %v", err)
|
return fmt.Errorf("failed to determined container root: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cmd, err := ldconfig.NewRunner(
|
args := []string{
|
||||||
reexecUpdateLdCacheCommandName,
|
reexecUpdateLdCacheCommandName,
|
||||||
cfg.ldconfigPath,
|
strings.TrimPrefix(config.NormalizeLDConfigPath("@"+cfg.ldconfigPath), "@"),
|
||||||
containerRootDir,
|
containerRootDir,
|
||||||
cfg.folders.Value()...,
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
}
|
||||||
|
args = append(args, cfg.folders.Value()...)
|
||||||
|
|
||||||
|
cmd := createReexecCommand(args)
|
||||||
|
|
||||||
return cmd.Run()
|
return cmd.Run()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -135,16 +143,15 @@ func updateLdCacheHandler() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// updateLdCache ensures that the ldcache in the container is updated to include
|
// updateLdCache is invoked from a reexec'd handler and provides namespace
|
||||||
// libraries that are mounted from the host.
|
// isolation for the operations performed by this hook.
|
||||||
// It is invoked from a reexec'd handler and provides namespace isolation for
|
// At the point where this is invoked, we are in a new mount namespace that is
|
||||||
// the operations performed by this hook. At the point where this is invoked,
|
// cloned from the parent.
|
||||||
// we are in a new mount namespace that is cloned from the parent.
|
|
||||||
//
|
//
|
||||||
// args[0] is the reexec initializer function name
|
// args[0] is the reexec initializer function name
|
||||||
// args[1] is the path of the ldconfig binary on the host
|
// args[1] is the path of the ldconfig binary on the host
|
||||||
// args[2] is the container root directory
|
// args[2] is the container root directory
|
||||||
// The remaining args are folders where soname symlinks need to be created.
|
// The remaining args are folders that need to be added to the ldcache.
|
||||||
func updateLdCache(args []string) error {
|
func updateLdCache(args []string) error {
|
||||||
if len(args) < 3 {
|
if len(args) < 3 {
|
||||||
return fmt.Errorf("incorrect arguments: %v", args)
|
return fmt.Errorf("incorrect arguments: %v", args)
|
||||||
@@ -152,13 +159,97 @@ func updateLdCache(args []string) error {
|
|||||||
hostLdconfigPath := args[1]
|
hostLdconfigPath := args[1]
|
||||||
containerRootDirPath := args[2]
|
containerRootDirPath := args[2]
|
||||||
|
|
||||||
ldconfig, err := ldconfig.New(
|
// To prevent leaking the parent proc filesystem, we create a new proc mount
|
||||||
hostLdconfigPath,
|
// in the container root.
|
||||||
containerRootDirPath,
|
if err := mountProc(containerRootDirPath); err != nil {
|
||||||
)
|
return fmt.Errorf("error mounting /proc: %w", err)
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to construct ldconfig runner: %w", err)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return ldconfig.UpdateLDCache(args[3:]...)
|
// We mount the host ldconfig before we pivot root since host paths are not
|
||||||
|
// visible after the pivot root operation.
|
||||||
|
ldconfigPath, err := mountLdConfig(hostLdconfigPath, containerRootDirPath)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("error mounting host ldconfig: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// We pivot to the container root for the new process, this further limits
|
||||||
|
// access to the host.
|
||||||
|
if err := pivotRoot(containerRootDirPath); err != nil {
|
||||||
|
return fmt.Errorf("error running pivot_root: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return runLdconfig(ldconfigPath, args[3:]...)
|
||||||
|
}
|
||||||
|
|
||||||
|
// runLdconfig runs the ldconfig binary and ensures that the specified directories
|
||||||
|
// are processed for the ldcache.
|
||||||
|
func runLdconfig(ldconfigPath string, directories ...string) error {
|
||||||
|
args := []string{
|
||||||
|
"ldconfig",
|
||||||
|
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
|
||||||
|
// be configured to use a different config file by default.
|
||||||
|
// Note that no `-r` argument is passed here: updateLdCache pivots into the
|
||||||
|
// container root before calling this function, so /etc/ld.so.conf is in the container.
|
||||||
|
"-f", "/etc/ld.so.conf",
|
||||||
|
}
|
||||||
|
|
||||||
|
// All checks below are made relative to "/".
// NOTE(review): this local shadows the containerRoot type for the rest of
// the function.
containerRoot := containerRoot("/")
|
||||||
|
|
||||||
|
// If a cache file already exists, point ldconfig at it with -C; otherwise
// pass -N (documented in ldconfig(8) as "don't rebuild the cache").
if containerRoot.hasPath("/etc/ld.so.cache") {
|
||||||
|
args = append(args, "-C", "/etc/ld.so.cache")
|
||||||
|
} else {
|
||||||
|
args = append(args, "-N")
|
||||||
|
}
|
||||||
|
|
||||||
|
// When /etc/ld.so.conf.d exists the directories are written to a config
// file there; otherwise they are passed on the ldconfig command line.
if containerRoot.hasPath("/etc/ld.so.conf.d") {
|
||||||
|
err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
args = append(args, directories...)
|
||||||
|
}
|
||||||
|
|
||||||
|
return SafeExec(ldconfigPath, args, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/.
|
||||||
|
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
|
||||||
|
// contains the specified directories on each line.
|
||||||
|
func createLdsoconfdFile(pattern string, dirs ...string) error {
|
||||||
|
if len(dirs) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
ldsoconfdDir := "/etc/ld.so.conf.d"
|
||||||
|
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
|
||||||
|
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to create config file: %w", err)
|
||||||
|
}
|
||||||
|
defer func() {
|
||||||
|
_ = configFile.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
added := make(map[string]bool)
|
||||||
|
for _, dir := range dirs {
|
||||||
|
if added[dir] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
_, err = fmt.Fprintf(configFile, "%s\n", dir)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to update config file: %w", err)
|
||||||
|
}
|
||||||
|
added[dir] = true
|
||||||
|
}
|
||||||
|
|
||||||
|
// The created file needs to be world readable for the cases where the container is run as a non-root user.
|
||||||
|
if err := configFile.Chmod(0644); err != nil {
|
||||||
|
return fmt.Errorf("failed to chmod config file: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -13,6 +13,10 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
capSysAdmin = "CAP_SYS_ADMIN"
|
||||||
|
)
|
||||||
|
|
||||||
type nvidiaConfig struct {
|
type nvidiaConfig struct {
|
||||||
Devices []string
|
Devices []string
|
||||||
MigConfigDevices string
|
MigConfigDevices string
|
||||||
@@ -99,9 +103,9 @@ func loadSpec(path string) (spec *Spec) {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *Spec) GetCapabilities() []string {
|
func isPrivileged(s *Spec) bool {
|
||||||
if s == nil || s.Process == nil || s.Process.Capabilities == nil {
|
if s.Process.Capabilities == nil {
|
||||||
return nil
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
var caps []string
|
var caps []string
|
||||||
@@ -114,22 +118,67 @@ func (s *Spec) GetCapabilities() []string {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
||||||
}
|
}
|
||||||
return caps
|
for _, c := range caps {
|
||||||
|
if c == capSysAdmin {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// Otherwise, parse s.Process.Capabilities as:
|
// Otherwise, parse s.Process.Capabilities as:
|
||||||
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
|
// github.com/opencontainers/runtime-spec/blob/v1.0.0/specs-go/config.go#L30-L54
|
||||||
capabilities := specs.LinuxCapabilities{}
|
process := specs.Process{
|
||||||
err := json.Unmarshal(*s.Process.Capabilities, &capabilities)
|
Env: s.Process.Env,
|
||||||
|
}
|
||||||
|
|
||||||
|
err := json.Unmarshal(*s.Process.Capabilities, &process.Capabilities)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
log.Panicln("could not decode Process.Capabilities in OCI spec:", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return image.OCISpecCapabilities(capabilities).GetCapabilities()
|
fullSpec := specs.Spec{
|
||||||
|
Version: *s.Version,
|
||||||
|
Process: &process,
|
||||||
|
}
|
||||||
|
|
||||||
|
return image.IsPrivileged(&fullSpec)
|
||||||
}
|
}
|
||||||
|
|
||||||
func isPrivileged(s *Spec) bool {
|
func getDevicesFromEnvvar(containerImage image.CUDA, swarmResourceEnvvars []string) []string {
|
||||||
return image.IsPrivileged(s)
|
// We check if the image has at least one of the Swarm resource envvars defined and use this
|
||||||
|
// if specified.
|
||||||
|
for _, envvar := range swarmResourceEnvvars {
|
||||||
|
if containerImage.HasEnvvar(envvar) {
|
||||||
|
return containerImage.DevicesFromEnvvars(swarmResourceEnvvars...).List()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return containerImage.VisibleDevicesFromEnvVar()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (hookConfig *hookConfig) getDevices(image image.CUDA, privileged bool) []string {
|
||||||
|
// If enabled, try and get the device list from volume mounts first
|
||||||
|
if hookConfig.AcceptDeviceListAsVolumeMounts {
|
||||||
|
devices := image.VisibleDevicesFromMounts()
|
||||||
|
if len(devices) > 0 {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback to reading from the environment variable if privileges are correct
|
||||||
|
devices := getDevicesFromEnvvar(image, hookConfig.getSwarmResourceEnvvars())
|
||||||
|
if len(devices) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
if privileged || hookConfig.AcceptEnvvarUnprivileged {
|
||||||
|
return devices
|
||||||
|
}
|
||||||
|
|
||||||
|
configName := hookConfig.getConfigOption("AcceptEnvvarUnprivileged")
|
||||||
|
log.Printf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES (privileged=%v, %v=%v) ", privileged, configName, hookConfig.AcceptEnvvarUnprivileged)
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func getMigConfigDevices(i image.CUDA) *string {
|
func getMigConfigDevices(i image.CUDA) *string {
|
||||||
@@ -176,6 +225,7 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
|
|||||||
// We use the default driver capabilities by default. This is filtered to only include the
|
// We use the default driver capabilities by default. This is filtered to only include the
|
||||||
// supported capabilities
|
// supported capabilities
|
||||||
supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)
|
supportedDriverCapabilities := image.NewDriverCapabilities(hookConfig.SupportedDriverCapabilities)
|
||||||
|
|
||||||
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
capabilities := supportedDriverCapabilities.Intersection(image.DefaultDriverCapabilities)
|
||||||
|
|
||||||
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
|
capsEnvSpecified := cudaImage.HasEnvvar(image.EnvVarNvidiaDriverCapabilities)
|
||||||
@@ -201,7 +251,7 @@ func (hookConfig *hookConfig) getDriverCapabilities(cudaImage image.CUDA, legacy
|
|||||||
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
|
func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool) *nvidiaConfig {
|
||||||
legacyImage := image.IsLegacy()
|
legacyImage := image.IsLegacy()
|
||||||
|
|
||||||
devices := image.VisibleDevices()
|
devices := hookConfig.getDevices(image, privileged)
|
||||||
if len(devices) == 0 {
|
if len(devices) == 0 {
|
||||||
// empty devices means this is not a GPU container.
|
// empty devices means this is not a GPU container.
|
||||||
return nil
|
return nil
|
||||||
@@ -242,14 +292,7 @@ func (hookConfig *hookConfig) getNvidiaConfig(image image.CUDA, privileged bool)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (hookConfig *hookConfig) getContainerConfig() (config *containerConfig) {
|
func (hookConfig *hookConfig) getContainerConfig() (config containerConfig) {
|
||||||
hookConfig.Lock()
|
|
||||||
defer hookConfig.Unlock()
|
|
||||||
|
|
||||||
if hookConfig.containerConfig != nil {
|
|
||||||
return hookConfig.containerConfig
|
|
||||||
}
|
|
||||||
|
|
||||||
var h HookState
|
var h HookState
|
||||||
d := json.NewDecoder(os.Stdin)
|
d := json.NewDecoder(os.Stdin)
|
||||||
if err := d.Decode(&h); err != nil {
|
if err := d.Decode(&h); err != nil {
|
||||||
@@ -263,28 +306,20 @@ func (hookConfig *hookConfig) getContainerConfig() (config *containerConfig) {
|
|||||||
|
|
||||||
s := loadSpec(path.Join(b, "config.json"))
|
s := loadSpec(path.Join(b, "config.json"))
|
||||||
|
|
||||||
privileged := isPrivileged(s)
|
image, err := image.New(
|
||||||
|
|
||||||
i, err := image.New(
|
|
||||||
image.WithEnv(s.Process.Env),
|
image.WithEnv(s.Process.Env),
|
||||||
image.WithMounts(s.Mounts),
|
image.WithMounts(s.Mounts),
|
||||||
image.WithPrivileged(privileged),
|
|
||||||
image.WithDisableRequire(hookConfig.DisableRequire),
|
image.WithDisableRequire(hookConfig.DisableRequire),
|
||||||
image.WithAcceptDeviceListAsVolumeMounts(hookConfig.AcceptDeviceListAsVolumeMounts),
|
|
||||||
image.WithAcceptEnvvarUnprivileged(hookConfig.AcceptEnvvarUnprivileged),
|
|
||||||
image.WithPreferredVisibleDevicesEnvVars(hookConfig.getSwarmResourceEnvvars()...),
|
|
||||||
)
|
)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln(err)
|
log.Panicln(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cc := containerConfig{
|
privileged := isPrivileged(s)
|
||||||
|
return containerConfig{
|
||||||
Pid: h.Pid,
|
Pid: h.Pid,
|
||||||
Rootfs: s.Root.Path,
|
Rootfs: s.Root.Path,
|
||||||
Image: i,
|
Image: image,
|
||||||
Nvidia: hookConfig.getNvidiaConfig(i, privileged),
|
Nvidia: hookConfig.getNvidiaConfig(image, privileged),
|
||||||
}
|
}
|
||||||
hookConfig.containerConfig = &cc
|
|
||||||
|
|
||||||
return hookConfig.containerConfig
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,8 +1,10 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"path/filepath"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||||
@@ -477,17 +479,14 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
image, _ := image.New(
|
image, _ := image.New(
|
||||||
image.WithEnvMap(tc.env),
|
image.WithEnvMap(tc.env),
|
||||||
image.WithPrivileged(tc.privileged),
|
|
||||||
image.WithPreferredVisibleDevicesEnvVars(tc.hookConfig.getSwarmResourceEnvvars()...),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// Wrap the call to getNvidiaConfig() in a closure.
|
// Wrap the call to getNvidiaConfig() in a closure.
|
||||||
var cfg *nvidiaConfig
|
var cfg *nvidiaConfig
|
||||||
getConfig := func() {
|
getConfig := func() {
|
||||||
hookCfg := tc.hookConfig
|
hookCfg := tc.hookConfig
|
||||||
if hookCfg == nil {
|
if hookCfg == nil {
|
||||||
defaultConfig, _ := config.GetDefault()
|
defaultConfig, _ := config.GetDefault()
|
||||||
hookCfg = &hookConfig{Config: defaultConfig}
|
hookCfg = &hookConfig{defaultConfig}
|
||||||
}
|
}
|
||||||
cfg = hookCfg.getNvidiaConfig(image, tc.privileged)
|
cfg = hookCfg.getNvidiaConfig(image, tc.privileged)
|
||||||
}
|
}
|
||||||
@@ -519,6 +518,340 @@ func TestGetNvidiaConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestDeviceListSourcePriority checks which source hookConfig.getDevices uses
// for the device list. Each case supplies devices through volume mounts and/or
// the NVIDIA_VISIBLE_DEVICES envvar and toggles the privileged flag together
// with the AcceptEnvvarUnprivileged and AcceptDeviceListAsVolumeMounts
// settings. The table expects mount devices to win when mounts are accepted
// and present, and the envvar to be honoured only when the container is
// privileged or AcceptEnvvarUnprivileged is set.
func TestDeviceListSourcePriority(t *testing.T) {
|
||||||
|
var tests = []struct {
|
||||||
|
description string
|
||||||
|
mountDevices []specs.Mount
|
||||||
|
envvarDevices string
|
||||||
|
privileged bool
|
||||||
|
acceptUnprivileged bool
|
||||||
|
acceptMounts bool
|
||||||
|
expectedDevices []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "Mount devices, unprivileged, no accept unprivileged",
|
||||||
|
mountDevices: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
envvarDevices: "GPU2,GPU3",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
acceptMounts: true,
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, unprivileged, no accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
acceptMounts: true,
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, privileged, no accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: true,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
acceptMounts: true,
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "No mount devices, unprivileged, accept unprivileged",
|
||||||
|
mountDevices: nil,
|
||||||
|
envvarDevices: "GPU0,GPU1",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: true,
|
||||||
|
acceptMounts: true,
|
||||||
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
|
||||||
|
mountDevices: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
envvarDevices: "GPU2,GPU3",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: true,
|
||||||
|
acceptMounts: false,
|
||||||
|
expectedDevices: []string{"GPU2", "GPU3"},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
|
||||||
|
mountDevices: []specs.Mount{
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU0"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
Source: "/dev/null",
|
||||||
|
Destination: filepath.Join(image.DeviceListAsVolumeMountsRoot, "GPU1"),
|
||||||
|
},
|
||||||
|
},
|
||||||
|
envvarDevices: "GPU2,GPU3",
|
||||||
|
privileged: false,
|
||||||
|
acceptUnprivileged: false,
|
||||||
|
acceptMounts: false,
|
||||||
|
expectedDevices: nil,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
// Wrap the call to getDevices() in a closure.
|
||||||
|
var devices []string
|
||||||
|
getDevices := func() {
|
||||||
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(
|
||||||
|
map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: tc.envvarDevices,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
image.WithMounts(tc.mountDevices),
|
||||||
|
)
|
||||||
|
defaultConfig, _ := config.GetDefault()
|
||||||
|
cfg := &hookConfig{defaultConfig}
|
||||||
|
cfg.AcceptEnvvarUnprivileged = tc.acceptUnprivileged
|
||||||
|
cfg.AcceptDeviceListAsVolumeMounts = tc.acceptMounts
|
||||||
|
devices = cfg.getDevices(image, tc.privileged)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Run the closure and compare the selected devices with the expectation.
|
||||||
|
getDevices()
|
||||||
|
|
||||||
|
require.Equal(t, tc.expectedDevices, devices)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestGetDevicesFromEnvvar(t *testing.T) {
|
||||||
|
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
|
||||||
|
gpuID := "GPU-12345"
|
||||||
|
anotherGPUID := "GPU-67890"
|
||||||
|
thirdGPUID := "MIG-12345"
|
||||||
|
|
||||||
|
var tests = []struct {
|
||||||
|
description string
|
||||||
|
swarmResourceEnvvars []string
|
||||||
|
env map[string]string
|
||||||
|
expectedDevices []string
|
||||||
|
}{
|
||||||
|
{
|
||||||
|
description: "empty env returns nil for non-legacy image",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: "void",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: "none",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{""},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
image.EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "empty env returns all for legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{"all"},
|
||||||
|
},
|
||||||
|
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
||||||
|
// not enabled
|
||||||
|
{
|
||||||
|
description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: "",
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: "void",
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: "none",
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{""},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
image.EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "empty env returns all for legacy image",
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
image.EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{"all"},
|
||||||
|
},
|
||||||
|
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
||||||
|
// enabled
|
||||||
|
{
|
||||||
|
description: "empty env returns nil for non-legacy image",
|
||||||
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||||
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
||||||
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: "void",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
||||||
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: "none",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{""},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
||||||
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: gpuID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
||||||
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: gpuID,
|
||||||
|
image.EnvVarCudaVersion: "legacy",
|
||||||
|
},
|
||||||
|
expectedDevices: []string{gpuID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
||||||
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{anotherGPUID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||||
|
swarmResourceEnvvars: []string{envDockerResourceGPUs},
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
envDockerResourceGPUs: anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{anotherGPUID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
||||||
|
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{anotherGPUID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
||||||
|
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
||||||
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{thirdGPUID, anotherGPUID},
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
||||||
|
swarmResourceEnvvars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
||||||
|
env: map[string]string{
|
||||||
|
image.EnvVarNvidiaVisibleDevices: gpuID,
|
||||||
|
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
||||||
|
},
|
||||||
|
expectedDevices: []string{anotherGPUID},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range tests {
|
||||||
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
|
image, _ := image.New(
|
||||||
|
image.WithEnvMap(tc.env),
|
||||||
|
)
|
||||||
|
devices := getDevicesFromEnvvar(image, tc.swarmResourceEnvvars)
|
||||||
|
require.EqualValues(t, tc.expectedDevices, devices)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestGetDriverCapabilities(t *testing.T) {
|
func TestGetDriverCapabilities(t *testing.T) {
|
||||||
|
|
||||||
supportedCapabilities := "compute,display,utility,video"
|
supportedCapabilities := "compute,display,utility,video"
|
||||||
|
|||||||
@@ -4,46 +4,50 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"log"
|
"log"
|
||||||
"os"
|
"os"
|
||||||
|
"path"
|
||||||
"reflect"
|
"reflect"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
configPath = "/etc/nvidia-container-runtime/config.toml"
|
||||||
|
driverPath = "/run/nvidia/driver"
|
||||||
)
|
)
|
||||||
|
|
||||||
// hookConfig wraps the toolkit config.
|
// hookConfig wraps the toolkit config.
|
||||||
// This allows for functions to be defined on the local type.
|
// This allows for functions to be defined on the local type.
|
||||||
type hookConfig struct {
|
type hookConfig struct {
|
||||||
sync.Mutex
|
|
||||||
*config.Config
|
*config.Config
|
||||||
containerConfig *containerConfig
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// loadConfig loads the required paths for the hook config.
|
// loadConfig loads the required paths for the hook config.
|
||||||
func loadConfig() (*config.Config, error) {
|
func loadConfig() (*config.Config, error) {
|
||||||
configFilePath, required := getConfigFilePath()
|
var configPaths []string
|
||||||
cfg, err := config.New(
|
var required bool
|
||||||
config.WithConfigFile(configFilePath),
|
if len(*configflag) != 0 {
|
||||||
config.WithRequired(true),
|
configPaths = append(configPaths, *configflag)
|
||||||
)
|
required = true
|
||||||
if err == nil {
|
} else {
|
||||||
return cfg.Config()
|
configPaths = append(configPaths, path.Join(driverPath, configPath), configPath)
|
||||||
} else if os.IsNotExist(err) && !required {
|
|
||||||
return config.GetDefault()
|
|
||||||
}
|
}
|
||||||
return nil, fmt.Errorf("couldn't open required configuration file: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
func getConfigFilePath() (string, bool) {
|
for _, p := range configPaths {
|
||||||
if configFromFlag := *configflag; configFromFlag != "" {
|
cfg, err := config.New(
|
||||||
return configFromFlag, true
|
config.WithConfigFile(p),
|
||||||
|
config.WithRequired(true),
|
||||||
|
)
|
||||||
|
if err == nil {
|
||||||
|
return cfg.Config()
|
||||||
|
} else if os.IsNotExist(err) && !required {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
return nil, fmt.Errorf("couldn't open required configuration file: %v", err)
|
||||||
}
|
}
|
||||||
if configFromEnvvar := os.Getenv(config.FilePathOverrideEnvVar); configFromEnvvar != "" {
|
|
||||||
return configFromEnvvar, true
|
return config.GetDefault()
|
||||||
}
|
|
||||||
return config.GetConfigFilePath(), false
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func getHookConfig() (*hookConfig, error) {
|
func getHookConfig() (*hookConfig, error) {
|
||||||
@@ -51,7 +55,7 @@ func getHookConfig() (*hookConfig, error) {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to load config: %v", err)
|
return nil, fmt.Errorf("failed to load config: %v", err)
|
||||||
}
|
}
|
||||||
config := &hookConfig{Config: cfg}
|
config := &hookConfig{cfg}
|
||||||
|
|
||||||
allSupportedDriverCapabilities := image.SupportedDriverCapabilities
|
allSupportedDriverCapabilities := image.SupportedDriverCapabilities
|
||||||
if config.SupportedDriverCapabilities == "all" {
|
if config.SupportedDriverCapabilities == "all" {
|
||||||
@@ -69,8 +73,8 @@ func getHookConfig() (*hookConfig, error) {
|
|||||||
|
|
||||||
// getConfigOption returns the toml config option associated with the
|
// getConfigOption returns the toml config option associated with the
|
||||||
// specified struct field.
|
// specified struct field.
|
||||||
func (c *hookConfig) getConfigOption(fieldName string) string {
|
func (c hookConfig) getConfigOption(fieldName string) string {
|
||||||
t := reflect.TypeOf(&c)
|
t := reflect.TypeOf(c)
|
||||||
f, ok := t.FieldByName(fieldName)
|
f, ok := t.FieldByName(fieldName)
|
||||||
if !ok {
|
if !ok {
|
||||||
return fieldName
|
return fieldName
|
||||||
@@ -84,7 +88,7 @@ func (c *hookConfig) getConfigOption(fieldName string) string {
|
|||||||
|
|
||||||
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
// getSwarmResourceEnvvars returns the swarm resource envvars for the config.
|
||||||
func (c *hookConfig) getSwarmResourceEnvvars() []string {
|
func (c *hookConfig) getSwarmResourceEnvvars() []string {
|
||||||
if c == nil || c.SwarmResource == "" {
|
if c.SwarmResource == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -123,21 +127,3 @@ func (c *hookConfig) nvidiaContainerCliCUDACompatModeFlags() []string {
|
|||||||
}
|
}
|
||||||
return []string{flag}
|
return []string{flag}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *hookConfig) assertModeIsLegacy() error {
|
|
||||||
if c.NVIDIAContainerRuntimeHookConfig.SkipModeDetection {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
mr := info.NewRuntimeModeResolver(
|
|
||||||
info.WithLogger(&logInterceptor{}),
|
|
||||||
info.WithImage(&c.containerConfig.Image),
|
|
||||||
info.WithDefaultMode(info.LegacyRuntimeMode),
|
|
||||||
)
|
|
||||||
|
|
||||||
mode := mr.ResolveRuntimeMode(c.NVIDIAContainerRuntimeConfig.Mode)
|
|
||||||
if mode == "legacy" {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return fmt.Errorf("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead")
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -90,10 +90,10 @@ func TestGetHookConfig(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
var cfg *hookConfig
|
var cfg hookConfig
|
||||||
getHookConfig := func() {
|
getHookConfig := func() {
|
||||||
c, _ := getHookConfig()
|
c, _ := getHookConfig()
|
||||||
cfg = c
|
cfg = *c
|
||||||
}
|
}
|
||||||
|
|
||||||
if tc.expectedPanic {
|
if tc.expectedPanic {
|
||||||
|
|||||||
@@ -55,7 +55,7 @@ func getCLIPath(config config.ContainerCLIConfig) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// getRootfsPath returns an absolute path. We don't need to resolve symlinks for now.
|
// getRootfsPath returns an absolute path. We don't need to resolve symlinks for now.
|
||||||
func getRootfsPath(config *containerConfig) string {
|
func getRootfsPath(config containerConfig) string {
|
||||||
rootfs, err := filepath.Abs(config.Rootfs)
|
rootfs, err := filepath.Abs(config.Rootfs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Panicln(err)
|
log.Panicln(err)
|
||||||
@@ -82,8 +82,8 @@ func doPrestart() {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := hook.assertModeIsLegacy(); err != nil {
|
if !hook.NVIDIAContainerRuntimeHookConfig.SkipModeDetection && info.ResolveAutoMode(&logInterceptor{}, hook.NVIDIAContainerRuntimeConfig.Mode, container.Image) != "legacy" {
|
||||||
log.Panicf("%v", err)
|
log.Panicln("invoking the NVIDIA Container Runtime Hook directly (e.g. specifying the docker --gpus flag) is not supported. Please use the NVIDIA Container Runtime (e.g. specify the --runtime=nvidia flag) instead.")
|
||||||
}
|
}
|
||||||
|
|
||||||
rootfs := getRootfsPath(container)
|
rootfs := getRootfsPath(container)
|
||||||
|
|||||||
@@ -21,8 +21,8 @@ The `runtimes` config option allows for the low-level runtime to be specified. T
|
|||||||
The default value for this setting is:
|
The default value for this setting is:
|
||||||
```toml
|
```toml
|
||||||
runtimes = [
|
runtimes = [
|
||||||
|
"docker-runc",
|
||||||
"runc",
|
"runc",
|
||||||
"crun",
|
|
||||||
]
|
]
|
||||||
```
|
```
|
||||||
|
|
||||||
|
|||||||
@@ -122,10 +122,11 @@ func TestGoodInput(t *testing.T) {
|
|||||||
err = cmdCreate.Run()
|
err = cmdCreate.Run()
|
||||||
require.NoError(t, err, "runtime should not return an error")
|
require.NoError(t, err, "runtime should not return an error")
|
||||||
|
|
||||||
// Check config.json to ensure that the NVIDIA prestart was not inserted.
|
// Check config.json for NVIDIA prestart hook
|
||||||
spec, err = cfg.getRuntimeSpec()
|
spec, err = cfg.getRuntimeSpec()
|
||||||
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
|
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
|
||||||
require.Empty(t, spec.Hooks, "there should be no hooks in config.json")
|
require.NotEmpty(t, spec.Hooks, "there should be hooks in config.json")
|
||||||
|
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
// NVIDIA prestart hook already present in config file
|
// NVIDIA prestart hook already present in config file
|
||||||
@@ -167,10 +168,11 @@ func TestDuplicateHook(t *testing.T) {
|
|||||||
output, err := cmdCreate.CombinedOutput()
|
output, err := cmdCreate.CombinedOutput()
|
||||||
require.NoErrorf(t, err, "runtime should not return an error", "output=%v", string(output))
|
require.NoErrorf(t, err, "runtime should not return an error", "output=%v", string(output))
|
||||||
|
|
||||||
// Check config.json to ensure that the NVIDIA prestart hook was removed.
|
// Check config.json for NVIDIA prestart hook
|
||||||
spec, err = cfg.getRuntimeSpec()
|
spec, err = cfg.getRuntimeSpec()
|
||||||
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
|
require.NoError(t, err, "should be no errors when reading and parsing spec from config.json")
|
||||||
require.Empty(t, spec.Hooks, "there should be no hooks in config.json")
|
require.NotEmpty(t, spec.Hooks, "there should be hooks in config.json")
|
||||||
|
require.Equal(t, 1, nvidiaHookCount(spec.Hooks), "exactly one nvidia prestart hook should be inserted correctly into config.json")
|
||||||
}
|
}
|
||||||
|
|
||||||
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
|
// addNVIDIAHook is a basic wrapper for an addHookModifier that is used for
|
||||||
@@ -238,3 +240,18 @@ func (c testConfig) generateNewRuntimeSpec() error {
|
|||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return number of valid NVIDIA prestart hooks in runtime spec
|
||||||
|
func nvidiaHookCount(hooks *specs.Hooks) int {
|
||||||
|
if hooks == nil {
|
||||||
|
return 0
|
||||||
|
}
|
||||||
|
|
||||||
|
count := 0
|
||||||
|
for _, hook := range hooks.Prestart {
|
||||||
|
if strings.Contains(hook.Path, nvidiaHook) {
|
||||||
|
count++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return count
|
||||||
|
}
|
||||||
|
|||||||
@@ -14,7 +14,6 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit"
|
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/toolkit"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/info"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/lookup"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -28,7 +27,7 @@ const (
|
|||||||
)
|
)
|
||||||
|
|
||||||
var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "containerd": {}}
|
var availableRuntimes = map[string]struct{}{"docker": {}, "crio": {}, "containerd": {}}
|
||||||
var defaultLowLevelRuntimes = []string{"runc", "crun"}
|
var defaultLowLevelRuntimes = []string{"docker-runc", "runc", "crun"}
|
||||||
|
|
||||||
var waitingForSignal = make(chan bool, 1)
|
var waitingForSignal = make(chan bool, 1)
|
||||||
var signalReceived = make(chan bool, 1)
|
var signalReceived = make(chan bool, 1)
|
||||||
@@ -37,11 +36,10 @@ var signalReceived = make(chan bool, 1)
|
|||||||
type options struct {
|
type options struct {
|
||||||
toolkitInstallDir string
|
toolkitInstallDir string
|
||||||
|
|
||||||
noDaemon bool
|
noDaemon bool
|
||||||
runtime string
|
runtime string
|
||||||
pidFile string
|
pidFile string
|
||||||
sourceRoot string
|
sourceRoot string
|
||||||
packageType string
|
|
||||||
|
|
||||||
toolkitOptions toolkit.Options
|
toolkitOptions toolkit.Options
|
||||||
runtimeOptions runtime.Options
|
runtimeOptions runtime.Options
|
||||||
@@ -125,17 +123,11 @@ func (a app) build() *cli.App {
|
|||||||
EnvVars: []string{"TOOLKIT_INSTALL_DIR", "ROOT"},
|
EnvVars: []string{"TOOLKIT_INSTALL_DIR", "ROOT"},
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "toolkit-source-root",
|
Name: "source-root",
|
||||||
Usage: "The folder where the required toolkit artifacts can be found. If this is not specified, the path /artifacts/{{ .ToolkitPackageType }} is used where ToolkitPackageType is the resolved package type",
|
Value: "/",
|
||||||
|
Usage: "The folder where the required toolkit artifacts can be found",
|
||||||
Destination: &options.sourceRoot,
|
Destination: &options.sourceRoot,
|
||||||
EnvVars: []string{"TOOLKIT_SOURCE_ROOT"},
|
EnvVars: []string{"SOURCE_ROOT"},
|
||||||
},
|
|
||||||
&cli.StringFlag{
|
|
||||||
Name: "toolkit-package-type",
|
|
||||||
Usage: "specify the package type to use for the toolkit. One of ['deb', 'rpm', 'auto', '']. If 'auto' or '' are used, the type is inferred automatically.",
|
|
||||||
Value: "auto",
|
|
||||||
Destination: &options.packageType,
|
|
||||||
EnvVars: []string{"TOOLKIT_PACKAGE_TYPE"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "pid-file",
|
Name: "pid-file",
|
||||||
@@ -153,15 +145,6 @@ func (a app) build() *cli.App {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (a *app) Before(c *cli.Context, o *options) error {
|
func (a *app) Before(c *cli.Context, o *options) error {
|
||||||
if o.sourceRoot == "" {
|
|
||||||
sourceRoot, err := a.resolveSourceRoot(o.runtimeOptions.HostRootMount, o.packageType)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to resolve source root: %v", err)
|
|
||||||
}
|
|
||||||
a.logger.Infof("Resolved source root to %v", sourceRoot)
|
|
||||||
o.sourceRoot = sourceRoot
|
|
||||||
}
|
|
||||||
|
|
||||||
a.toolkit = toolkit.NewInstaller(
|
a.toolkit = toolkit.NewInstaller(
|
||||||
toolkit.WithLogger(a.logger),
|
toolkit.WithLogger(a.logger),
|
||||||
toolkit.WithSourceRoot(o.sourceRoot),
|
toolkit.WithSourceRoot(o.sourceRoot),
|
||||||
@@ -294,35 +277,3 @@ func (a *app) shutdown(pidFile string) {
|
|||||||
a.logger.Warningf("Unable to remove pidfile: %v", err)
|
a.logger.Warningf("Unable to remove pidfile: %v", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *app) resolveSourceRoot(hostRoot string, packageType string) (string, error) {
|
|
||||||
resolvedPackageType, err := a.resolvePackageType(hostRoot, packageType)
|
|
||||||
if err != nil {
|
|
||||||
return "", err
|
|
||||||
}
|
|
||||||
switch resolvedPackageType {
|
|
||||||
case "deb":
|
|
||||||
return "/artifacts/deb", nil
|
|
||||||
case "rpm":
|
|
||||||
return "/artifacts/rpm", nil
|
|
||||||
default:
|
|
||||||
return "", fmt.Errorf("invalid package type: %v", resolvedPackageType)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func (a *app) resolvePackageType(hostRoot string, packageType string) (rPackageTypes string, rerr error) {
|
|
||||||
if packageType != "" && packageType != "auto" {
|
|
||||||
return packageType, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
locator := lookup.NewExecutableLocator(a.logger, hostRoot)
|
|
||||||
if candidates, err := locator.Locate("/usr/bin/rpm"); err == nil && len(candidates) > 0 {
|
|
||||||
return "rpm", nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if candidates, err := locator.Locate("/usr/bin/dpkg"); err == nil && len(candidates) > 0 {
|
|
||||||
return "deb", nil
|
|
||||||
}
|
|
||||||
|
|
||||||
return "deb", nil
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -67,7 +67,7 @@ swarm-resource = ""
|
|||||||
debug = "/dev/null"
|
debug = "/dev/null"
|
||||||
log-level = "info"
|
log-level = "info"
|
||||||
mode = "auto"
|
mode = "auto"
|
||||||
runtimes = ["runc", "crun"]
|
runtimes = ["docker-runc", "runc", "crun"]
|
||||||
|
|
||||||
[nvidia-container-runtime.modes]
|
[nvidia-container-runtime.modes]
|
||||||
|
|
||||||
@@ -131,7 +131,7 @@ swarm-resource = ""
|
|||||||
debug = "/dev/null"
|
debug = "/dev/null"
|
||||||
log-level = "info"
|
log-level = "info"
|
||||||
mode = "auto"
|
mode = "auto"
|
||||||
runtimes = ["runc", "crun"]
|
runtimes = ["docker-runc", "runc", "crun"]
|
||||||
|
|
||||||
[nvidia-container-runtime.modes]
|
[nvidia-container-runtime.modes]
|
||||||
|
|
||||||
@@ -198,7 +198,7 @@ swarm-resource = ""
|
|||||||
debug = "/dev/null"
|
debug = "/dev/null"
|
||||||
log-level = "info"
|
log-level = "info"
|
||||||
mode = "auto"
|
mode = "auto"
|
||||||
runtimes = ["runc", "crun"]
|
runtimes = ["docker-runc", "runc", "crun"]
|
||||||
|
|
||||||
[nvidia-container-runtime.modes]
|
[nvidia-container-runtime.modes]
|
||||||
|
|
||||||
@@ -262,7 +262,7 @@ swarm-resource = ""
|
|||||||
debug = "/dev/null"
|
debug = "/dev/null"
|
||||||
log-level = "info"
|
log-level = "info"
|
||||||
mode = "auto"
|
mode = "auto"
|
||||||
runtimes = ["runc", "crun"]
|
runtimes = ["docker-runc", "runc", "crun"]
|
||||||
|
|
||||||
[nvidia-container-runtime.modes]
|
[nvidia-container-runtime.modes]
|
||||||
|
|
||||||
@@ -348,7 +348,7 @@ swarm-resource = ""
|
|||||||
debug = "/dev/null"
|
debug = "/dev/null"
|
||||||
log-level = "info"
|
log-level = "info"
|
||||||
mode = "auto"
|
mode = "auto"
|
||||||
runtimes = ["runc", "crun"]
|
runtimes = ["docker-runc", "runc", "crun"]
|
||||||
|
|
||||||
[nvidia-container-runtime.modes]
|
[nvidia-container-runtime.modes]
|
||||||
|
|
||||||
@@ -433,7 +433,7 @@ swarm-resource = ""
|
|||||||
"--driver-root-ctr-path=" + hostRoot,
|
"--driver-root-ctr-path=" + hostRoot,
|
||||||
"--pid-file=" + filepath.Join(testRoot, "toolkit.pid"),
|
"--pid-file=" + filepath.Join(testRoot, "toolkit.pid"),
|
||||||
"--restart-mode=none",
|
"--restart-mode=none",
|
||||||
"--toolkit-source-root=" + filepath.Join(artifactRoot, "deb"),
|
"--source-root=" + filepath.Join(artifactRoot, "deb"),
|
||||||
}
|
}
|
||||||
|
|
||||||
err := app.Run(append(testArgs, tc.args...))
|
err := app.Run(append(testArgs, tc.args...))
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ type createDirectory struct {
|
|||||||
logger logger.Interface
|
logger logger.Interface
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *ToolkitInstaller) createDirectory() Installer {
|
func (t *toolkitInstaller) createDirectory() Installer {
|
||||||
return &createDirectory{
|
return &createDirectory{
|
||||||
logger: t.logger,
|
logger: t.logger,
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -28,18 +28,20 @@ import (
|
|||||||
log "github.com/sirupsen/logrus"
|
log "github.com/sirupsen/logrus"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/operator"
|
"github.com/NVIDIA/nvidia-container-toolkit/cmd/nvidia-ctk-installer/container/operator"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type executable struct {
|
type executable struct {
|
||||||
requiresKernelModule bool
|
requiresKernelModule bool
|
||||||
path string
|
path string
|
||||||
symlink string
|
symlink string
|
||||||
|
args []string
|
||||||
env map[string]string
|
env map[string]string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *ToolkitInstaller) collectExecutables(destDir string) ([]Installer, error) {
|
func (t *toolkitInstaller) collectExecutables(destDir string) ([]Installer, error) {
|
||||||
configFilePath := t.ConfigFilePath(destDir)
|
configHome := filepath.Join(destDir, ".config")
|
||||||
|
configDir := filepath.Join(configHome, "nvidia-container-runtime")
|
||||||
|
configPath := filepath.Join(configDir, "config.toml")
|
||||||
|
|
||||||
executables := []executable{
|
executables := []executable{
|
||||||
{
|
{
|
||||||
@@ -54,7 +56,7 @@ func (t *ToolkitInstaller) collectExecutables(destDir string) ([]Installer, erro
|
|||||||
path: runtime.Path,
|
path: runtime.Path,
|
||||||
requiresKernelModule: true,
|
requiresKernelModule: true,
|
||||||
env: map[string]string{
|
env: map[string]string{
|
||||||
config.FilePathOverrideEnvVar: configFilePath,
|
"XDG_CONFIG_HOME": configHome,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
executables = append(executables, e)
|
executables = append(executables, e)
|
||||||
@@ -70,9 +72,7 @@ func (t *ToolkitInstaller) collectExecutables(destDir string) ([]Installer, erro
|
|||||||
executable{
|
executable{
|
||||||
path: "nvidia-container-runtime-hook",
|
path: "nvidia-container-runtime-hook",
|
||||||
symlink: "nvidia-container-toolkit",
|
symlink: "nvidia-container-toolkit",
|
||||||
env: map[string]string{
|
args: []string{fmt.Sprintf("-config %s", configPath)},
|
||||||
config.FilePathOverrideEnvVar: configFilePath,
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -94,6 +94,7 @@ func (t *ToolkitInstaller) collectExecutables(destDir string) ([]Installer, erro
|
|||||||
Source: executablePath,
|
Source: executablePath,
|
||||||
WrappedExecutable: dotRealFilename,
|
WrappedExecutable: dotRealFilename,
|
||||||
CheckModules: executable.requiresKernelModule,
|
CheckModules: executable.requiresKernelModule,
|
||||||
|
Args: executable.args,
|
||||||
Envvars: map[string]string{
|
Envvars: map[string]string{
|
||||||
"PATH": strings.Join([]string{destDir, "$PATH"}, ":"),
|
"PATH": strings.Join([]string{destDir, "$PATH"}, ":"),
|
||||||
},
|
},
|
||||||
@@ -123,6 +124,7 @@ type wrapper struct {
|
|||||||
Envvars map[string]string
|
Envvars map[string]string
|
||||||
WrappedExecutable string
|
WrappedExecutable string
|
||||||
CheckModules bool
|
CheckModules bool
|
||||||
|
Args []string
|
||||||
}
|
}
|
||||||
|
|
||||||
type render struct {
|
type render struct {
|
||||||
@@ -163,6 +165,9 @@ fi
|
|||||||
{{$key}}={{$value}} \
|
{{$key}}={{$value}} \
|
||||||
{{- end }}
|
{{- end }}
|
||||||
{{ .DestDir }}/{{ .WrappedExecutable }} \
|
{{ .DestDir }}/{{ .WrappedExecutable }} \
|
||||||
|
{{- range $arg := .Args }}
|
||||||
|
{{$arg}} \
|
||||||
|
{{- end }}
|
||||||
"$@"
|
"$@"
|
||||||
`
|
`
|
||||||
|
|
||||||
|
|||||||
@@ -68,6 +68,19 @@ fi
|
|||||||
PATH=/foo/bar/baz \
|
PATH=/foo/bar/baz \
|
||||||
/dest-dir/some-runtime \
|
/dest-dir/some-runtime \
|
||||||
"$@"
|
"$@"
|
||||||
|
`,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
description: "args are added",
|
||||||
|
w: &wrapper{
|
||||||
|
WrappedExecutable: "some-runtime",
|
||||||
|
Args: []string{"--config foo", "bar"},
|
||||||
|
},
|
||||||
|
expected: `#! /bin/sh
|
||||||
|
/dest-dir/some-runtime \
|
||||||
|
--config foo \
|
||||||
|
bar \
|
||||||
|
"$@"
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ type Installer interface {
|
|||||||
Install(string) error
|
Install(string) error
|
||||||
}
|
}
|
||||||
|
|
||||||
type ToolkitInstaller struct {
|
type toolkitInstaller struct {
|
||||||
logger logger.Interface
|
logger logger.Interface
|
||||||
ignoreErrors bool
|
ignoreErrors bool
|
||||||
sourceRoot string
|
sourceRoot string
|
||||||
@@ -43,13 +43,11 @@ type ToolkitInstaller struct {
|
|||||||
ensureTargetDirectory Installer
|
ensureTargetDirectory Installer
|
||||||
}
|
}
|
||||||
|
|
||||||
var _ Installer = (*ToolkitInstaller)(nil)
|
var _ Installer = (*toolkitInstaller)(nil)
|
||||||
|
|
||||||
// New creates a toolkit installer with the specified options.
|
// New creates a toolkit installer with the specified options.
|
||||||
func New(opts ...Option) (*ToolkitInstaller, error) {
|
func New(opts ...Option) (Installer, error) {
|
||||||
t := &ToolkitInstaller{
|
t := &toolkitInstaller{}
|
||||||
sourceRoot: "/",
|
|
||||||
}
|
|
||||||
for _, opt := range opts {
|
for _, opt := range opts {
|
||||||
opt(t)
|
opt(t)
|
||||||
}
|
}
|
||||||
@@ -57,6 +55,9 @@ func New(opts ...Option) (*ToolkitInstaller, error) {
|
|||||||
if t.logger == nil {
|
if t.logger == nil {
|
||||||
t.logger = logger.New()
|
t.logger = logger.New()
|
||||||
}
|
}
|
||||||
|
if t.sourceRoot == "" {
|
||||||
|
t.sourceRoot = "/"
|
||||||
|
}
|
||||||
if t.artifactRoot == nil {
|
if t.artifactRoot == nil {
|
||||||
artifactRoot, err := newArtifactRoot(t.logger, t.sourceRoot)
|
artifactRoot, err := newArtifactRoot(t.logger, t.sourceRoot)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -73,7 +74,7 @@ func New(opts ...Option) (*ToolkitInstaller, error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Install ensures that the required toolkit files are installed in the specified directory.
|
// Install ensures that the required toolkit files are installed in the specified directory.
|
||||||
func (t *ToolkitInstaller) Install(destDir string) error {
|
func (t *toolkitInstaller) Install(destDir string) error {
|
||||||
var installers []Installer
|
var installers []Installer
|
||||||
|
|
||||||
installers = append(installers, t.ensureTargetDirectory)
|
installers = append(installers, t.ensureTargetDirectory)
|
||||||
@@ -98,11 +99,6 @@ func (t *ToolkitInstaller) Install(destDir string) error {
|
|||||||
return errs
|
return errs
|
||||||
}
|
}
|
||||||
|
|
||||||
func (t *ToolkitInstaller) ConfigFilePath(destDir string) string {
|
|
||||||
toolkitConfigDir := filepath.Join(destDir, ".config", "nvidia-container-runtime")
|
|
||||||
return filepath.Join(toolkitConfigDir, "config.toml")
|
|
||||||
}
|
|
||||||
|
|
||||||
type symlink struct {
|
type symlink struct {
|
||||||
linkname string
|
linkname string
|
||||||
target string
|
target string
|
||||||
|
|||||||
@@ -112,7 +112,7 @@ func TestToolkitInstaller(t *testing.T) {
|
|||||||
return nil
|
return nil
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
i := ToolkitInstaller{
|
i := toolkitInstaller{
|
||||||
logger: logger,
|
logger: logger,
|
||||||
artifactRoot: r,
|
artifactRoot: r,
|
||||||
ensureTargetDirectory: createDirectory,
|
ensureTargetDirectory: createDirectory,
|
||||||
@@ -172,8 +172,8 @@ if [ "${?}" != "0" ]; then
|
|||||||
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
||||||
exec runc "$@"
|
exec runc "$@"
|
||||||
fi
|
fi
|
||||||
NVIDIA_CTK_CONFIG_FILE_PATH=/foo/bar/baz/.config/nvidia-container-runtime/config.toml \
|
|
||||||
PATH=/foo/bar/baz:$PATH \
|
PATH=/foo/bar/baz:$PATH \
|
||||||
|
XDG_CONFIG_HOME=/foo/bar/baz/.config \
|
||||||
/foo/bar/baz/nvidia-container-runtime.real \
|
/foo/bar/baz/nvidia-container-runtime.real \
|
||||||
"$@"
|
"$@"
|
||||||
`,
|
`,
|
||||||
@@ -187,8 +187,8 @@ if [ "${?}" != "0" ]; then
|
|||||||
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
||||||
exec runc "$@"
|
exec runc "$@"
|
||||||
fi
|
fi
|
||||||
NVIDIA_CTK_CONFIG_FILE_PATH=/foo/bar/baz/.config/nvidia-container-runtime/config.toml \
|
|
||||||
PATH=/foo/bar/baz:$PATH \
|
PATH=/foo/bar/baz:$PATH \
|
||||||
|
XDG_CONFIG_HOME=/foo/bar/baz/.config \
|
||||||
/foo/bar/baz/nvidia-container-runtime.cdi.real \
|
/foo/bar/baz/nvidia-container-runtime.cdi.real \
|
||||||
"$@"
|
"$@"
|
||||||
`,
|
`,
|
||||||
@@ -202,8 +202,8 @@ if [ "${?}" != "0" ]; then
|
|||||||
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
echo "nvidia driver modules are not yet loaded, invoking runc directly"
|
||||||
exec runc "$@"
|
exec runc "$@"
|
||||||
fi
|
fi
|
||||||
NVIDIA_CTK_CONFIG_FILE_PATH=/foo/bar/baz/.config/nvidia-container-runtime/config.toml \
|
|
||||||
PATH=/foo/bar/baz:$PATH \
|
PATH=/foo/bar/baz:$PATH \
|
||||||
|
XDG_CONFIG_HOME=/foo/bar/baz/.config \
|
||||||
/foo/bar/baz/nvidia-container-runtime.legacy.real \
|
/foo/bar/baz/nvidia-container-runtime.legacy.real \
|
||||||
"$@"
|
"$@"
|
||||||
`,
|
`,
|
||||||
@@ -240,9 +240,9 @@ PATH=/foo/bar/baz:$PATH \
|
|||||||
path: "/foo/bar/baz/nvidia-container-runtime-hook",
|
path: "/foo/bar/baz/nvidia-container-runtime-hook",
|
||||||
mode: 0777,
|
mode: 0777,
|
||||||
wrapper: `#! /bin/sh
|
wrapper: `#! /bin/sh
|
||||||
NVIDIA_CTK_CONFIG_FILE_PATH=/foo/bar/baz/.config/nvidia-container-runtime/config.toml \
|
|
||||||
PATH=/foo/bar/baz:$PATH \
|
PATH=/foo/bar/baz:$PATH \
|
||||||
/foo/bar/baz/nvidia-container-runtime-hook.real \
|
/foo/bar/baz/nvidia-container-runtime-hook.real \
|
||||||
|
-config /foo/bar/baz/.config/nvidia-container-runtime/config.toml \
|
||||||
"$@"
|
"$@"
|
||||||
`,
|
`,
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -28,7 +28,7 @@ import (
|
|||||||
// A predefined set of library candidates are considered, with the first one
|
// A predefined set of library candidates are considered, with the first one
|
||||||
// resulting in success being installed to the toolkit folder. The install process
|
// resulting in success being installed to the toolkit folder. The install process
|
||||||
// resolves the symlink for the library and copies the versioned library itself.
|
// resolves the symlink for the library and copies the versioned library itself.
|
||||||
func (t *ToolkitInstaller) collectLibraries() ([]Installer, error) {
|
func (t *toolkitInstaller) collectLibraries() ([]Installer, error) {
|
||||||
requiredLibraries := []string{
|
requiredLibraries := []string{
|
||||||
"libnvidia-container.so.1",
|
"libnvidia-container.so.1",
|
||||||
"libnvidia-container-go.so.1",
|
"libnvidia-container-go.so.1",
|
||||||
|
|||||||
@@ -19,29 +19,29 @@ package installer
|
|||||||
|
|
||||||
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
import "github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
|
|
||||||
type Option func(*ToolkitInstaller)
|
type Option func(*toolkitInstaller)
|
||||||
|
|
||||||
func WithLogger(logger logger.Interface) Option {
|
func WithLogger(logger logger.Interface) Option {
|
||||||
return func(ti *ToolkitInstaller) {
|
return func(ti *toolkitInstaller) {
|
||||||
ti.logger = logger
|
ti.logger = logger
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WithArtifactRoot(artifactRoot *artifactRoot) Option {
|
func WithArtifactRoot(artifactRoot *artifactRoot) Option {
|
||||||
return func(ti *ToolkitInstaller) {
|
return func(ti *toolkitInstaller) {
|
||||||
ti.artifactRoot = artifactRoot
|
ti.artifactRoot = artifactRoot
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func WithIgnoreErrors(ignoreErrors bool) Option {
|
func WithIgnoreErrors(ignoreErrors bool) Option {
|
||||||
return func(ti *ToolkitInstaller) {
|
return func(ti *toolkitInstaller) {
|
||||||
ti.ignoreErrors = ignoreErrors
|
ti.ignoreErrors = ignoreErrors
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithSourceRoot sets the root directory for locating artifacts to be installed.
|
// WithSourceRoot sets the root directory for locating artifacts to be installed.
|
||||||
func WithSourceRoot(sourceRoot string) Option {
|
func WithSourceRoot(sourceRoot string) Option {
|
||||||
return func(ti *ToolkitInstaller) {
|
return func(ti *toolkitInstaller) {
|
||||||
ti.sourceRoot = sourceRoot
|
ti.sourceRoot = sourceRoot
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,6 +37,8 @@ import (
|
|||||||
const (
|
const (
|
||||||
// DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory
|
// DefaultNvidiaDriverRoot specifies the default NVIDIA driver run directory
|
||||||
DefaultNvidiaDriverRoot = "/run/nvidia/driver"
|
DefaultNvidiaDriverRoot = "/run/nvidia/driver"
|
||||||
|
|
||||||
|
configFilename = "config.toml"
|
||||||
)
|
)
|
||||||
|
|
||||||
type cdiOptions struct {
|
type cdiOptions struct {
|
||||||
@@ -213,8 +215,7 @@ func Flags(opts *Options) []cli.Flag {
|
|||||||
|
|
||||||
// An Installer is used to install the NVIDIA Container Toolkit from the toolkit container.
|
// An Installer is used to install the NVIDIA Container Toolkit from the toolkit container.
|
||||||
type Installer struct {
|
type Installer struct {
|
||||||
logger logger.Interface
|
logger logger.Interface
|
||||||
|
|
||||||
sourceRoot string
|
sourceRoot string
|
||||||
// toolkitRoot specifies the destination path at which the toolkit is installed.
|
// toolkitRoot specifies the destination path at which the toolkit is installed.
|
||||||
toolkitRoot string
|
toolkitRoot string
|
||||||
@@ -314,7 +315,7 @@ func (t *Installer) Install(cli *cli.Context, opts *Options) error {
|
|||||||
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not install toolkit components: %w", err))
|
t.logger.Errorf("Ignoring error: %v", fmt.Errorf("could not install toolkit components: %w", err))
|
||||||
}
|
}
|
||||||
|
|
||||||
err = t.installToolkitConfig(cli, opts, toolkit.ConfigFilePath(t.toolkitRoot))
|
err = t.installToolkitConfig(cli, opts)
|
||||||
if err != nil && !opts.ignoreErrors {
|
if err != nil && !opts.ignoreErrors {
|
||||||
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
|
return fmt.Errorf("error installing NVIDIA container toolkit config: %v", err)
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
@@ -341,11 +342,13 @@ func (t *Installer) Install(cli *cli.Context, opts *Options) error {
|
|||||||
|
|
||||||
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
|
// installToolkitConfig installs the config file for the NVIDIA container toolkit ensuring
|
||||||
// that the settings are updated to match the desired install and nvidia driver directories.
|
// that the settings are updated to match the desired install and nvidia driver directories.
|
||||||
func (t *Installer) installToolkitConfig(c *cli.Context, opts *Options, toolkitConfigPath string) error {
|
func (t *Installer) installToolkitConfig(c *cli.Context, opts *Options) error {
|
||||||
|
toolkitConfigDir := filepath.Join(t.toolkitRoot, ".config", "nvidia-container-runtime")
|
||||||
|
toolkitConfigPath := filepath.Join(toolkitConfigDir, configFilename)
|
||||||
|
|
||||||
t.logger.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
|
t.logger.Infof("Installing NVIDIA container toolkit config '%v'", toolkitConfigPath)
|
||||||
|
|
||||||
err := t.createDirectories(filepath.Dir(toolkitConfigPath))
|
err := t.createDirectories(toolkitConfigDir)
|
||||||
if err != nil && !opts.ignoreErrors {
|
if err != nil && !opts.ignoreErrors {
|
||||||
return fmt.Errorf("could not create required directories: %v", err)
|
return fmt.Errorf("could not create required directories: %v", err)
|
||||||
} else if err != nil {
|
} else if err != nil {
|
||||||
|
|||||||
@@ -86,7 +86,6 @@ devices:
|
|||||||
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel2047
|
hostPath: /host/driver/root/dev/nvidia-caps-imex-channels/channel2047
|
||||||
containerEdits:
|
containerEdits:
|
||||||
env:
|
env:
|
||||||
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
|
|
||||||
- NVIDIA_VISIBLE_DEVICES=void
|
- NVIDIA_VISIBLE_DEVICES=void
|
||||||
hooks:
|
hooks:
|
||||||
- hookName: createContainer
|
- hookName: createContainer
|
||||||
@@ -98,15 +97,6 @@ containerEdits:
|
|||||||
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
|
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
|
||||||
env:
|
env:
|
||||||
- NVIDIA_CTK_DEBUG=false
|
- NVIDIA_CTK_DEBUG=false
|
||||||
- hookName: createContainer
|
|
||||||
path: {{ .toolkitRoot }}/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- create-soname-symlinks
|
|
||||||
- --folder
|
|
||||||
- /lib/x86_64-linux-gnu
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
- hookName: createContainer
|
- hookName: createContainer
|
||||||
path: {{ .toolkitRoot }}/nvidia-cdi-hook
|
path: {{ .toolkitRoot }}/nvidia-cdi-hook
|
||||||
args:
|
args:
|
||||||
|
|||||||
@@ -57,7 +57,6 @@ type options struct {
|
|||||||
|
|
||||||
configSearchPaths cli.StringSlice
|
configSearchPaths cli.StringSlice
|
||||||
librarySearchPaths cli.StringSlice
|
librarySearchPaths cli.StringSlice
|
||||||
disabledHooks cli.StringSlice
|
|
||||||
|
|
||||||
csv struct {
|
csv struct {
|
||||||
files cli.StringSlice
|
files cli.StringSlice
|
||||||
@@ -97,20 +96,17 @@ func (m command) build() *cli.Command {
|
|||||||
Name: "config-search-path",
|
Name: "config-search-path",
|
||||||
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
|
Usage: "Specify the path to search for config files when discovering the entities that should be included in the CDI specification.",
|
||||||
Destination: &opts.configSearchPaths,
|
Destination: &opts.configSearchPaths,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CONFIG_SEARCH_PATHS"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "output",
|
Name: "output",
|
||||||
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
|
Usage: "Specify the file to output the generated CDI specification to. If this is '' the specification is output to STDOUT",
|
||||||
Destination: &opts.output,
|
Destination: &opts.output,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_OUTPUT_FILE_PATH"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "format",
|
Name: "format",
|
||||||
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
|
Usage: "The output format for the generated spec [json | yaml]. This overrides the format defined by the output file extension (if specified).",
|
||||||
Value: spec.FormatYAML,
|
Value: spec.FormatYAML,
|
||||||
Destination: &opts.format,
|
Destination: &opts.format,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_OUTPUT_FORMAT"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "mode",
|
Name: "mode",
|
||||||
@@ -120,32 +116,27 @@ func (m command) build() *cli.Command {
|
|||||||
"If mode is set to 'auto' the mode will be determined based on the system configuration.",
|
"If mode is set to 'auto' the mode will be determined based on the system configuration.",
|
||||||
Value: string(nvcdi.ModeAuto),
|
Value: string(nvcdi.ModeAuto),
|
||||||
Destination: &opts.mode,
|
Destination: &opts.mode,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_MODE"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "dev-root",
|
Name: "dev-root",
|
||||||
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
|
Usage: "Specify the root where `/dev` is located. If this is not specified, the driver-root is assumed.",
|
||||||
Destination: &opts.devRoot,
|
Destination: &opts.devRoot,
|
||||||
EnvVars: []string{"NVIDIA_CTK_DEV_ROOT"},
|
|
||||||
},
|
},
|
||||||
&cli.StringSliceFlag{
|
&cli.StringSliceFlag{
|
||||||
Name: "device-name-strategy",
|
Name: "device-name-strategy",
|
||||||
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
|
Usage: "Specify the strategy for generating device names. If this is specified multiple times, the devices will be duplicated for each strategy. One of [index | uuid | type-index]",
|
||||||
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
|
Value: cli.NewStringSlice(nvcdi.DeviceNameStrategyIndex, nvcdi.DeviceNameStrategyUUID),
|
||||||
Destination: &opts.deviceNameStrategies,
|
Destination: &opts.deviceNameStrategies,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_DEVICE_NAME_STRATEGIES"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "driver-root",
|
Name: "driver-root",
|
||||||
Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.",
|
Usage: "Specify the NVIDIA GPU driver root to use when discovering the entities that should be included in the CDI specification.",
|
||||||
Destination: &opts.driverRoot,
|
Destination: &opts.driverRoot,
|
||||||
EnvVars: []string{"NVIDIA_CTK_DRIVER_ROOT"},
|
|
||||||
},
|
},
|
||||||
&cli.StringSliceFlag{
|
&cli.StringSliceFlag{
|
||||||
Name: "library-search-path",
|
Name: "library-search-path",
|
||||||
Usage: "Specify the path to search for libraries when discovering the entities that should be included in the CDI specification.\n\tNote: This option only applies to CSV mode.",
|
Usage: "Specify the path to search for libraries when discovering the entities that should be included in the CDI specification.\n\tNote: This option only applies to CSV mode.",
|
||||||
Destination: &opts.librarySearchPaths,
|
Destination: &opts.librarySearchPaths,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_LIBRARY_SEARCH_PATHS"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "nvidia-cdi-hook-path",
|
Name: "nvidia-cdi-hook-path",
|
||||||
@@ -154,13 +145,11 @@ func (m command) build() *cli.Command {
|
|||||||
"If not specified, the PATH will be searched for `nvidia-cdi-hook`. " +
|
"If not specified, the PATH will be searched for `nvidia-cdi-hook`. " +
|
||||||
"NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.",
|
"NOTE: That if this is specified as `nvidia-ctk`, the PATH will be searched for `nvidia-ctk` instead.",
|
||||||
Destination: &opts.nvidiaCDIHookPath,
|
Destination: &opts.nvidiaCDIHookPath,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_HOOK_PATH"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "ldconfig-path",
|
Name: "ldconfig-path",
|
||||||
Usage: "Specify the path to use for ldconfig in the generated CDI specification",
|
Usage: "Specify the path to use for ldconfig in the generated CDI specification",
|
||||||
Destination: &opts.ldconfigPath,
|
Destination: &opts.ldconfigPath,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_LDCONFIG_PATH"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "vendor",
|
Name: "vendor",
|
||||||
@@ -168,7 +157,6 @@ func (m command) build() *cli.Command {
|
|||||||
Usage: "the vendor string to use for the generated CDI specification.",
|
Usage: "the vendor string to use for the generated CDI specification.",
|
||||||
Value: "nvidia.com",
|
Value: "nvidia.com",
|
||||||
Destination: &opts.vendor,
|
Destination: &opts.vendor,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_VENDOR"},
|
|
||||||
},
|
},
|
||||||
&cli.StringFlag{
|
&cli.StringFlag{
|
||||||
Name: "class",
|
Name: "class",
|
||||||
@@ -176,30 +164,17 @@ func (m command) build() *cli.Command {
|
|||||||
Usage: "the class string to use for the generated CDI specification.",
|
Usage: "the class string to use for the generated CDI specification.",
|
||||||
Value: "gpu",
|
Value: "gpu",
|
||||||
Destination: &opts.class,
|
Destination: &opts.class,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CLASS"},
|
|
||||||
},
|
},
|
||||||
&cli.StringSliceFlag{
|
&cli.StringSliceFlag{
|
||||||
Name: "csv.file",
|
Name: "csv.file",
|
||||||
Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.",
|
Usage: "The path to the list of CSV files to use when generating the CDI specification in CSV mode.",
|
||||||
Value: cli.NewStringSlice(csv.DefaultFileList()...),
|
Value: cli.NewStringSlice(csv.DefaultFileList()...),
|
||||||
Destination: &opts.csv.files,
|
Destination: &opts.csv.files,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CSV_FILES"},
|
|
||||||
},
|
},
|
||||||
&cli.StringSliceFlag{
|
&cli.StringSliceFlag{
|
||||||
Name: "csv.ignore-pattern",
|
Name: "csv.ignore-pattern",
|
||||||
Usage: "specify a pattern the CSV mount specifications.",
|
Usage: "Specify a pattern the CSV mount specifications.",
|
||||||
Destination: &opts.csv.ignorePatterns,
|
Destination: &opts.csv.ignorePatterns,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_CSV_IGNORE_PATTERNS"},
|
|
||||||
},
|
|
||||||
&cli.StringSliceFlag{
|
|
||||||
Name: "disable-hook",
|
|
||||||
Aliases: []string{"disable-hooks"},
|
|
||||||
Usage: "specify a specific hook to skip when generating CDI " +
|
|
||||||
"specifications. This can be specified multiple times and the " +
|
|
||||||
"special hook name 'all' can be used ensure that the generated " +
|
|
||||||
"CDI specification does not include any hooks.",
|
|
||||||
Destination: &opts.disabledHooks,
|
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_GENERATE_DISABLED_HOOKS"},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -287,7 +262,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
|
|||||||
deviceNamers = append(deviceNamers, deviceNamer)
|
deviceNamers = append(deviceNamers, deviceNamer)
|
||||||
}
|
}
|
||||||
|
|
||||||
cdiOptions := []nvcdi.Option{
|
cdilib, err := nvcdi.New(
|
||||||
nvcdi.WithLogger(m.logger),
|
nvcdi.WithLogger(m.logger),
|
||||||
nvcdi.WithDriverRoot(opts.driverRoot),
|
nvcdi.WithDriverRoot(opts.driverRoot),
|
||||||
nvcdi.WithDevRoot(opts.devRoot),
|
nvcdi.WithDevRoot(opts.devRoot),
|
||||||
@@ -301,13 +276,7 @@ func (m command) generateSpec(opts *options) (spec.Interface, error) {
|
|||||||
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
|
nvcdi.WithCSVIgnorePatterns(opts.csv.ignorePatterns.Value()),
|
||||||
// We set the following to allow for dependency injection:
|
// We set the following to allow for dependency injection:
|
||||||
nvcdi.WithNvmlLib(opts.nvmllib),
|
nvcdi.WithNvmlLib(opts.nvmllib),
|
||||||
}
|
)
|
||||||
|
|
||||||
for _, hook := range opts.disabledHooks.Value() {
|
|
||||||
cdiOptions = append(cdiOptions, nvcdi.WithDisabledHook(hook))
|
|
||||||
}
|
|
||||||
|
|
||||||
cdilib, err := nvcdi.New(cdiOptions...)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to create CDI library: %v", err)
|
return nil, fmt.Errorf("failed to create CDI library: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,7 +26,6 @@ import (
|
|||||||
"github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100"
|
"github.com/NVIDIA/go-nvml/pkg/nvml/mock/dgxa100"
|
||||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
testlog "github.com/sirupsen/logrus/hooks/test"
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
"github.com/urfave/cli/v2"
|
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/test"
|
||||||
)
|
)
|
||||||
@@ -80,7 +79,6 @@ devices:
|
|||||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
hostPath: {{ .driverRoot }}/dev/nvidia0
|
||||||
containerEdits:
|
containerEdits:
|
||||||
env:
|
env:
|
||||||
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
|
|
||||||
- NVIDIA_VISIBLE_DEVICES=void
|
- NVIDIA_VISIBLE_DEVICES=void
|
||||||
deviceNodes:
|
deviceNodes:
|
||||||
- path: /dev/nvidiactl
|
- path: /dev/nvidiactl
|
||||||
@@ -103,15 +101,6 @@ containerEdits:
|
|||||||
- --host-driver-version=999.88.77
|
- --host-driver-version=999.88.77
|
||||||
env:
|
env:
|
||||||
- NVIDIA_CTK_DEBUG=false
|
- NVIDIA_CTK_DEBUG=false
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- create-soname-symlinks
|
|
||||||
- --folder
|
|
||||||
- /lib/x86_64-linux-gnu
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
- hookName: createContainer
|
- hookName: createContainer
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
path: /usr/bin/nvidia-cdi-hook
|
||||||
args:
|
args:
|
||||||
@@ -121,227 +110,6 @@ containerEdits:
|
|||||||
- /lib/x86_64-linux-gnu
|
- /lib/x86_64-linux-gnu
|
||||||
env:
|
env:
|
||||||
- NVIDIA_CTK_DEBUG=false
|
- NVIDIA_CTK_DEBUG=false
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- disable-device-node-modification
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
mounts:
|
|
||||||
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
|
||||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
|
||||||
options:
|
|
||||||
- ro
|
|
||||||
- nosuid
|
|
||||||
- nodev
|
|
||||||
- rbind
|
|
||||||
- rprivate
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "disableHooks1",
|
|
||||||
options: options{
|
|
||||||
format: "yaml",
|
|
||||||
mode: "nvml",
|
|
||||||
vendor: "example.com",
|
|
||||||
class: "device",
|
|
||||||
driverRoot: driverRoot,
|
|
||||||
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")),
|
|
||||||
},
|
|
||||||
expectedOptions: options{
|
|
||||||
format: "yaml",
|
|
||||||
mode: "nvml",
|
|
||||||
vendor: "example.com",
|
|
||||||
class: "device",
|
|
||||||
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
|
|
||||||
driverRoot: driverRoot,
|
|
||||||
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat")),
|
|
||||||
},
|
|
||||||
expectedSpec: `---
|
|
||||||
cdiVersion: 0.5.0
|
|
||||||
kind: example.com/device
|
|
||||||
devices:
|
|
||||||
- name: "0"
|
|
||||||
containerEdits:
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidia0
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
|
||||||
- name: all
|
|
||||||
containerEdits:
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidia0
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
|
||||||
containerEdits:
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
|
|
||||||
- NVIDIA_VISIBLE_DEVICES=void
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidiactl
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidiactl
|
|
||||||
hooks:
|
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- create-symlinks
|
|
||||||
- --link
|
|
||||||
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- create-soname-symlinks
|
|
||||||
- --folder
|
|
||||||
- /lib/x86_64-linux-gnu
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- update-ldcache
|
|
||||||
- --folder
|
|
||||||
- /lib/x86_64-linux-gnu
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- disable-device-node-modification
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
mounts:
|
|
||||||
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
|
||||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
|
||||||
options:
|
|
||||||
- ro
|
|
||||||
- nosuid
|
|
||||||
- nodev
|
|
||||||
- rbind
|
|
||||||
- rprivate
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "disableHooks2",
|
|
||||||
options: options{
|
|
||||||
format: "yaml",
|
|
||||||
mode: "nvml",
|
|
||||||
vendor: "example.com",
|
|
||||||
class: "device",
|
|
||||||
driverRoot: driverRoot,
|
|
||||||
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")),
|
|
||||||
},
|
|
||||||
expectedOptions: options{
|
|
||||||
format: "yaml",
|
|
||||||
mode: "nvml",
|
|
||||||
vendor: "example.com",
|
|
||||||
class: "device",
|
|
||||||
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
|
|
||||||
driverRoot: driverRoot,
|
|
||||||
disabledHooks: valueOf(cli.NewStringSlice("enable-cuda-compat", "update-ldcache")),
|
|
||||||
},
|
|
||||||
expectedSpec: `---
|
|
||||||
cdiVersion: 0.5.0
|
|
||||||
kind: example.com/device
|
|
||||||
devices:
|
|
||||||
- name: "0"
|
|
||||||
containerEdits:
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidia0
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
|
||||||
- name: all
|
|
||||||
containerEdits:
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidia0
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
|
||||||
containerEdits:
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
|
|
||||||
- NVIDIA_VISIBLE_DEVICES=void
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidiactl
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidiactl
|
|
||||||
hooks:
|
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- create-symlinks
|
|
||||||
- --link
|
|
||||||
- libcuda.so.1::/lib/x86_64-linux-gnu/libcuda.so
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- create-soname-symlinks
|
|
||||||
- --folder
|
|
||||||
- /lib/x86_64-linux-gnu
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
- hookName: createContainer
|
|
||||||
path: /usr/bin/nvidia-cdi-hook
|
|
||||||
args:
|
|
||||||
- nvidia-cdi-hook
|
|
||||||
- disable-device-node-modification
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_DEBUG=false
|
|
||||||
mounts:
|
|
||||||
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
|
||||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
|
||||||
options:
|
|
||||||
- ro
|
|
||||||
- nosuid
|
|
||||||
- nodev
|
|
||||||
- rbind
|
|
||||||
- rprivate
|
|
||||||
`,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "disableHooksAll",
|
|
||||||
options: options{
|
|
||||||
format: "yaml",
|
|
||||||
mode: "nvml",
|
|
||||||
vendor: "example.com",
|
|
||||||
class: "device",
|
|
||||||
driverRoot: driverRoot,
|
|
||||||
disabledHooks: valueOf(cli.NewStringSlice("all")),
|
|
||||||
},
|
|
||||||
expectedOptions: options{
|
|
||||||
format: "yaml",
|
|
||||||
mode: "nvml",
|
|
||||||
vendor: "example.com",
|
|
||||||
class: "device",
|
|
||||||
nvidiaCDIHookPath: "/usr/bin/nvidia-cdi-hook",
|
|
||||||
driverRoot: driverRoot,
|
|
||||||
disabledHooks: valueOf(cli.NewStringSlice("all")),
|
|
||||||
},
|
|
||||||
expectedSpec: `---
|
|
||||||
cdiVersion: 0.5.0
|
|
||||||
kind: example.com/device
|
|
||||||
devices:
|
|
||||||
- name: "0"
|
|
||||||
containerEdits:
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidia0
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
|
||||||
- name: all
|
|
||||||
containerEdits:
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidia0
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidia0
|
|
||||||
containerEdits:
|
|
||||||
env:
|
|
||||||
- NVIDIA_CTK_LIBCUDA_DIR=/lib/x86_64-linux-gnu
|
|
||||||
- NVIDIA_VISIBLE_DEVICES=void
|
|
||||||
deviceNodes:
|
|
||||||
- path: /dev/nvidiactl
|
|
||||||
hostPath: {{ .driverRoot }}/dev/nvidiactl
|
|
||||||
mounts:
|
mounts:
|
||||||
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
- hostPath: {{ .driverRoot }}/lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||||
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
containerPath: /lib/x86_64-linux-gnu/libcuda.so.999.88.77
|
||||||
@@ -394,9 +162,3 @@ containerEdits:
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// valueOf returns the value of a pointer.
|
|
||||||
// Note that this does not check for a nil pointer and is only used for testing.
|
|
||||||
func valueOf[T any](v *T) T {
|
|
||||||
return *v
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -64,7 +64,6 @@ func (m command) build() *cli.Command {
|
|||||||
Usage: "specify the directories to scan for CDI specifications",
|
Usage: "specify the directories to scan for CDI specifications",
|
||||||
Value: cli.NewStringSlice(cdi.DefaultSpecDirs...),
|
Value: cli.NewStringSlice(cdi.DefaultSpecDirs...),
|
||||||
Destination: &cfg.cdiSpecDirs,
|
Destination: &cfg.cdiSpecDirs,
|
||||||
EnvVars: []string{"NVIDIA_CTK_CDI_SPEC_DIRS"},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,171 +0,0 @@
|
|||||||
# SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
ARG GOLANG_VERSION=x.x.x
|
|
||||||
ARG VERSION="N/A"
|
|
||||||
|
|
||||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi9 AS build
|
|
||||||
|
|
||||||
RUN dnf install -y \
|
|
||||||
wget make git gcc \
|
|
||||||
&& \
|
|
||||||
rm -rf /var/cache/yum/*
|
|
||||||
|
|
||||||
ARG GOLANG_VERSION=x.x.x
|
|
||||||
RUN set -eux; \
|
|
||||||
\
|
|
||||||
arch="$(uname -m)"; \
|
|
||||||
case "${arch##*-}" in \
|
|
||||||
x86_64 | amd64) ARCH='amd64' ;; \
|
|
||||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
|
||||||
aarch64 | arm64) ARCH='arm64' ;; \
|
|
||||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
|
||||||
esac; \
|
|
||||||
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
|
||||||
| tar -C /usr/local -xz
|
|
||||||
|
|
||||||
|
|
||||||
ENV GOPATH=/go
|
|
||||||
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
|
|
||||||
|
|
||||||
WORKDIR /build
|
|
||||||
COPY . .
|
|
||||||
|
|
||||||
RUN mkdir -p /artifacts/bin
|
|
||||||
ARG VERSION="N/A"
|
|
||||||
ARG GIT_COMMIT="unknown"
|
|
||||||
RUN make PREFIX=/artifacts/bin cmd-nvidia-ctk-installer
|
|
||||||
|
|
||||||
# The packaging stage collects the deb and rpm packages built for
|
|
||||||
# supported architectures.
|
|
||||||
FROM nvcr.io/nvidia/distroless/go:v3.1.9-dev AS packaging
|
|
||||||
|
|
||||||
USER 0:0
|
|
||||||
SHELL ["/busybox/sh", "-c"]
|
|
||||||
RUN ln -s /busybox/sh /bin/sh
|
|
||||||
|
|
||||||
ARG ARTIFACTS_ROOT
|
|
||||||
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
|
||||||
|
|
||||||
WORKDIR /artifacts
|
|
||||||
|
|
||||||
# build-args are added to the manifest.txt file below.
|
|
||||||
ARG PACKAGE_VERSION
|
|
||||||
ARG GIT_BRANCH
|
|
||||||
ARG GIT_COMMIT
|
|
||||||
ARG GIT_COMMIT_SHORT
|
|
||||||
ARG SOURCE_DATE_EPOCH
|
|
||||||
ARG VERSION
|
|
||||||
|
|
||||||
# Create a manifest.txt file with the absolute paths of all deb and rpm packages in the container
|
|
||||||
RUN echo "#IMAGE_EPOCH=$(date '+%s')" > /artifacts/manifest.txt && \
|
|
||||||
env | sed 's/^/#/g' >> /artifacts/manifest.txt && \
|
|
||||||
find /artifacts/packages -iname '*.deb' -o -iname '*.rpm' >> /artifacts/manifest.txt
|
|
||||||
|
|
||||||
LABEL name="NVIDIA Container Toolkit Packages"
|
|
||||||
LABEL vendor="NVIDIA"
|
|
||||||
LABEL version="${VERSION}"
|
|
||||||
LABEL release="N/A"
|
|
||||||
LABEL summary="deb and rpm packages for the NVIDIA Container Toolkit"
|
|
||||||
LABEL description="See summary"
|
|
||||||
|
|
||||||
COPY LICENSE /licenses/
|
|
||||||
|
|
||||||
# The debpackages stage is used to extract the contents of deb packages.
|
|
||||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04 AS debpackages
|
|
||||||
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG PACKAGE_DIST_DEB=ubuntu18.04
|
|
||||||
|
|
||||||
COPY --from=packaging /artifacts/packages/${PACKAGE_DIST_DEB} /deb-packages
|
|
||||||
|
|
||||||
RUN mkdir -p /artifacts/deb
|
|
||||||
RUN set -eux; \
|
|
||||||
\
|
|
||||||
case "${TARGETARCH}" in \
|
|
||||||
x86_64 | amd64) ARCH='amd64' ;; \
|
|
||||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
|
||||||
aarch64 | arm64) ARCH='arm64' ;; \
|
|
||||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
|
||||||
esac; \
|
|
||||||
for p in $(ls /deb-packages/${ARCH}/*.deb); do dpkg-deb -xv $p /artifacts/deb/; done
|
|
||||||
|
|
||||||
# The rpmpackages stage is used to extract the contents of the rpm packages.
|
|
||||||
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi9 AS rpmpackages
|
|
||||||
RUN dnf install -y cpio
|
|
||||||
|
|
||||||
ARG TARGETARCH
|
|
||||||
ARG PACKAGE_DIST_RPM=centos7
|
|
||||||
|
|
||||||
COPY --from=packaging /artifacts/packages/${PACKAGE_DIST_RPM} /rpm-packages
|
|
||||||
|
|
||||||
RUN mkdir -p /artifacts/rpm
|
|
||||||
RUN set -eux; \
|
|
||||||
\
|
|
||||||
case "${TARGETARCH}" in \
|
|
||||||
x86_64 | amd64) ARCH='x86_64' ;; \
|
|
||||||
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
|
||||||
aarch64 | arm64) ARCH='aarch64' ;; \
|
|
||||||
*) echo "unsupported architecture" ; exit 1 ;; \
|
|
||||||
esac; \
|
|
||||||
for p in $(ls /rpm-packages/${ARCH}/*.rpm); do rpm2cpio $p | cpio -idmv -D /artifacts/rpm; done
|
|
||||||
|
|
||||||
# The artifacts image serves as an intermediate stage to collect the artifacts
|
|
||||||
# From the previous stages:
|
|
||||||
# - The extracted deb packages
|
|
||||||
# - The extracted rpm packages
|
|
||||||
# - The nvidia-ctk-installer binary
|
|
||||||
FROM scratch AS artifacts
|
|
||||||
|
|
||||||
COPY --from=rpmpackages /artifacts/rpm /artifacts/rpm
|
|
||||||
COPY --from=debpackages /artifacts/deb /artifacts/deb
|
|
||||||
COPY --from=build /artifacts/bin /artifacts/build
|
|
||||||
|
|
||||||
# The application stage contains the application used as a GPU Operator
|
|
||||||
# operand.
|
|
||||||
FROM nvcr.io/nvidia/distroless/go:v3.1.9-dev AS application
|
|
||||||
|
|
||||||
USER 0:0
|
|
||||||
SHELL ["/busybox/sh", "-c"]
|
|
||||||
RUN ln -s /busybox/sh /bin/sh
|
|
||||||
|
|
||||||
ENV NVIDIA_DISABLE_REQUIRE="true"
|
|
||||||
ENV NVIDIA_VISIBLE_DEVICES=void
|
|
||||||
ENV NVIDIA_DRIVER_CAPABILITIES=utility
|
|
||||||
|
|
||||||
COPY --from=artifacts /artifacts/rpm /artifacts/rpm
|
|
||||||
COPY --from=artifacts /artifacts/deb /artifacts/deb
|
|
||||||
COPY --from=artifacts /artifacts/build /work
|
|
||||||
|
|
||||||
WORKDIR /work
|
|
||||||
ENV PATH=/work:$PATH
|
|
||||||
|
|
||||||
ARG VERSION
|
|
||||||
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
|
|
||||||
LABEL name="NVIDIA Container Runtime Config"
|
|
||||||
LABEL vendor="NVIDIA"
|
|
||||||
LABEL version="${VERSION}"
|
|
||||||
LABEL release="N/A"
|
|
||||||
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
|
||||||
LABEL description="See summary"
|
|
||||||
|
|
||||||
COPY LICENSE /licenses/
|
|
||||||
|
|
||||||
ENTRYPOINT ["/work/nvidia-ctk-installer"]
|
|
||||||
|
|
||||||
# The GPU Operator exec's nvidia-toolkit in its entrypoint.
|
|
||||||
# We create a symlink here to ensure compatibility with older
|
|
||||||
# GPU Operator versions.
|
|
||||||
RUN ln -s /work/nvidia-ctk-installer /work/nvidia-toolkit
|
|
||||||
38
deployments/container/Dockerfile.packaging
Normal file
38
deployments/container/Dockerfile.packaging
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
# Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
ARG GOLANG_VERSION=x.x.x
|
||||||
|
|
||||||
|
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
|
||||||
|
|
||||||
|
ARG ARTIFACTS_ROOT
|
||||||
|
COPY ${ARTIFACTS_ROOT} /artifacts/packages/
|
||||||
|
|
||||||
|
WORKDIR /artifacts/packages
|
||||||
|
|
||||||
|
# build-args are added to the manifest.txt file below.
|
||||||
|
ARG PACKAGE_DIST
|
||||||
|
ARG PACKAGE_VERSION
|
||||||
|
ARG GIT_BRANCH
|
||||||
|
ARG GIT_COMMIT
|
||||||
|
ARG GIT_COMMIT_SHORT
|
||||||
|
ARG SOURCE_DATE_EPOCH
|
||||||
|
ARG VERSION
|
||||||
|
|
||||||
|
# Create a manifest.txt file with the absolute paths of all deb and rpm packages in the container
|
||||||
|
RUN echo "#IMAGE_EPOCH=$(date '+%s')" > /artifacts/manifest.txt && \
|
||||||
|
env | sed 's/^/#/g' >> /artifacts/manifest.txt && \
|
||||||
|
find /artifacts/packages -iname '*.deb' -o -iname '*.rpm' >> /artifacts/manifest.txt
|
||||||
|
|
||||||
|
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||||
90
deployments/container/Dockerfile.ubi8
Normal file
90
deployments/container/Dockerfile.ubi8
Normal file
@@ -0,0 +1,90 @@
|
|||||||
|
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
ARG GOLANG_VERSION=x.x.x
|
||||||
|
ARG VERSION="N/A"
|
||||||
|
|
||||||
|
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi8 AS build
|
||||||
|
|
||||||
|
RUN yum install -y \
|
||||||
|
wget make git gcc \
|
||||||
|
&& \
|
||||||
|
rm -rf /var/cache/yum/*
|
||||||
|
|
||||||
|
ARG GOLANG_VERSION=x.x.x
|
||||||
|
RUN set -eux; \
|
||||||
|
\
|
||||||
|
arch="$(uname -m)"; \
|
||||||
|
case "${arch##*-}" in \
|
||||||
|
x86_64 | amd64) ARCH='amd64' ;; \
|
||||||
|
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||||
|
aarch64 | arm64) ARCH='arm64' ;; \
|
||||||
|
*) echo "unsupported architecture" ; exit 1 ;; \
|
||||||
|
esac; \
|
||||||
|
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||||
|
| tar -C /usr/local -xz
|
||||||
|
|
||||||
|
|
||||||
|
ENV GOPATH=/go
|
||||||
|
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
RUN mkdir /artifacts
|
||||||
|
ARG VERSION="N/A"
|
||||||
|
ARG GIT_COMMIT="unknown"
|
||||||
|
RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
|
||||||
|
|
||||||
|
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubi8
|
||||||
|
|
||||||
|
ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||||
|
ENV NVIDIA_VISIBLE_DEVICES=void
|
||||||
|
ENV NVIDIA_DRIVER_CAPABILITIES=utility
|
||||||
|
|
||||||
|
ARG ARTIFACTS_ROOT
|
||||||
|
ARG PACKAGE_DIST
|
||||||
|
COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
|
||||||
|
|
||||||
|
WORKDIR /artifacts/packages
|
||||||
|
|
||||||
|
ARG PACKAGE_VERSION
|
||||||
|
ARG TARGETARCH
|
||||||
|
ENV PACKAGE_ARCH=${TARGETARCH}
|
||||||
|
|
||||||
|
RUN PACKAGE_ARCH=${PACKAGE_ARCH/amd64/x86_64} && PACKAGE_ARCH=${PACKAGE_ARCH/arm64/aarch64} && \
|
||||||
|
yum localinstall -y \
|
||||||
|
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1-1.*.rpm \
|
||||||
|
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools-1.*.rpm \
|
||||||
|
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*-${PACKAGE_VERSION}*.rpm
|
||||||
|
|
||||||
|
WORKDIR /work
|
||||||
|
|
||||||
|
COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
|
||||||
|
RUN ln -s nvidia-ctk-installer nvidia-toolkit
|
||||||
|
|
||||||
|
ENV PATH=/work:$PATH
|
||||||
|
|
||||||
|
ARG VERSION
|
||||||
|
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
|
||||||
|
LABEL name="NVIDIA Container Runtime Config"
|
||||||
|
LABEL vendor="NVIDIA"
|
||||||
|
LABEL version="${VERSION}"
|
||||||
|
LABEL release="N/A"
|
||||||
|
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
||||||
|
LABEL description="See summary"
|
||||||
|
|
||||||
|
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||||
|
|
||||||
|
ENTRYPOINT ["/work/nvidia-ctk-installer"]
|
||||||
98
deployments/container/Dockerfile.ubuntu
Normal file
98
deployments/container/Dockerfile.ubuntu
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
|
||||||
|
ARG GOLANG_VERSION=x.x.x
|
||||||
|
ARG VERSION="N/A"
|
||||||
|
|
||||||
|
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04 AS build
|
||||||
|
|
||||||
|
RUN apt-get update && \
|
||||||
|
apt-get install -y wget make git gcc \
|
||||||
|
&& \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
ARG GOLANG_VERSION=x.x.x
|
||||||
|
RUN set -eux; \
|
||||||
|
\
|
||||||
|
arch="$(uname -m)"; \
|
||||||
|
case "${arch##*-}" in \
|
||||||
|
x86_64 | amd64) ARCH='amd64' ;; \
|
||||||
|
ppc64el | ppc64le) ARCH='ppc64le' ;; \
|
||||||
|
aarch64 | arm64) ARCH='arm64' ;; \
|
||||||
|
*) echo "unsupported architecture" ; exit 1 ;; \
|
||||||
|
esac; \
|
||||||
|
wget -nv -O - https://storage.googleapis.com/golang/go${GOLANG_VERSION}.linux-${ARCH}.tar.gz \
|
||||||
|
| tar -C /usr/local -xz
|
||||||
|
|
||||||
|
ENV GOPATH=/go
|
||||||
|
ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH
|
||||||
|
|
||||||
|
WORKDIR /build
|
||||||
|
COPY . .
|
||||||
|
|
||||||
|
RUN mkdir /artifacts
|
||||||
|
ARG VERSION="N/A"
|
||||||
|
ARG GIT_COMMIT="unknown"
|
||||||
|
RUN make PREFIX=/artifacts cmd-nvidia-ctk-installer
|
||||||
|
|
||||||
|
FROM nvcr.io/nvidia/cuda:12.9.0-base-ubuntu20.04
|
||||||
|
|
||||||
|
# Remove the CUDA repository configurations to avoid issues with rotated GPG keys
|
||||||
|
RUN rm -f /etc/apt/sources.list.d/cuda.list
|
||||||
|
|
||||||
|
ARG DEBIAN_FRONTEND=noninteractive
|
||||||
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
|
libcap2 \
|
||||||
|
curl \
|
||||||
|
&& \
|
||||||
|
rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
|
ENV NVIDIA_DISABLE_REQUIRE="true"
|
||||||
|
ENV NVIDIA_VISIBLE_DEVICES=void
|
||||||
|
ENV NVIDIA_DRIVER_CAPABILITIES=utility
|
||||||
|
|
||||||
|
ARG ARTIFACTS_ROOT
|
||||||
|
ARG PACKAGE_DIST
|
||||||
|
COPY ${ARTIFACTS_ROOT}/${PACKAGE_DIST} /artifacts/packages/${PACKAGE_DIST}
|
||||||
|
|
||||||
|
WORKDIR /artifacts/packages
|
||||||
|
|
||||||
|
ARG PACKAGE_VERSION
|
||||||
|
ARG TARGETARCH
|
||||||
|
ENV PACKAGE_ARCH=${TARGETARCH}
|
||||||
|
|
||||||
|
RUN dpkg -i \
|
||||||
|
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container1_1.*.deb \
|
||||||
|
${PACKAGE_DIST}/${PACKAGE_ARCH}/libnvidia-container-tools_1.*.deb \
|
||||||
|
${PACKAGE_DIST}/${PACKAGE_ARCH}/nvidia-container-toolkit*_${PACKAGE_VERSION}*.deb
|
||||||
|
|
||||||
|
WORKDIR /work
|
||||||
|
|
||||||
|
COPY --from=build /artifacts/nvidia-ctk-installer /work/nvidia-ctk-installer
|
||||||
|
RUN ln -s nvidia-ctk-installer nvidia-toolkit
|
||||||
|
|
||||||
|
ENV PATH=/work:$PATH
|
||||||
|
|
||||||
|
ARG VERSION
|
||||||
|
LABEL io.k8s.display-name="NVIDIA Container Runtime Config"
|
||||||
|
LABEL name="NVIDIA Container Runtime Config"
|
||||||
|
LABEL vendor="NVIDIA"
|
||||||
|
LABEL version="${VERSION}"
|
||||||
|
LABEL release="N/A"
|
||||||
|
LABEL summary="Automatically Configure your Container Runtime for GPU support."
|
||||||
|
LABEL description="See summary"
|
||||||
|
|
||||||
|
RUN mkdir /licenses && mv /NGC-DL-CONTAINER-LICENSE /licenses/NGC-DL-CONTAINER-LICENSE
|
||||||
|
|
||||||
|
ENTRYPOINT ["/work/nvidia-ctk-installer"]
|
||||||
@@ -29,17 +29,17 @@ include $(CURDIR)/versions.mk
|
|||||||
|
|
||||||
IMAGE_VERSION := $(VERSION)
|
IMAGE_VERSION := $(VERSION)
|
||||||
|
|
||||||
IMAGE_TAG ?= $(VERSION)
|
IMAGE_TAG ?= $(VERSION)-$(DIST)
|
||||||
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
|
IMAGE = $(IMAGE_NAME):$(IMAGE_TAG)
|
||||||
|
|
||||||
OUT_IMAGE_NAME ?= $(IMAGE_NAME)
|
OUT_IMAGE_NAME ?= $(IMAGE_NAME)
|
||||||
OUT_IMAGE_VERSION ?= $(IMAGE_VERSION)
|
OUT_IMAGE_VERSION ?= $(IMAGE_VERSION)
|
||||||
OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)
|
OUT_IMAGE_TAG = $(OUT_IMAGE_VERSION)-$(DIST)
|
||||||
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
|
OUT_IMAGE = $(OUT_IMAGE_NAME):$(OUT_IMAGE_TAG)
|
||||||
|
|
||||||
##### Public rules #####
|
##### Public rules #####
|
||||||
DEFAULT_PUSH_TARGET := application
|
DEFAULT_PUSH_TARGET := ubuntu20.04
|
||||||
DISTRIBUTIONS := $(DEFAULT_PUSH_TARGET)
|
DISTRIBUTIONS := ubuntu20.04 ubi8
|
||||||
|
|
||||||
META_TARGETS := packaging
|
META_TARGETS := packaging
|
||||||
|
|
||||||
@@ -56,16 +56,30 @@ else
|
|||||||
include $(CURDIR)/deployments/container/multi-arch.mk
|
include $(CURDIR)/deployments/container/multi-arch.mk
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# For the default push target we also push a short tag equal to the version.
|
||||||
|
# We skip this for the development release
|
||||||
|
DEVEL_RELEASE_IMAGE_VERSION ?= devel
|
||||||
|
PUSH_MULTIPLE_TAGS ?= true
|
||||||
|
ifeq ($(strip $(OUT_IMAGE_VERSION)),$(DEVEL_RELEASE_IMAGE_VERSION))
|
||||||
|
PUSH_MULTIPLE_TAGS = false
|
||||||
|
endif
|
||||||
|
ifeq ($(PUSH_MULTIPLE_TAGS),true)
|
||||||
|
push-$(DEFAULT_PUSH_TARGET): push-short
|
||||||
|
endif
|
||||||
|
|
||||||
|
push-%: DIST = $(*)
|
||||||
|
push-short: DIST = $(DEFAULT_PUSH_TARGET)
|
||||||
|
|
||||||
# Define the push targets
|
# Define the push targets
|
||||||
$(PUSH_TARGETS): push-%:
|
$(PUSH_TARGETS): push-%:
|
||||||
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
|
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
|
||||||
|
|
||||||
DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile
|
push-short:
|
||||||
|
$(CURDIR)/scripts/publish-image.sh $(IMAGE) $(OUT_IMAGE)
|
||||||
|
|
||||||
# For packaging targets we set the output image tag to include the -packaging suffix.
|
|
||||||
%-packaging: INTERMEDIATE_TARGET := --target=packaging
|
build-%: DIST = $(*)
|
||||||
%-packaging: IMAGE_TAG = $(IMAGE_VERSION)-packaging
|
build-%: DOCKERFILE = $(CURDIR)/deployments/container/Dockerfile.$(DOCKERFILE_SUFFIX)
|
||||||
%-packaging: OUT_IMAGE_TAG = $(IMAGE_VERSION)-packaging
|
|
||||||
|
|
||||||
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
ARTIFACTS_ROOT ?= $(shell realpath --relative-to=$(CURDIR) $(DIST_DIR))
|
||||||
|
|
||||||
@@ -76,12 +90,10 @@ $(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
|
|||||||
--provenance=false --sbom=false \
|
--provenance=false --sbom=false \
|
||||||
$(DOCKER_BUILD_OPTIONS) \
|
$(DOCKER_BUILD_OPTIONS) \
|
||||||
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
|
$(DOCKER_BUILD_PLATFORM_OPTIONS) \
|
||||||
$(INTERMEDIATE_TARGET) \
|
|
||||||
--tag $(IMAGE) \
|
--tag $(IMAGE) \
|
||||||
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
|
--build-arg ARTIFACTS_ROOT="$(ARTIFACTS_ROOT)" \
|
||||||
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
--build-arg GOLANG_VERSION="$(GOLANG_VERSION)" \
|
||||||
--build-arg PACKAGE_DIST_DEB="$(PACKAGE_DIST_DEB)" \
|
--build-arg PACKAGE_DIST="$(PACKAGE_DIST)" \
|
||||||
--build-arg PACKAGE_DIST_RPM="$(PACKAGE_DIST_RPM)" \
|
|
||||||
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
|
--build-arg PACKAGE_VERSION="$(PACKAGE_VERSION)" \
|
||||||
--build-arg VERSION="$(VERSION)" \
|
--build-arg VERSION="$(VERSION)" \
|
||||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||||
@@ -91,17 +103,25 @@ $(IMAGE_TARGETS): image-%: $(ARTIFACTS_ROOT)
|
|||||||
-f $(DOCKERFILE) \
|
-f $(DOCKERFILE) \
|
||||||
$(CURDIR)
|
$(CURDIR)
|
||||||
|
|
||||||
|
build-ubuntu%: DOCKERFILE_SUFFIX := ubuntu
|
||||||
|
build-ubuntu%: PACKAGE_DIST = ubuntu18.04
|
||||||
|
|
||||||
PACKAGE_DIST_DEB = ubuntu18.04
|
build-ubi8: DOCKERFILE_SUFFIX := ubi8
|
||||||
# TODO: This needs to be set to centos8 for ppc64le builds
|
build-ubi8: PACKAGE_DIST = centos7
|
||||||
PACKAGE_DIST_RPM = centos7
|
|
||||||
|
|
||||||
# Handle the default build target.
|
build-packaging: DOCKERFILE_SUFFIX := packaging
|
||||||
.PHONY: build push
|
build-packaging: PACKAGE_ARCH := amd64
|
||||||
build: build-$(DEFAULT_PUSH_TARGET)
|
build-packaging: PACKAGE_DIST = all
|
||||||
push: push-$(DEFAULT_PUSH_TARGET)
|
|
||||||
|
|
||||||
# Test targets
|
# Test targets
|
||||||
|
test-%: DIST = $(*)
|
||||||
|
|
||||||
|
# Handle the default build target.
|
||||||
|
.PHONY: build
|
||||||
|
build: $(DEFAULT_PUSH_TARGET)
|
||||||
|
$(DEFAULT_PUSH_TARGET): build-$(DEFAULT_PUSH_TARGET)
|
||||||
|
$(DEFAULT_PUSH_TARGET): DIST = $(DEFAULT_PUSH_TARGET)
|
||||||
|
|
||||||
TEST_CASES ?= docker crio containerd
|
TEST_CASES ?= docker crio containerd
|
||||||
$(TEST_TARGETS): test-%:
|
$(TEST_TARGETS): test-%:
|
||||||
TEST_CASES="$(TEST_CASES)" bash -x $(CURDIR)/test/container/main.sh run \
|
TEST_CASES="$(TEST_CASES)" bash -x $(CURDIR)/test/container/main.sh run \
|
||||||
|
|||||||
@@ -23,3 +23,11 @@ $(BUILD_TARGETS): build-%: image-%
|
|||||||
else
|
else
|
||||||
$(BUILD_TARGETS): build-%: image-%
|
$(BUILD_TARGETS): build-%: image-%
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
# For the default distribution we also retag the image.
|
||||||
|
# Note: This needs to be updated for multi-arch images.
|
||||||
|
ifeq ($(IMAGE_TAG),$(VERSION)-$(DIST))
|
||||||
|
$(DEFAULT_PUSH_TARGET):
|
||||||
|
$(DOCKER) image inspect $(IMAGE) > /dev/null || $(DOCKER) pull $(IMAGE)
|
||||||
|
$(DOCKER) tag $(IMAGE) $(subst :$(IMAGE_TAG),:$(VERSION),$(IMAGE))
|
||||||
|
endif
|
||||||
|
|||||||
@@ -14,7 +14,7 @@
|
|||||||
|
|
||||||
# This Dockerfile is also used to define the golang version used in this project
|
# This Dockerfile is also used to define the golang version used in this project
|
||||||
# This allows dependabot to manage this version in addition to other images.
|
# This allows dependabot to manage this version in addition to other images.
|
||||||
FROM golang:1.24.4
|
FROM golang:1.24.3
|
||||||
|
|
||||||
WORKDIR /work
|
WORKDIR /work
|
||||||
COPY * .
|
COPY * .
|
||||||
|
|||||||
@@ -1,28 +0,0 @@
|
|||||||
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
|
|
||||||
[Unit]
|
|
||||||
Description=Refresh NVIDIA CDI specification file
|
|
||||||
ConditionPathExists=/usr/bin/nvidia-smi
|
|
||||||
ConditionPathExists=/usr/bin/nvidia-ctk
|
|
||||||
|
|
||||||
[Service]
|
|
||||||
Type=oneshot
|
|
||||||
EnvironmentFile=-/etc/nvidia-container-toolkit/cdi-refresh.env
|
|
||||||
ExecCondition=/usr/bin/grep -qE '/nvidia.ko' /lib/modules/%v/modules.dep
|
|
||||||
ExecStart=/usr/bin/nvidia-ctk cdi generate --output=/var/run/cdi/nvidia.yaml
|
|
||||||
CapabilityBoundingSet=CAP_SYS_MODULE CAP_SYS_ADMIN CAP_MKNOD
|
|
||||||
|
|
||||||
[Install]
|
|
||||||
WantedBy=multi-user.target
|
|
||||||
@@ -55,7 +55,9 @@ RUN make PREFIX=${DIST_DIR} cmds
|
|||||||
|
|
||||||
WORKDIR $DIST_DIR
|
WORKDIR $DIST_DIR
|
||||||
COPY packaging/debian ./debian
|
COPY packaging/debian ./debian
|
||||||
COPY deployments/systemd/ .
|
|
||||||
|
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||||
|
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||||
|
|
||||||
RUN dch --create --package="${PKG_NAME}" \
|
RUN dch --create --package="${PKG_NAME}" \
|
||||||
--newversion "${REVISION}" \
|
--newversion "${REVISION}" \
|
||||||
@@ -65,6 +67,6 @@ RUN dch --create --package="${PKG_NAME}" \
|
|||||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||||
|
|
||||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||||
debuild -eDISTRIB -eSECTION -eVERSION="${REVISION}" \
|
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||||
mv /tmp/*.deb /dist
|
mv /tmp/*.deb /dist
|
||||||
|
|||||||
@@ -46,7 +46,9 @@ RUN make PREFIX=${DIST_DIR} cmds
|
|||||||
|
|
||||||
WORKDIR $DIST_DIR/..
|
WORKDIR $DIST_DIR/..
|
||||||
COPY packaging/rpm .
|
COPY packaging/rpm .
|
||||||
COPY deployments/systemd/ .
|
|
||||||
|
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||||
|
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||||
|
|
||||||
CMD arch=$(uname -m) && \
|
CMD arch=$(uname -m) && \
|
||||||
rpmbuild --clean --target=$arch -bb \
|
rpmbuild --clean --target=$arch -bb \
|
||||||
@@ -54,6 +56,7 @@ CMD arch=$(uname -m) && \
|
|||||||
-D "release_date $(date +'%a %b %d %Y')" \
|
-D "release_date $(date +'%a %b %d %Y')" \
|
||||||
-D "git_commit ${GIT_COMMIT}" \
|
-D "git_commit ${GIT_COMMIT}" \
|
||||||
-D "version ${PKG_VERS}" \
|
-D "version ${PKG_VERS}" \
|
||||||
|
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||||
-D "release ${PKG_REV}" \
|
-D "release ${PKG_REV}" \
|
||||||
SPECS/nvidia-container-toolkit.spec && \
|
SPECS/nvidia-container-toolkit.spec && \
|
||||||
mv RPMS/$arch/*.rpm /dist
|
mv RPMS/$arch/*.rpm /dist
|
||||||
|
|||||||
@@ -71,7 +71,9 @@ RUN make PREFIX=${DIST_DIR} cmds
|
|||||||
|
|
||||||
WORKDIR $DIST_DIR/..
|
WORKDIR $DIST_DIR/..
|
||||||
COPY packaging/rpm .
|
COPY packaging/rpm .
|
||||||
COPY deployments/systemd/ ${DIST_DIR}/
|
|
||||||
|
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||||
|
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||||
|
|
||||||
CMD arch=$(uname -m) && \
|
CMD arch=$(uname -m) && \
|
||||||
rpmbuild --clean --target=$arch -bb \
|
rpmbuild --clean --target=$arch -bb \
|
||||||
@@ -79,6 +81,7 @@ CMD arch=$(uname -m) && \
|
|||||||
-D "release_date $(date +'%a %b %d %Y')" \
|
-D "release_date $(date +'%a %b %d %Y')" \
|
||||||
-D "git_commit ${GIT_COMMIT}" \
|
-D "git_commit ${GIT_COMMIT}" \
|
||||||
-D "version ${PKG_VERS}" \
|
-D "version ${PKG_VERS}" \
|
||||||
|
-D "libnvidia_container_tools_version ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" \
|
||||||
-D "release ${PKG_REV}" \
|
-D "release ${PKG_REV}" \
|
||||||
SPECS/nvidia-container-toolkit.spec && \
|
SPECS/nvidia-container-toolkit.spec && \
|
||||||
mv RPMS/$arch/*.rpm /dist
|
mv RPMS/$arch/*.rpm /dist
|
||||||
|
|||||||
@@ -53,16 +53,18 @@ RUN make PREFIX=${DIST_DIR} cmds
|
|||||||
|
|
||||||
WORKDIR $DIST_DIR
|
WORKDIR $DIST_DIR
|
||||||
COPY packaging/debian ./debian
|
COPY packaging/debian ./debian
|
||||||
COPY deployments/systemd/ .
|
|
||||||
|
ARG LIBNVIDIA_CONTAINER_TOOLS_VERSION
|
||||||
|
ENV LIBNVIDIA_CONTAINER_TOOLS_VERSION ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}
|
||||||
|
|
||||||
RUN dch --create --package="${PKG_NAME}" \
|
RUN dch --create --package="${PKG_NAME}" \
|
||||||
--newversion "${REVISION}" \
|
--newversion "${REVISION}" \
|
||||||
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
|
"See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/${GIT_COMMIT}/CHANGELOG.md for the changelog" && \
|
||||||
dch --append "Bump libnvidia-container dependency to ${REVISION}" && \
|
dch --append "Bump libnvidia-container dependency to ${LIBNVIDIA_CONTAINER_TOOLS_VERSION}" && \
|
||||||
dch -r "" && \
|
dch -r "" && \
|
||||||
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
if [ "$REVISION" != "$(dpkg-parsechangelog --show-field=Version)" ]; then exit 1; fi
|
||||||
|
|
||||||
CMD export DISTRIB="$(lsb_release -cs)" && \
|
CMD export DISTRIB="$(lsb_release -cs)" && \
|
||||||
debuild -eDISTRIB -eSECTION -eVERSION="${REVISION}" \
|
debuild -eDISTRIB -eSECTION -eLIBNVIDIA_CONTAINER_TOOLS_VERSION -eVERSION="${REVISION}" \
|
||||||
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
--dpkg-buildpackage-hook='sh debian/prepare' -i -us -uc -b && \
|
||||||
mv /tmp/*.deb /dist
|
mv /tmp/*.deb /dist
|
||||||
|
|||||||
@@ -85,6 +85,11 @@ docker-all: $(AMD64_TARGETS) $(X86_64_TARGETS) \
|
|||||||
--%: docker-build-%
|
--%: docker-build-%
|
||||||
@
|
@
|
||||||
|
|
||||||
|
LIBNVIDIA_CONTAINER_VERSION ?= $(LIB_VERSION)
|
||||||
|
LIBNVIDIA_CONTAINER_TAG ?= $(LIB_TAG)
|
||||||
|
|
||||||
|
LIBNVIDIA_CONTAINER_TOOLS_VERSION := $(LIBNVIDIA_CONTAINER_VERSION)$(if $(LIBNVIDIA_CONTAINER_TAG),~$(LIBNVIDIA_CONTAINER_TAG))-1
|
||||||
|
|
||||||
# private ubuntu target
|
# private ubuntu target
|
||||||
--ubuntu%: OS := ubuntu
|
--ubuntu%: OS := ubuntu
|
||||||
|
|
||||||
@@ -124,6 +129,7 @@ docker-build-%:
|
|||||||
--build-arg PKG_NAME="$(LIB_NAME)" \
|
--build-arg PKG_NAME="$(LIB_NAME)" \
|
||||||
--build-arg PKG_VERS="$(PACKAGE_VERSION)" \
|
--build-arg PKG_VERS="$(PACKAGE_VERSION)" \
|
||||||
--build-arg PKG_REV="$(PACKAGE_REVISION)" \
|
--build-arg PKG_REV="$(PACKAGE_REVISION)" \
|
||||||
|
--build-arg LIBNVIDIA_CONTAINER_TOOLS_VERSION="$(LIBNVIDIA_CONTAINER_TOOLS_VERSION)" \
|
||||||
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
--build-arg GIT_COMMIT="$(GIT_COMMIT)" \
|
||||||
--tag $(BUILDIMAGE) \
|
--tag $(BUILDIMAGE) \
|
||||||
--file $(DOCKERFILE) .
|
--file $(DOCKERFILE) .
|
||||||
|
|||||||
10
go.mod
10
go.mod
@@ -3,8 +3,8 @@ module github.com/NVIDIA/nvidia-container-toolkit
|
|||||||
go 1.23.0
|
go 1.23.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/NVIDIA/go-nvlib v0.7.3
|
github.com/NVIDIA/go-nvlib v0.7.2
|
||||||
github.com/NVIDIA/go-nvml v0.12.9-0
|
github.com/NVIDIA/go-nvml v0.12.4-1
|
||||||
github.com/cyphar/filepath-securejoin v0.4.1
|
github.com/cyphar/filepath-securejoin v0.4.1
|
||||||
github.com/moby/sys/reexec v0.1.0
|
github.com/moby/sys/reexec v0.1.0
|
||||||
github.com/moby/sys/symlink v0.3.0
|
github.com/moby/sys/symlink v0.3.0
|
||||||
@@ -13,15 +13,15 @@ require (
|
|||||||
github.com/pelletier/go-toml v1.9.5
|
github.com/pelletier/go-toml v1.9.5
|
||||||
github.com/sirupsen/logrus v1.9.3
|
github.com/sirupsen/logrus v1.9.3
|
||||||
github.com/stretchr/testify v1.10.0
|
github.com/stretchr/testify v1.10.0
|
||||||
github.com/urfave/cli/v2 v2.27.7
|
github.com/urfave/cli/v2 v2.27.6
|
||||||
golang.org/x/mod v0.25.0
|
golang.org/x/mod v0.24.0
|
||||||
golang.org/x/sys v0.33.0
|
golang.org/x/sys v0.33.0
|
||||||
tags.cncf.io/container-device-interface v1.0.1
|
tags.cncf.io/container-device-interface v1.0.1
|
||||||
tags.cncf.io/container-device-interface/specs-go v1.0.0
|
tags.cncf.io/container-device-interface/specs-go v1.0.0
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 // indirect
|
github.com/cpuguy83/go-md2man/v2 v2.0.5 // indirect
|
||||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||||
github.com/fsnotify/fsnotify v1.7.0 // indirect
|
github.com/fsnotify/fsnotify v1.7.0 // indirect
|
||||||
github.com/google/uuid v1.6.0 // indirect
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
|
|||||||
20
go.sum
20
go.sum
@@ -1,11 +1,11 @@
|
|||||||
github.com/NVIDIA/go-nvlib v0.7.3 h1:kXc8PkWUlrwedSpM4fR8xT/DAq1NKy8HqhpgteFcGAw=
|
github.com/NVIDIA/go-nvlib v0.7.2 h1:7sy/NVUa4sM9FLKwH6CjBfHSWrJUmv8emVyxLTzjfOA=
|
||||||
github.com/NVIDIA/go-nvlib v0.7.3/go.mod h1:i95Je7GinMy/+BDs++DAdbPmT2TubjNP8i8joC7DD7I=
|
github.com/NVIDIA/go-nvlib v0.7.2/go.mod h1:2Kh2kYSP5IJ8EKf0/SYDzHiQKb9EJkwOf2LQzu6pXzY=
|
||||||
github.com/NVIDIA/go-nvml v0.12.9-0 h1:e344UK8ZkeMeeLkdQtRhmXRxNf+u532LDZPGMtkdus0=
|
github.com/NVIDIA/go-nvml v0.12.4-1 h1:WKUvqshhWSNTfm47ETRhv0A0zJyr1ncCuHiXwoTrBEc=
|
||||||
github.com/NVIDIA/go-nvml v0.12.9-0/go.mod h1:+KNA7c7gIBH7SKSJ1ntlwkfN80zdx8ovl4hrK3LmPt4=
|
github.com/NVIDIA/go-nvml v0.12.4-1/go.mod h1:8Llmj+1Rr+9VGGwZuRer5N/aCjxGuR5nPb/9ebBiIEQ=
|
||||||
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
|
github.com/blang/semver/v4 v4.0.0 h1:1PFHFE6yCCTv8C1TeyNNarDzntLi7wMI5i/pzqYIsAM=
|
||||||
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
|
github.com/blang/semver/v4 v4.0.0/go.mod h1:IbckMUScFkM3pff0VJDNKRiT6TG/YpiHIM2yvyW5YoQ=
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.7 h1:zbFlGlXEAKlwXpmvle3d8Oe3YnkKIK4xSRTd3sHPnBo=
|
github.com/cpuguy83/go-md2man/v2 v2.0.5 h1:ZtcqGrnekaHpVLArFSe4HK5DoKx1T0rq2DwVB0alcyc=
|
||||||
github.com/cpuguy83/go-md2man/v2 v2.0.7/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g=
|
github.com/cpuguy83/go-md2man/v2 v2.0.5/go.mod h1:tgQtvFlXSQOSOSIRvRPT7W67SCa46tRHOmNcaadrF8o=
|
||||||
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
github.com/creack/pty v1.1.9/go.mod h1:oKZEueFk5CKHvIhNR5MUki03XCEU+Q6VDXinZuGJ33E=
|
||||||
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
|
github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s=
|
||||||
github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
|
github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI=
|
||||||
@@ -69,8 +69,8 @@ github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf
|
|||||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
|
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635 h1:kdXcSzyDtseVEc4yCz2qF8ZrQvIDBJLl4S1c3GCXmoI=
|
||||||
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
|
github.com/syndtr/gocapability v0.0.0-20200815063812-42c35b437635/go.mod h1:hkRG7XYTFWNJGYcbNJQlaLq0fg1yr4J4t/NcTQtrfww=
|
||||||
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
|
github.com/urfave/cli v1.19.1/go.mod h1:70zkFmudgCuE/ngEzBv17Jvp/497gISqfk5gWijbERA=
|
||||||
github.com/urfave/cli/v2 v2.27.7 h1:bH59vdhbjLv3LAvIu6gd0usJHgoTTPhCFib8qqOwXYU=
|
github.com/urfave/cli/v2 v2.27.6 h1:VdRdS98FNhKZ8/Az8B7MTyGQmpIr36O1EHybx/LaZ4g=
|
||||||
github.com/urfave/cli/v2 v2.27.7/go.mod h1:CyNAG/xg+iAOg0N4MPGZqVmv2rCoP267496AOXUZjA4=
|
github.com/urfave/cli/v2 v2.27.6/go.mod h1:3Sevf16NykTbInEnD0yKkjDAeZDS0A6bzhBH5hrMvTQ=
|
||||||
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
github.com/xeipuuv/gojsonpointer v0.0.0-20180127040702-4e3ac2762d5f/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
||||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
|
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb h1:zGWFAtiMcyryUHoUjUJX0/lt1H2+i2Ka2n+D3DImSNo=
|
||||||
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb/go.mod h1:N2zxlSyiKSe5eX1tZViRH5QA0qijqEDrYZiPEAiq3wU=
|
||||||
@@ -80,8 +80,8 @@ github.com/xeipuuv/gojsonschema v1.2.0 h1:LhYJRs+L4fBtjZUfuSZIKGeVu0QRy8e5Xi7D17
|
|||||||
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
|
github.com/xeipuuv/gojsonschema v1.2.0/go.mod h1:anYRn/JVcOK2ZgGU+IjEV4nwlhoK5sQluxsYJ78Id3Y=
|
||||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
|
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1 h1:gEOO8jv9F4OT7lGCjxCBTO/36wtF6j2nSip77qHd4x4=
|
||||||
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
github.com/xrash/smetrics v0.0.0-20240521201337-686a1a2994c1/go.mod h1:Ohn+xnUBiLI6FVj/9LpzZWtj1/D6lUovWYBkxHVV3aM=
|
||||||
golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w=
|
golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU=
|
||||||
golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
|
golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww=
|
||||||
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
golang.org/x/sys v0.0.0-20191115151921-52ab43148777/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||||
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
|
|||||||
@@ -53,6 +53,6 @@ docker run --rm \
|
|||||||
-v $(pwd):$(pwd) \
|
-v $(pwd):$(pwd) \
|
||||||
-w $(pwd) \
|
-w $(pwd) \
|
||||||
-u $(id -u):$(id -g) \
|
-u $(id -u):$(id -g) \
|
||||||
--entrypoint="sh" \
|
--entrypoint="bash" \
|
||||||
${IMAGE} \
|
${IMAGE} \
|
||||||
-c "cp -p -R /artifacts/* ${DIST_DIR}"
|
-c "cp --preserve=timestamps -R /artifacts/* ${DIST_DIR}"
|
||||||
|
|||||||
@@ -31,10 +31,8 @@ import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
FilePathOverrideEnvVar = "NVIDIA_CTK_CONFIG_FILE_PATH"
|
configOverride = "XDG_CONFIG_HOME"
|
||||||
RelativeFilePath = "nvidia-container-runtime/config.toml"
|
configFilePath = "nvidia-container-runtime/config.toml"
|
||||||
|
|
||||||
configRootOverride = "XDG_CONFIG_HOME"
|
|
||||||
|
|
||||||
nvidiaCTKExecutable = "nvidia-ctk"
|
nvidiaCTKExecutable = "nvidia-ctk"
|
||||||
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
|
nvidiaCTKDefaultFilePath = "/usr/bin/nvidia-ctk"
|
||||||
@@ -76,15 +74,11 @@ type Config struct {
|
|||||||
|
|
||||||
// GetConfigFilePath returns the path to the config file for the configured system
|
// GetConfigFilePath returns the path to the config file for the configured system
|
||||||
func GetConfigFilePath() string {
|
func GetConfigFilePath() string {
|
||||||
if configFilePathOverride := os.Getenv(FilePathOverrideEnvVar); configFilePathOverride != "" {
|
if XDGConfigDir := os.Getenv(configOverride); len(XDGConfigDir) != 0 {
|
||||||
return configFilePathOverride
|
return filepath.Join(XDGConfigDir, configFilePath)
|
||||||
}
|
|
||||||
configRoot := "/etc"
|
|
||||||
if XDGConfigDir := os.Getenv(configRootOverride); len(XDGConfigDir) != 0 {
|
|
||||||
configRoot = XDGConfigDir
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return filepath.Join(configRoot, RelativeFilePath)
|
return filepath.Join("/etc", configFilePath)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetConfig sets up the config struct. Values are read from a toml file
|
// GetConfig sets up the config struct. Values are read from a toml file
|
||||||
@@ -116,7 +110,7 @@ func GetDefault() (*Config, error) {
|
|||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
Runtimes: []string{"runc", "crun"},
|
Runtimes: []string{"docker-runc", "runc", "crun"},
|
||||||
Mode: "auto",
|
Mode: "auto",
|
||||||
Modes: modesConfig{
|
Modes: modesConfig{
|
||||||
CSV: csvModeConfig{
|
CSV: csvModeConfig{
|
||||||
|
|||||||
@@ -27,26 +27,9 @@ import (
|
|||||||
|
|
||||||
func TestGetConfigWithCustomConfig(t *testing.T) {
|
func TestGetConfigWithCustomConfig(t *testing.T) {
|
||||||
testDir := t.TempDir()
|
testDir := t.TempDir()
|
||||||
t.Setenv(configRootOverride, testDir)
|
t.Setenv(configOverride, testDir)
|
||||||
|
|
||||||
filename := filepath.Join(testDir, RelativeFilePath)
|
filename := filepath.Join(testDir, configFilePath)
|
||||||
|
|
||||||
// By default debug is disabled
|
|
||||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
|
||||||
|
|
||||||
require.NoError(t, os.MkdirAll(filepath.Dir(filename), 0766))
|
|
||||||
require.NoError(t, os.WriteFile(filename, contents, 0600))
|
|
||||||
|
|
||||||
cfg, err := GetConfig()
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.Equal(t, "/nvidia-container-toolkit.log", cfg.NVIDIAContainerRuntimeConfig.DebugFilePath)
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetConfigWithConfigFilePathOverride(t *testing.T) {
|
|
||||||
testDir := t.TempDir()
|
|
||||||
filename := filepath.Join(testDir, RelativeFilePath)
|
|
||||||
|
|
||||||
t.Setenv(FilePathOverrideEnvVar, filename)
|
|
||||||
|
|
||||||
// By default debug is disabled
|
// By default debug is disabled
|
||||||
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
contents := []byte("[nvidia-container-runtime]\ndebug = \"/nvidia-container-toolkit.log\"")
|
||||||
@@ -80,7 +63,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
Runtimes: []string{"runc", "crun"},
|
Runtimes: []string{"docker-runc", "runc", "crun"},
|
||||||
Mode: "auto",
|
Mode: "auto",
|
||||||
Modes: modesConfig{
|
Modes: modesConfig{
|
||||||
CSV: csvModeConfig{
|
CSV: csvModeConfig{
|
||||||
@@ -187,7 +170,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
Runtimes: []string{"runc", "crun"},
|
Runtimes: []string{"docker-runc", "runc", "crun"},
|
||||||
Mode: "auto",
|
Mode: "auto",
|
||||||
Modes: modesConfig{
|
Modes: modesConfig{
|
||||||
CSV: csvModeConfig{
|
CSV: csvModeConfig{
|
||||||
@@ -306,7 +289,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
Runtimes: []string{"runc", "crun"},
|
Runtimes: []string{"docker-runc", "runc", "crun"},
|
||||||
Mode: "auto",
|
Mode: "auto",
|
||||||
Modes: modesConfig{
|
Modes: modesConfig{
|
||||||
CSV: csvModeConfig{
|
CSV: csvModeConfig{
|
||||||
@@ -348,7 +331,7 @@ func TestGetConfig(t *testing.T) {
|
|||||||
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
NVIDIAContainerRuntimeConfig: RuntimeConfig{
|
||||||
DebugFilePath: "/dev/null",
|
DebugFilePath: "/dev/null",
|
||||||
LogLevel: "info",
|
LogLevel: "info",
|
||||||
Runtimes: []string{"runc", "crun"},
|
Runtimes: []string{"docker-runc", "runc", "crun"},
|
||||||
Mode: "auto",
|
Mode: "auto",
|
||||||
Modes: modesConfig{
|
Modes: modesConfig{
|
||||||
CSV: csvModeConfig{
|
CSV: csvModeConfig{
|
||||||
|
|||||||
@@ -21,35 +21,22 @@ import (
|
|||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
type builder struct {
|
type builder struct {
|
||||||
CUDA
|
env map[string]string
|
||||||
|
mounts []specs.Mount
|
||||||
disableRequire bool
|
disableRequire bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// Option is a functional option for creating a CUDA image.
|
|
||||||
type Option func(*builder) error
|
|
||||||
|
|
||||||
// New creates a new CUDA image from the input options.
|
// New creates a new CUDA image from the input options.
|
||||||
func New(opt ...Option) (CUDA, error) {
|
func New(opt ...Option) (CUDA, error) {
|
||||||
b := &builder{
|
b := &builder{}
|
||||||
CUDA: CUDA{
|
|
||||||
acceptEnvvarUnprivileged: true,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, o := range opt {
|
for _, o := range opt {
|
||||||
if err := o(b); err != nil {
|
if err := o(b); err != nil {
|
||||||
return CUDA{}, err
|
return CUDA{}, err
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if b.logger == nil {
|
|
||||||
b.logger = logger.New()
|
|
||||||
}
|
|
||||||
if b.env == nil {
|
if b.env == nil {
|
||||||
b.env = make(map[string]string)
|
b.env = make(map[string]string)
|
||||||
}
|
}
|
||||||
@@ -63,36 +50,15 @@ func (b builder) build() (CUDA, error) {
|
|||||||
b.env[EnvVarNvidiaDisableRequire] = "true"
|
b.env[EnvVarNvidiaDisableRequire] = "true"
|
||||||
}
|
}
|
||||||
|
|
||||||
return b.CUDA, nil
|
c := CUDA{
|
||||||
|
env: b.env,
|
||||||
|
mounts: b.mounts,
|
||||||
|
}
|
||||||
|
return c, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func WithAcceptDeviceListAsVolumeMounts(acceptDeviceListAsVolumeMounts bool) Option {
|
// Option is a functional option for creating a CUDA image.
|
||||||
return func(b *builder) error {
|
type Option func(*builder) error
|
||||||
b.acceptDeviceListAsVolumeMounts = acceptDeviceListAsVolumeMounts
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WithAcceptEnvvarUnprivileged(acceptEnvvarUnprivileged bool) Option {
|
|
||||||
return func(b *builder) error {
|
|
||||||
b.acceptEnvvarUnprivileged = acceptEnvvarUnprivileged
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WithAnnotations(annotations map[string]string) Option {
|
|
||||||
return func(b *builder) error {
|
|
||||||
b.annotations = annotations
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WithAnnotationsPrefixes(annotationsPrefixes []string) Option {
|
|
||||||
return func(b *builder) error {
|
|
||||||
b.annotationsPrefixes = annotationsPrefixes
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// WithDisableRequire sets the disable require option.
|
// WithDisableRequire sets the disable require option.
|
||||||
func WithDisableRequire(disableRequire bool) Option {
|
func WithDisableRequire(disableRequire bool) Option {
|
||||||
@@ -127,14 +93,6 @@ func WithEnvMap(env map[string]string) Option {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithLogger sets the logger to use when creating the CUDA image.
|
|
||||||
func WithLogger(logger logger.Interface) Option {
|
|
||||||
return func(b *builder) error {
|
|
||||||
b.logger = logger
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// WithMounts sets the mounts associated with the CUDA image.
|
// WithMounts sets the mounts associated with the CUDA image.
|
||||||
func WithMounts(mounts []specs.Mount) Option {
|
func WithMounts(mounts []specs.Mount) Option {
|
||||||
return func(b *builder) error {
|
return func(b *builder) error {
|
||||||
@@ -142,20 +100,3 @@ func WithMounts(mounts []specs.Mount) Option {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithPreferredVisibleDevicesEnvVars sets the environment variables that
|
|
||||||
// should take precedence over the default NVIDIA_VISIBLE_DEVICES.
|
|
||||||
func WithPreferredVisibleDevicesEnvVars(preferredVisibleDeviceEnvVars ...string) Option {
|
|
||||||
return func(b *builder) error {
|
|
||||||
b.preferredVisibleDeviceEnvVars = preferredVisibleDeviceEnvVars
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// WithPrivileged sets whether an image is privileged or not.
|
|
||||||
func WithPrivileged(isPrivileged bool) Option {
|
|
||||||
return func(b *builder) error {
|
|
||||||
b.isPrivileged = isPrivileged
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -19,15 +19,12 @@ package image
|
|||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"slices"
|
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
"golang.org/x/mod/semver"
|
"golang.org/x/mod/semver"
|
||||||
"tags.cncf.io/container-device-interface/pkg/parser"
|
"tags.cncf.io/container-device-interface/pkg/parser"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@@ -41,44 +38,27 @@ const (
|
|||||||
// a map of environment variable to values that can be used to perform lookups
|
// a map of environment variable to values that can be used to perform lookups
|
||||||
// such as requirements.
|
// such as requirements.
|
||||||
type CUDA struct {
|
type CUDA struct {
|
||||||
logger logger.Interface
|
env map[string]string
|
||||||
|
mounts []specs.Mount
|
||||||
annotations map[string]string
|
|
||||||
env map[string]string
|
|
||||||
isPrivileged bool
|
|
||||||
mounts []specs.Mount
|
|
||||||
|
|
||||||
annotationsPrefixes []string
|
|
||||||
acceptDeviceListAsVolumeMounts bool
|
|
||||||
acceptEnvvarUnprivileged bool
|
|
||||||
preferredVisibleDeviceEnvVars []string
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
// NewCUDAImageFromSpec creates a CUDA image from the input OCI runtime spec.
|
||||||
// The process environment is read (if present) to construc the CUDA Image.
|
// The process environment is read (if present) to construc the CUDA Image.
|
||||||
func NewCUDAImageFromSpec(spec *specs.Spec, opts ...Option) (CUDA, error) {
|
func NewCUDAImageFromSpec(spec *specs.Spec) (CUDA, error) {
|
||||||
if spec == nil {
|
|
||||||
return New(opts...)
|
|
||||||
}
|
|
||||||
|
|
||||||
var env []string
|
var env []string
|
||||||
if spec.Process != nil {
|
if spec != nil && spec.Process != nil {
|
||||||
env = spec.Process.Env
|
env = spec.Process.Env
|
||||||
}
|
}
|
||||||
|
|
||||||
specOpts := []Option{
|
return New(
|
||||||
WithAnnotations(spec.Annotations),
|
|
||||||
WithEnv(env),
|
WithEnv(env),
|
||||||
WithMounts(spec.Mounts),
|
WithMounts(spec.Mounts),
|
||||||
WithPrivileged(IsPrivileged((*OCISpec)(spec))),
|
)
|
||||||
}
|
|
||||||
|
|
||||||
return New(append(opts, specOpts...)...)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// newCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
// NewCUDAImageFromEnv creates a CUDA image from the input environment. The environment
|
||||||
// is a list of strings of the form ENVAR=VALUE.
|
// is a list of strings of the form ENVAR=VALUE.
|
||||||
func newCUDAImageFromEnv(env []string) (CUDA, error) {
|
func NewCUDAImageFromEnv(env []string) (CUDA, error) {
|
||||||
return New(WithEnv(env))
|
return New(WithEnv(env))
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -103,10 +83,6 @@ func (i CUDA) IsLegacy() bool {
|
|||||||
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
return len(legacyCudaVersion) > 0 && len(cudaRequire) == 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i CUDA) IsPrivileged() bool {
|
|
||||||
return i.isPrivileged
|
|
||||||
}
|
|
||||||
|
|
||||||
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
|
// GetRequirements returns the requirements from all NVIDIA_REQUIRE_ environment
|
||||||
// variables.
|
// variables.
|
||||||
func (i CUDA) GetRequirements() ([]string, error) {
|
func (i CUDA) GetRequirements() ([]string, error) {
|
||||||
@@ -144,8 +120,8 @@ func (i CUDA) HasDisableRequire() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
// devicesFromEnvvars returns the devices requested by the image through environment variables
|
// DevicesFromEnvvars returns the devices requested by the image through environment variables
|
||||||
func (i CUDA) devicesFromEnvvars(envVars ...string) []string {
|
func (i CUDA) DevicesFromEnvvars(envVars ...string) VisibleDevices {
|
||||||
// We concantenate all the devices from the specified env.
|
// We concantenate all the devices from the specified env.
|
||||||
var isSet bool
|
var isSet bool
|
||||||
var devices []string
|
var devices []string
|
||||||
@@ -166,15 +142,15 @@ func (i CUDA) devicesFromEnvvars(envVars ...string) []string {
|
|||||||
|
|
||||||
// Environment variable unset with legacy image: default to "all".
|
// Environment variable unset with legacy image: default to "all".
|
||||||
if !isSet && len(devices) == 0 && i.IsLegacy() {
|
if !isSet && len(devices) == 0 && i.IsLegacy() {
|
||||||
devices = []string{"all"}
|
return NewVisibleDevices("all")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Environment variable unset or empty or "void": return nil
|
// Environment variable unset or empty or "void": return nil
|
||||||
if len(devices) == 0 || requested["void"] {
|
if len(devices) == 0 || requested["void"] {
|
||||||
devices = []string{"void"}
|
return NewVisibleDevices("void")
|
||||||
}
|
}
|
||||||
|
|
||||||
return NewVisibleDevices(devices...).List()
|
return NewVisibleDevices(devices...)
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetDriverCapabilities returns the requested driver capabilities.
|
// GetDriverCapabilities returns the requested driver capabilities.
|
||||||
@@ -224,137 +200,46 @@ func parseMajorMinorVersion(version string) (string, error) {
|
|||||||
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
// OnlyFullyQualifiedCDIDevices returns true if all devices requested in the image are requested as CDI devices/
|
||||||
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
func (i CUDA) OnlyFullyQualifiedCDIDevices() bool {
|
||||||
var hasCDIdevice bool
|
var hasCDIdevice bool
|
||||||
for _, device := range i.VisibleDevices() {
|
for _, device := range i.VisibleDevicesFromEnvVar() {
|
||||||
if !parser.IsQualifiedName(device) {
|
if !parser.IsQualifiedName(device) {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
hasCDIdevice = true
|
hasCDIdevice = true
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for _, device := range i.DevicesFromMounts() {
|
||||||
|
if !strings.HasPrefix(device, "cdi/") {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
hasCDIdevice = true
|
||||||
|
}
|
||||||
return hasCDIdevice
|
return hasCDIdevice
|
||||||
}
|
}
|
||||||
|
|
||||||
// visibleEnvVars returns the environment variables that are used to determine device visibility.
|
// VisibleDevicesFromEnvVar returns the set of visible devices requested through
|
||||||
// It returns the preferred environment variables that are set, or NVIDIA_VISIBLE_DEVICES if none are set.
|
// the NVIDIA_VISIBLE_DEVICES environment variable.
|
||||||
func (i CUDA) visibleEnvVars() []string {
|
func (i CUDA) VisibleDevicesFromEnvVar() []string {
|
||||||
var envVars []string
|
return i.DevicesFromEnvvars(EnvVarNvidiaVisibleDevices).List()
|
||||||
for _, envVar := range i.preferredVisibleDeviceEnvVars {
|
|
||||||
if !i.HasEnvvar(envVar) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
envVars = append(envVars, envVar)
|
|
||||||
}
|
|
||||||
if len(envVars) > 0 {
|
|
||||||
return envVars
|
|
||||||
}
|
|
||||||
return []string{EnvVarNvidiaVisibleDevices}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// VisibleDevices returns a list of devices requested in the container image.
|
// VisibleDevicesFromMounts returns the set of visible devices requested as mounts.
|
||||||
// If volume mount requests are enabled these are returned if requested,
|
func (i CUDA) VisibleDevicesFromMounts() []string {
|
||||||
// otherwise device requests through environment variables are considered.
|
|
||||||
// In cases where environment variable requests required privileged containers,
|
|
||||||
// such devices requests are ignored.
|
|
||||||
func (i CUDA) VisibleDevices() []string {
|
|
||||||
// If annotation device requests are present, these are preferred.
|
|
||||||
annotationDeviceRequests := i.cdiDeviceRequestsFromAnnotations()
|
|
||||||
if len(annotationDeviceRequests) > 0 {
|
|
||||||
return annotationDeviceRequests
|
|
||||||
}
|
|
||||||
|
|
||||||
// If enabled, try and get the device list from volume mounts first
|
|
||||||
if i.acceptDeviceListAsVolumeMounts {
|
|
||||||
volumeMountDeviceRequests := i.visibleDevicesFromMounts()
|
|
||||||
if len(volumeMountDeviceRequests) > 0 {
|
|
||||||
return volumeMountDeviceRequests
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get the Fallback to reading from the environment variable if privileges are correct
|
|
||||||
envVarDeviceRequests := i.visibleDevicesFromEnvVar()
|
|
||||||
if len(envVarDeviceRequests) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the container is privileged, or environment variable requests are
|
|
||||||
// allowed for unprivileged containers, these devices are returned.
|
|
||||||
if i.isPrivileged || i.acceptEnvvarUnprivileged {
|
|
||||||
return envVarDeviceRequests
|
|
||||||
}
|
|
||||||
|
|
||||||
// We log a warning if we are ignoring the environment variable requests.
|
|
||||||
envVars := i.visibleEnvVars()
|
|
||||||
if len(envVars) > 0 {
|
|
||||||
i.logger.Warningf("Ignoring devices requested by environment variable(s) in unprivileged container: %v", envVars)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// cdiDeviceRequestsFromAnnotations returns a list of devices specified in the
|
|
||||||
// annotations.
|
|
||||||
// Keys starting with the specified prefixes are considered and expected to
|
|
||||||
// contain a comma-separated list of fully-qualified CDI devices names.
|
|
||||||
// The format of the requested devices is not checked and the list is not
|
|
||||||
// deduplicated.
|
|
||||||
func (i CUDA) cdiDeviceRequestsFromAnnotations() []string {
|
|
||||||
if len(i.annotationsPrefixes) == 0 || len(i.annotations) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
var annotationKeys []string
|
|
||||||
for key := range i.annotations {
|
|
||||||
for _, prefix := range i.annotationsPrefixes {
|
|
||||||
if strings.HasPrefix(key, prefix) {
|
|
||||||
annotationKeys = append(annotationKeys, key)
|
|
||||||
// There is no need to check additional prefixes since we
|
|
||||||
// typically deduplicate devices in any case.
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// We sort the annotationKeys for consistent results.
|
|
||||||
slices.Sort(annotationKeys)
|
|
||||||
|
|
||||||
var devices []string
|
var devices []string
|
||||||
for _, key := range annotationKeys {
|
for _, device := range i.DevicesFromMounts() {
|
||||||
devices = append(devices, strings.Split(i.annotations[key], ",")...)
|
|
||||||
}
|
|
||||||
return devices
|
|
||||||
}
|
|
||||||
|
|
||||||
// visibleDevicesFromEnvVar returns the set of visible devices requested through environment variables.
|
|
||||||
// If any of the preferredVisibleDeviceEnvVars are present in the image, they
|
|
||||||
// are used to determine the visible devices. If this is not the case, the
|
|
||||||
// NVIDIA_VISIBLE_DEVICES environment variable is used.
|
|
||||||
func (i CUDA) visibleDevicesFromEnvVar() []string {
|
|
||||||
envVars := i.visibleEnvVars()
|
|
||||||
return i.devicesFromEnvvars(envVars...)
|
|
||||||
}
|
|
||||||
|
|
||||||
// visibleDevicesFromMounts returns the set of visible devices requested as mounts.
|
|
||||||
func (i CUDA) visibleDevicesFromMounts() []string {
|
|
||||||
var devices []string
|
|
||||||
for _, device := range i.requestsFromMounts() {
|
|
||||||
switch {
|
switch {
|
||||||
|
case strings.HasPrefix(device, volumeMountDevicePrefixCDI):
|
||||||
|
continue
|
||||||
case strings.HasPrefix(device, volumeMountDevicePrefixImex):
|
case strings.HasPrefix(device, volumeMountDevicePrefixImex):
|
||||||
continue
|
continue
|
||||||
case strings.HasPrefix(device, volumeMountDevicePrefixCDI):
|
|
||||||
name, err := cdiDeviceMountRequest(device).qualifiedName()
|
|
||||||
if err != nil {
|
|
||||||
i.logger.Warningf("Ignoring invalid mount request for CDI device %v: %v", device, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
devices = append(devices, name)
|
|
||||||
default:
|
|
||||||
devices = append(devices, device)
|
|
||||||
}
|
}
|
||||||
|
devices = append(devices, device)
|
||||||
}
|
}
|
||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
|
|
||||||
// requestsFromMounts returns a list of device specified as mounts.
|
// DevicesFromMounts returns a list of device specified as mounts.
|
||||||
func (i CUDA) requestsFromMounts() []string {
|
// TODO: This should be merged with getDevicesFromMounts used in the NVIDIA Container Runtime
|
||||||
|
func (i CUDA) DevicesFromMounts() []string {
|
||||||
root := filepath.Clean(DeviceListAsVolumeMountsRoot)
|
root := filepath.Clean(DeviceListAsVolumeMountsRoot)
|
||||||
seen := make(map[string]bool)
|
seen := make(map[string]bool)
|
||||||
var devices []string
|
var devices []string
|
||||||
@@ -386,35 +271,28 @@ func (i CUDA) requestsFromMounts() []string {
|
|||||||
return devices
|
return devices
|
||||||
}
|
}
|
||||||
|
|
||||||
// a cdiDeviceMountRequest represents a CDI device requests as a mount.
|
// CDIDevicesFromMounts returns a list of CDI devices specified as mounts on the image.
|
||||||
// Here the host path /dev/null is mounted to a particular path in the container.
|
func (i CUDA) CDIDevicesFromMounts() []string {
|
||||||
// The container path has the form:
|
var devices []string
|
||||||
// /var/run/nvidia-container-devices/cdi/<vendor>/<class>/<device>
|
for _, mountDevice := range i.DevicesFromMounts() {
|
||||||
// or
|
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixCDI) {
|
||||||
// /var/run/nvidia-container-devices/cdi/<vendor>/<class>=<device>
|
continue
|
||||||
type cdiDeviceMountRequest string
|
}
|
||||||
|
parts := strings.SplitN(strings.TrimPrefix(mountDevice, volumeMountDevicePrefixCDI), "/", 3)
|
||||||
// qualifiedName returns the fully-qualified name of the CDI device.
|
if len(parts) != 3 {
|
||||||
func (m cdiDeviceMountRequest) qualifiedName() (string, error) {
|
continue
|
||||||
if !strings.HasPrefix(string(m), volumeMountDevicePrefixCDI) {
|
}
|
||||||
return "", fmt.Errorf("invalid mount CDI device request: %s", m)
|
vendor := parts[0]
|
||||||
|
class := parts[1]
|
||||||
|
device := parts[2]
|
||||||
|
devices = append(devices, fmt.Sprintf("%s/%s=%s", vendor, class, device))
|
||||||
}
|
}
|
||||||
|
return devices
|
||||||
requestedDevice := strings.TrimPrefix(string(m), volumeMountDevicePrefixCDI)
|
|
||||||
if parser.IsQualifiedName(requestedDevice) {
|
|
||||||
return requestedDevice, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
parts := strings.SplitN(requestedDevice, "/", 3)
|
|
||||||
if len(parts) != 3 {
|
|
||||||
return "", fmt.Errorf("invalid mount CDI device request: %s", m)
|
|
||||||
}
|
|
||||||
return fmt.Sprintf("%s/%s=%s", parts[0], parts[1], parts[2]), nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image.
|
// ImexChannelsFromEnvVar returns the list of IMEX channels requested for the image.
|
||||||
func (i CUDA) ImexChannelsFromEnvVar() []string {
|
func (i CUDA) ImexChannelsFromEnvVar() []string {
|
||||||
imexChannels := i.devicesFromEnvvars(EnvVarNvidiaImexChannels)
|
imexChannels := i.DevicesFromEnvvars(EnvVarNvidiaImexChannels).List()
|
||||||
if len(imexChannels) == 1 && imexChannels[0] == "all" {
|
if len(imexChannels) == 1 && imexChannels[0] == "all" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -424,7 +302,7 @@ func (i CUDA) ImexChannelsFromEnvVar() []string {
|
|||||||
// ImexChannelsFromMounts returns the list of IMEX channels requested for the image.
|
// ImexChannelsFromMounts returns the list of IMEX channels requested for the image.
|
||||||
func (i CUDA) ImexChannelsFromMounts() []string {
|
func (i CUDA) ImexChannelsFromMounts() []string {
|
||||||
var channels []string
|
var channels []string
|
||||||
for _, mountDevice := range i.requestsFromMounts() {
|
for _, mountDevice := range i.DevicesFromMounts() {
|
||||||
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixImex) {
|
if !strings.HasPrefix(mountDevice, volumeMountDevicePrefixImex) {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -21,91 +21,9 @@ import (
|
|||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
"github.com/opencontainers/runtime-spec/specs-go"
|
||||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestNewCUDAImageFromSpec(t *testing.T) {
|
|
||||||
logger, _ := testlog.NewNullLogger()
|
|
||||||
|
|
||||||
testCases := []struct {
|
|
||||||
description string
|
|
||||||
spec *specs.Spec
|
|
||||||
options []Option
|
|
||||||
expected CUDA
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
description: "no env vars",
|
|
||||||
spec: &specs.Spec{
|
|
||||||
Process: &specs.Process{
|
|
||||||
Env: []string{},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expected: CUDA{
|
|
||||||
logger: logger,
|
|
||||||
env: map[string]string{},
|
|
||||||
acceptEnvvarUnprivileged: true,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES=all",
|
|
||||||
spec: &specs.Spec{
|
|
||||||
Process: &specs.Process{
|
|
||||||
Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expected: CUDA{
|
|
||||||
logger: logger,
|
|
||||||
env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
|
|
||||||
acceptEnvvarUnprivileged: true,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Spec overrides options",
|
|
||||||
spec: &specs.Spec{
|
|
||||||
Process: &specs.Process{
|
|
||||||
Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
|
|
||||||
},
|
|
||||||
Mounts: []specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/spec-source",
|
|
||||||
Destination: "/spec-destination",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
options: []Option{
|
|
||||||
WithEnvMap(map[string]string{"OTHER": "value"}),
|
|
||||||
WithMounts([]specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/option-source",
|
|
||||||
Destination: "/option-destination",
|
|
||||||
},
|
|
||||||
}),
|
|
||||||
},
|
|
||||||
expected: CUDA{
|
|
||||||
logger: logger,
|
|
||||||
env: map[string]string{"NVIDIA_VISIBLE_DEVICES": "all"},
|
|
||||||
mounts: []specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/spec-source",
|
|
||||||
Destination: "/spec-destination",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
acceptEnvvarUnprivileged: true,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range testCases {
|
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
|
||||||
options := append([]Option{WithLogger(logger)}, tc.options...)
|
|
||||||
image, err := NewCUDAImageFromSpec(tc.spec, options...)
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.EqualValues(t, tc.expected, image)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestParseMajorMinorVersionValid(t *testing.T) {
|
func TestParseMajorMinorVersionValid(t *testing.T) {
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
version string
|
version string
|
||||||
@@ -204,7 +122,7 @@ func TestGetRequirements(t *testing.T) {
|
|||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
image, err := newCUDAImageFromEnv(tc.env)
|
image, err := NewCUDAImageFromEnv(tc.env)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
requirements, err := image.GetRequirements()
|
requirements, err := image.GetRequirements()
|
||||||
@@ -215,226 +133,6 @@ func TestGetRequirements(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestGetDevicesFromEnvvar(t *testing.T) {
|
|
||||||
envDockerResourceGPUs := "DOCKER_RESOURCE_GPUS"
|
|
||||||
gpuID := "GPU-12345"
|
|
||||||
anotherGPUID := "GPU-67890"
|
|
||||||
thirdGPUID := "MIG-12345"
|
|
||||||
|
|
||||||
var tests = []struct {
|
|
||||||
description string
|
|
||||||
preferredVisibleDeviceEnvVars []string
|
|
||||||
env map[string]string
|
|
||||||
expectedDevices []string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
description: "empty env returns nil for non-legacy image",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: "void",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: "none",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{""},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "empty env returns all for legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"all"},
|
|
||||||
},
|
|
||||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is ignored when
|
|
||||||
// not enabled
|
|
||||||
{
|
|
||||||
description: "missing NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "blank NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: "",
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'void' NVIDIA_VISIBLE_DEVICES returns nil for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: "void",
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'none' NVIDIA_VISIBLE_DEVICES returns empty for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: "none",
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{""},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for non-legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "NVIDIA_VISIBLE_DEVICES set returns value for legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "empty env returns all for legacy image",
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"all"},
|
|
||||||
},
|
|
||||||
// Add the `DOCKER_RESOURCE_GPUS` envvar and ensure that this is selected when
|
|
||||||
// enabled
|
|
||||||
{
|
|
||||||
description: "empty env returns nil for non-legacy image",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "blank DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: "",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'void' DOCKER_RESOURCE_GPUS returns nil for non-legacy image",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: "void",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "'none' DOCKER_RESOURCE_GPUS returns empty for non-legacy image",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: "none",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{""},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS set returns value for non-legacy image",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: gpuID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS set returns value for legacy image",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: gpuID,
|
|
||||||
EnvVarCudaVersion: "legacy",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{gpuID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS is selected if present",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{anotherGPUID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS overrides NVIDIA_VISIBLE_DEVICES if present",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{envDockerResourceGPUs},
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
envDockerResourceGPUs: anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{anotherGPUID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL overrides NVIDIA_VISIBLE_DEVICES if present",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{anotherGPUID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "All available swarm resource envvars are selected and override NVIDIA_VISIBLE_DEVICES if present",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
"DOCKER_RESOURCE_GPUS": thirdGPUID,
|
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{thirdGPUID, anotherGPUID},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "DOCKER_RESOURCE_GPUS_ADDITIONAL or DOCKER_RESOURCE_GPUS override NVIDIA_VISIBLE_DEVICES if present",
|
|
||||||
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: gpuID,
|
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": anotherGPUID,
|
|
||||||
},
|
|
||||||
expectedDevices: []string{anotherGPUID},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
|
||||||
image, err := New(
|
|
||||||
WithEnvMap(tc.env),
|
|
||||||
WithPrivileged(true),
|
|
||||||
WithAcceptDeviceListAsVolumeMounts(false),
|
|
||||||
WithAcceptEnvvarUnprivileged(false),
|
|
||||||
WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...),
|
|
||||||
)
|
|
||||||
|
|
||||||
require.NoError(t, err)
|
|
||||||
devices := image.visibleDevicesFromEnvVar()
|
|
||||||
require.EqualValues(t, tc.expectedDevices, devices)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
||||||
var tests = []struct {
|
var tests = []struct {
|
||||||
description string
|
description string
|
||||||
@@ -487,9 +185,9 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
|||||||
expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"},
|
expectedDevices: []string{"GPU0-MIG0/0/1", "GPU1-MIG0/0/1"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "cdi devices are included",
|
description: "cdi devices are ignored",
|
||||||
mounts: makeTestMounts("GPU0", "nvidia.com/gpu=all", "GPU1"),
|
mounts: makeTestMounts("GPU0", "cdi/nvidia.com/gpu=all", "GPU1"),
|
||||||
expectedDevices: []string{"GPU0", "nvidia.com/gpu=all", "GPU1"},
|
expectedDevices: []string{"GPU0", "GPU1"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "imex devices are ignored",
|
description: "imex devices are ignored",
|
||||||
@@ -499,195 +197,8 @@ func TestGetVisibleDevicesFromMounts(t *testing.T) {
|
|||||||
}
|
}
|
||||||
for _, tc := range tests {
|
for _, tc := range tests {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
image, err := New(WithMounts(tc.mounts))
|
image, _ := New(WithMounts(tc.mounts))
|
||||||
require.NoError(t, err)
|
require.Equal(t, tc.expectedDevices, image.VisibleDevicesFromMounts())
|
||||||
require.Equal(t, tc.expectedDevices, image.visibleDevicesFromMounts())
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestVisibleDevices(t *testing.T) {
|
|
||||||
var tests = []struct {
|
|
||||||
description string
|
|
||||||
mountDevices []specs.Mount
|
|
||||||
envvarDevices string
|
|
||||||
privileged bool
|
|
||||||
acceptUnprivileged bool
|
|
||||||
acceptMounts bool
|
|
||||||
preferredVisibleDeviceEnvVars []string
|
|
||||||
env map[string]string
|
|
||||||
expectedDevices []string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
description: "Mount devices, unprivileged, no accept unprivileged",
|
|
||||||
mountDevices: []specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
envvarDevices: "GPU2,GPU3",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
expectedDevices: []string{"GPU0", "GPU1"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "No mount devices, unprivileged, no accept unprivileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "GPU0,GPU1",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "No mount devices, privileged, no accept unprivileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "GPU0,GPU1",
|
|
||||||
privileged: true,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
expectedDevices: []string{"GPU0", "GPU1"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "No mount devices, unprivileged, accept unprivileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "GPU0,GPU1",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: true,
|
|
||||||
acceptMounts: true,
|
|
||||||
expectedDevices: []string{"GPU0", "GPU1"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Mount devices, unprivileged, accept unprivileged, no accept mounts",
|
|
||||||
mountDevices: []specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
envvarDevices: "GPU2,GPU3",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: true,
|
|
||||||
acceptMounts: false,
|
|
||||||
expectedDevices: []string{"GPU2", "GPU3"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "Mount devices, unprivileged, no accept unprivileged, no accept mounts",
|
|
||||||
mountDevices: []specs.Mount{
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU0"),
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Source: "/dev/null",
|
|
||||||
Destination: filepath.Join(DeviceListAsVolumeMountsRoot, "GPU1"),
|
|
||||||
},
|
|
||||||
},
|
|
||||||
envvarDevices: "GPU2,GPU3",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: false,
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
// New test cases for visibleEnvVars functionality
|
|
||||||
{
|
|
||||||
description: "preferred env var set and present in env, privileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "",
|
|
||||||
privileged: true,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
|
|
||||||
env: map[string]string{
|
|
||||||
"DOCKER_RESOURCE_GPUS": "GPU-12345",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"GPU-12345"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "preferred env var set and present in env, unprivileged but accepted",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: true,
|
|
||||||
acceptMounts: true,
|
|
||||||
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
|
|
||||||
env: map[string]string{
|
|
||||||
"DOCKER_RESOURCE_GPUS": "GPU-12345",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"GPU-12345"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "preferred env var set and present in env, unprivileged and not accepted",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "",
|
|
||||||
privileged: false,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
|
|
||||||
env: map[string]string{
|
|
||||||
"DOCKER_RESOURCE_GPUS": "GPU-12345",
|
|
||||||
},
|
|
||||||
expectedDevices: nil,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "multiple preferred env vars, both present, privileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "",
|
|
||||||
privileged: true,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS", "DOCKER_RESOURCE_GPUS_ADDITIONAL"},
|
|
||||||
env: map[string]string{
|
|
||||||
"DOCKER_RESOURCE_GPUS": "GPU-12345",
|
|
||||||
"DOCKER_RESOURCE_GPUS_ADDITIONAL": "GPU-67890",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"GPU-12345", "GPU-67890"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "preferred env var not present, fallback to NVIDIA_VISIBLE_DEVICES, privileged",
|
|
||||||
mountDevices: nil,
|
|
||||||
envvarDevices: "GPU-12345",
|
|
||||||
privileged: true,
|
|
||||||
acceptUnprivileged: false,
|
|
||||||
acceptMounts: true,
|
|
||||||
preferredVisibleDeviceEnvVars: []string{"DOCKER_RESOURCE_GPUS"},
|
|
||||||
env: map[string]string{
|
|
||||||
EnvVarNvidiaVisibleDevices: "GPU-12345",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"GPU-12345"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for _, tc := range tests {
|
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
|
||||||
// Create env map with both NVIDIA_VISIBLE_DEVICES and any additional env vars
|
|
||||||
env := make(map[string]string)
|
|
||||||
if tc.envvarDevices != "" {
|
|
||||||
env[EnvVarNvidiaVisibleDevices] = tc.envvarDevices
|
|
||||||
}
|
|
||||||
for k, v := range tc.env {
|
|
||||||
env[k] = v
|
|
||||||
}
|
|
||||||
|
|
||||||
image, err := New(
|
|
||||||
WithEnvMap(env),
|
|
||||||
WithMounts(tc.mountDevices),
|
|
||||||
WithPrivileged(tc.privileged),
|
|
||||||
WithAcceptDeviceListAsVolumeMounts(tc.acceptMounts),
|
|
||||||
WithAcceptEnvvarUnprivileged(tc.acceptUnprivileged),
|
|
||||||
WithPreferredVisibleDevicesEnvVars(tc.preferredVisibleDeviceEnvVars...),
|
|
||||||
)
|
|
||||||
require.NoError(t, err)
|
|
||||||
require.Equal(t, tc.expectedDevices, image.VisibleDevices())
|
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -713,7 +224,7 @@ func TestImexChannelsFromEnvVar(t *testing.T) {
|
|||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
|
for id, baseEnvvars := range map[string][]string{"": nil, "legacy": {"CUDA_VERSION=1.2.3"}} {
|
||||||
t.Run(tc.description+id, func(t *testing.T) {
|
t.Run(tc.description+id, func(t *testing.T) {
|
||||||
i, err := newCUDAImageFromEnv(append(baseEnvvars, tc.env...))
|
i, err := NewCUDAImageFromEnv(append(baseEnvvars, tc.env...))
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
channels := i.ImexChannelsFromEnvVar()
|
channels := i.ImexChannelsFromEnvVar()
|
||||||
@@ -723,73 +234,6 @@ func TestImexChannelsFromEnvVar(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestCDIDeviceRequestsFromAnnotations(t *testing.T) {
|
|
||||||
testCases := []struct {
|
|
||||||
description string
|
|
||||||
prefixes []string
|
|
||||||
annotations map[string]string
|
|
||||||
expectedDevices []string
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
description: "no annotations",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "no matching annotations",
|
|
||||||
prefixes: []string{"not-prefix/"},
|
|
||||||
annotations: map[string]string{
|
|
||||||
"prefix/foo": "example.com/device=bar",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "single matching annotation",
|
|
||||||
prefixes: []string{"prefix/"},
|
|
||||||
annotations: map[string]string{
|
|
||||||
"prefix/foo": "example.com/device=bar",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"example.com/device=bar"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "multiple matching annotations",
|
|
||||||
prefixes: []string{"prefix/", "another-prefix/"},
|
|
||||||
annotations: map[string]string{
|
|
||||||
"prefix/foo": "example.com/device=bar",
|
|
||||||
"another-prefix/bar": "example.com/device=baz",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "multiple matching annotations with duplicate devices",
|
|
||||||
prefixes: []string{"prefix/", "another-prefix/"},
|
|
||||||
annotations: map[string]string{
|
|
||||||
"prefix/foo": "example.com/device=bar",
|
|
||||||
"another-prefix/bar": "example.com/device=bar",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"example.com/device=bar", "example.com/device=bar"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "invalid devices are returned as is",
|
|
||||||
prefixes: []string{"prefix/"},
|
|
||||||
annotations: map[string]string{
|
|
||||||
"prefix/foo": "example.com/device",
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"example.com/device"},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, tc := range testCases {
|
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
|
||||||
image, err := New(
|
|
||||||
WithAnnotationsPrefixes(tc.prefixes),
|
|
||||||
WithAnnotations(tc.annotations),
|
|
||||||
)
|
|
||||||
require.NoError(t, err)
|
|
||||||
|
|
||||||
devices := image.cdiDeviceRequestsFromAnnotations()
|
|
||||||
require.ElementsMatch(t, tc.expectedDevices, devices)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func makeTestMounts(paths ...string) []specs.Mount {
|
func makeTestMounts(paths ...string) []specs.Mount {
|
||||||
var mounts []specs.Mount
|
var mounts []specs.Mount
|
||||||
for _, path := range paths {
|
for _, path := range paths {
|
||||||
|
|||||||
@@ -24,39 +24,20 @@ const (
|
|||||||
capSysAdmin = "CAP_SYS_ADMIN"
|
capSysAdmin = "CAP_SYS_ADMIN"
|
||||||
)
|
)
|
||||||
|
|
||||||
type CapabilitiesGetter interface {
|
|
||||||
GetCapabilities() []string
|
|
||||||
}
|
|
||||||
|
|
||||||
type OCISpec specs.Spec
|
|
||||||
|
|
||||||
type OCISpecCapabilities specs.LinuxCapabilities
|
|
||||||
|
|
||||||
// IsPrivileged returns true if the container is a privileged container.
|
// IsPrivileged returns true if the container is a privileged container.
|
||||||
func IsPrivileged(s CapabilitiesGetter) bool {
|
func IsPrivileged(s *specs.Spec) bool {
|
||||||
if s == nil {
|
if s.Process.Capabilities == nil {
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
for _, c := range s.GetCapabilities() {
|
|
||||||
|
// We only make sure that the bounding capability set has
|
||||||
|
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
||||||
|
// actually started as '--privileged', but also allow non-root users to
|
||||||
|
// access the privileged NVIDIA capabilities.
|
||||||
|
for _, c := range s.Process.Capabilities.Bounding {
|
||||||
if c == capSysAdmin {
|
if c == capSysAdmin {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s OCISpec) GetCapabilities() []string {
|
|
||||||
if s.Process == nil || s.Process.Capabilities == nil {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return (*OCISpecCapabilities)(s.Process.Capabilities).GetCapabilities()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c OCISpecCapabilities) GetCapabilities() []string {
|
|
||||||
// We only make sure that the bounding capability set has
|
|
||||||
// CAP_SYS_ADMIN. This allows us to make sure that the container was
|
|
||||||
// actually started as '--privileged', but also allow non-root users to
|
|
||||||
// access the privileged NVIDIA capabilities.
|
|
||||||
return c.Bounding
|
|
||||||
}
|
|
||||||
|
|||||||
@@ -1,57 +0,0 @@
|
|||||||
/**
|
|
||||||
# Copyright (c) NVIDIA CORPORATION. All rights reserved.
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package image
|
|
||||||
|
|
||||||
import (
|
|
||||||
"testing"
|
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
|
||||||
"github.com/stretchr/testify/require"
|
|
||||||
)
|
|
||||||
|
|
||||||
func TestIsPrivileged(t *testing.T) {
|
|
||||||
var tests = []struct {
|
|
||||||
spec specs.Spec
|
|
||||||
expected bool
|
|
||||||
}{
|
|
||||||
{
|
|
||||||
specs.Spec{
|
|
||||||
Process: &specs.Process{
|
|
||||||
Capabilities: &specs.LinuxCapabilities{
|
|
||||||
Bounding: []string{"CAP_SYS_ADMIN"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
true,
|
|
||||||
},
|
|
||||||
{
|
|
||||||
specs.Spec{
|
|
||||||
Process: &specs.Process{
|
|
||||||
Capabilities: &specs.LinuxCapabilities{
|
|
||||||
Bounding: []string{"CAP_SYS_FOO"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
false,
|
|
||||||
},
|
|
||||||
}
|
|
||||||
for i, tc := range tests {
|
|
||||||
privileged := IsPrivileged((*OCISpec)(&tc.spec))
|
|
||||||
|
|
||||||
require.Equal(t, tc.expected, privileged, "%d: %v", i, tc)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -62,7 +62,7 @@ load-kmods = true
|
|||||||
#debug = "/var/log/nvidia-container-runtime.log"
|
#debug = "/var/log/nvidia-container-runtime.log"
|
||||||
log-level = "info"
|
log-level = "info"
|
||||||
mode = "auto"
|
mode = "auto"
|
||||||
runtimes = ["runc", "crun"]
|
runtimes = ["docker-runc", "runc", "crun"]
|
||||||
|
|
||||||
[nvidia-container-runtime.modes]
|
[nvidia-container-runtime.modes]
|
||||||
|
|
||||||
|
|||||||
@@ -23,7 +23,6 @@ type cache struct {
|
|||||||
|
|
||||||
sync.Mutex
|
sync.Mutex
|
||||||
devices []Device
|
devices []Device
|
||||||
envVars []EnvVar
|
|
||||||
hooks []Hook
|
hooks []Hook
|
||||||
mounts []Mount
|
mounts []Mount
|
||||||
}
|
}
|
||||||
@@ -52,20 +51,6 @@ func (c *cache) Devices() ([]Device, error) {
|
|||||||
return c.devices, nil
|
return c.devices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *cache) EnvVars() ([]EnvVar, error) {
|
|
||||||
c.Lock()
|
|
||||||
defer c.Unlock()
|
|
||||||
|
|
||||||
if c.envVars == nil {
|
|
||||||
envVars, err := c.d.EnvVars()
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
c.envVars = envVars
|
|
||||||
}
|
|
||||||
return c.envVars, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *cache) Hooks() ([]Hook, error) {
|
func (c *cache) Hooks() ([]Hook, error) {
|
||||||
c.Lock()
|
c.Lock()
|
||||||
defer c.Unlock()
|
defer c.Unlock()
|
||||||
|
|||||||
@@ -22,12 +22,6 @@ type Device struct {
|
|||||||
Path string
|
Path string
|
||||||
}
|
}
|
||||||
|
|
||||||
// EnvVar represents a discovered environment variable.
|
|
||||||
type EnvVar struct {
|
|
||||||
Name string
|
|
||||||
Value string
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mount represents a discovered mount.
|
// Mount represents a discovered mount.
|
||||||
type Mount struct {
|
type Mount struct {
|
||||||
HostPath string
|
HostPath string
|
||||||
@@ -48,7 +42,6 @@ type Hook struct {
|
|||||||
//go:generate moq -rm -fmt=goimports -stub -out discover_mock.go . Discover
|
//go:generate moq -rm -fmt=goimports -stub -out discover_mock.go . Discover
|
||||||
type Discover interface {
|
type Discover interface {
|
||||||
Devices() ([]Device, error)
|
Devices() ([]Device, error)
|
||||||
EnvVars() ([]EnvVar, error)
|
|
||||||
Mounts() ([]Mount, error)
|
Mounts() ([]Mount, error)
|
||||||
Hooks() ([]Hook, error)
|
Hooks() ([]Hook, error)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -20,9 +20,6 @@ var _ Discover = &DiscoverMock{}
|
|||||||
// DevicesFunc: func() ([]Device, error) {
|
// DevicesFunc: func() ([]Device, error) {
|
||||||
// panic("mock out the Devices method")
|
// panic("mock out the Devices method")
|
||||||
// },
|
// },
|
||||||
// EnvVarsFunc: func() ([]EnvVar, error) {
|
|
||||||
// panic("mock out the EnvVars method")
|
|
||||||
// },
|
|
||||||
// HooksFunc: func() ([]Hook, error) {
|
// HooksFunc: func() ([]Hook, error) {
|
||||||
// panic("mock out the Hooks method")
|
// panic("mock out the Hooks method")
|
||||||
// },
|
// },
|
||||||
@@ -39,9 +36,6 @@ type DiscoverMock struct {
|
|||||||
// DevicesFunc mocks the Devices method.
|
// DevicesFunc mocks the Devices method.
|
||||||
DevicesFunc func() ([]Device, error)
|
DevicesFunc func() ([]Device, error)
|
||||||
|
|
||||||
// EnvVarsFunc mocks the EnvVars method.
|
|
||||||
EnvVarsFunc func() ([]EnvVar, error)
|
|
||||||
|
|
||||||
// HooksFunc mocks the Hooks method.
|
// HooksFunc mocks the Hooks method.
|
||||||
HooksFunc func() ([]Hook, error)
|
HooksFunc func() ([]Hook, error)
|
||||||
|
|
||||||
@@ -53,9 +47,6 @@ type DiscoverMock struct {
|
|||||||
// Devices holds details about calls to the Devices method.
|
// Devices holds details about calls to the Devices method.
|
||||||
Devices []struct {
|
Devices []struct {
|
||||||
}
|
}
|
||||||
// EnvVars holds details about calls to the EnvVars method.
|
|
||||||
EnvVars []struct {
|
|
||||||
}
|
|
||||||
// Hooks holds details about calls to the Hooks method.
|
// Hooks holds details about calls to the Hooks method.
|
||||||
Hooks []struct {
|
Hooks []struct {
|
||||||
}
|
}
|
||||||
@@ -64,7 +55,6 @@ type DiscoverMock struct {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
lockDevices sync.RWMutex
|
lockDevices sync.RWMutex
|
||||||
lockEnvVars sync.RWMutex
|
|
||||||
lockHooks sync.RWMutex
|
lockHooks sync.RWMutex
|
||||||
lockMounts sync.RWMutex
|
lockMounts sync.RWMutex
|
||||||
}
|
}
|
||||||
@@ -100,37 +90,6 @@ func (mock *DiscoverMock) DevicesCalls() []struct {
|
|||||||
return calls
|
return calls
|
||||||
}
|
}
|
||||||
|
|
||||||
// EnvVars calls EnvVarsFunc.
|
|
||||||
func (mock *DiscoverMock) EnvVars() ([]EnvVar, error) {
|
|
||||||
callInfo := struct {
|
|
||||||
}{}
|
|
||||||
mock.lockEnvVars.Lock()
|
|
||||||
mock.calls.EnvVars = append(mock.calls.EnvVars, callInfo)
|
|
||||||
mock.lockEnvVars.Unlock()
|
|
||||||
if mock.EnvVarsFunc == nil {
|
|
||||||
var (
|
|
||||||
envVarsOut []EnvVar
|
|
||||||
errOut error
|
|
||||||
)
|
|
||||||
return envVarsOut, errOut
|
|
||||||
}
|
|
||||||
return mock.EnvVarsFunc()
|
|
||||||
}
|
|
||||||
|
|
||||||
// EnvVarsCalls gets all the calls that were made to EnvVars.
|
|
||||||
// Check the length with:
|
|
||||||
//
|
|
||||||
// len(mockedDiscover.EnvVarsCalls())
|
|
||||||
func (mock *DiscoverMock) EnvVarsCalls() []struct {
|
|
||||||
} {
|
|
||||||
var calls []struct {
|
|
||||||
}
|
|
||||||
mock.lockEnvVars.RLock()
|
|
||||||
calls = mock.calls.EnvVars
|
|
||||||
mock.lockEnvVars.RUnlock()
|
|
||||||
return calls
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hooks calls HooksFunc.
|
// Hooks calls HooksFunc.
|
||||||
func (mock *DiscoverMock) Hooks() ([]Hook, error) {
|
func (mock *DiscoverMock) Hooks() ([]Hook, error) {
|
||||||
callInfo := struct {
|
callInfo := struct {
|
||||||
|
|||||||
@@ -1,41 +0,0 @@
|
|||||||
/**
|
|
||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package discover
|
|
||||||
|
|
||||||
var _ Discover = (*EnvVar)(nil)
|
|
||||||
|
|
||||||
// Devices returns an empty list of devices for a EnvVar discoverer.
|
|
||||||
func (e EnvVar) Devices() ([]Device, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// EnvVars returns an empty list of envs for a EnvVar discoverer.
|
|
||||||
func (e EnvVar) EnvVars() ([]EnvVar, error) {
|
|
||||||
return []EnvVar{e}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mounts returns an empty list of mounts for a EnvVar discoverer.
|
|
||||||
func (e EnvVar) Mounts() ([]Mount, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hooks allows the Hook type to also implement the Discoverer interface.
|
|
||||||
// It returns a single hook
|
|
||||||
func (e EnvVar) Hooks() ([]Hook, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
@@ -45,19 +45,6 @@ func (f firstOf) Devices() ([]Device, error) {
|
|||||||
return nil, errs
|
return nil, errs
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f firstOf) EnvVars() ([]EnvVar, error) {
|
|
||||||
var errs error
|
|
||||||
for _, d := range f {
|
|
||||||
envs, err := d.EnvVars()
|
|
||||||
if err != nil {
|
|
||||||
errs = errors.Join(errs, err)
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
return envs, nil
|
|
||||||
}
|
|
||||||
return nil, errs
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f firstOf) Hooks() ([]Hook, error) {
|
func (f firstOf) Hooks() ([]Hook, error) {
|
||||||
var errs error
|
var errs error
|
||||||
for _, d := range f {
|
for _, d := range f {
|
||||||
|
|||||||
@@ -20,7 +20,6 @@ import (
|
|||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
|
||||||
"strings"
|
"strings"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
@@ -82,25 +81,14 @@ func NewGraphicsMountsDiscoverer(logger logger.Interface, driver *root.Driver, h
|
|||||||
// vulkan ICD files are at {{ .driverRoot }}/vulkan instead of in /etc/vulkan.
|
// vulkan ICD files are at {{ .driverRoot }}/vulkan instead of in /etc/vulkan.
|
||||||
func newVulkanConfigsDiscover(logger logger.Interface, driver *root.Driver) Discover {
|
func newVulkanConfigsDiscover(logger logger.Interface, driver *root.Driver) Discover {
|
||||||
locator := lookup.First(driver.Configs(), driver.Files())
|
locator := lookup.First(driver.Configs(), driver.Files())
|
||||||
|
|
||||||
required := []string{
|
|
||||||
"vulkan/icd.d/nvidia_icd.json",
|
|
||||||
"vulkan/icd.d/nvidia_layers.json",
|
|
||||||
"vulkan/implicit_layer.d/nvidia_layers.json",
|
|
||||||
}
|
|
||||||
// For some RPM-based driver packages, the vulkan ICD files are installed to
|
|
||||||
// /usr/share/vulkan/icd.d/nvidia_icd.%{_target_cpu}.json
|
|
||||||
// We also include this in the list of candidates for the ICD file.
|
|
||||||
switch runtime.GOARCH {
|
|
||||||
case "amd64":
|
|
||||||
required = append(required, "vulkan/icd.d/nvidia_icd.x86_64.json")
|
|
||||||
case "arm64":
|
|
||||||
required = append(required, "vulkan/icd.d/nvidia_icd.aarch64.json")
|
|
||||||
}
|
|
||||||
return &mountsToContainerPath{
|
return &mountsToContainerPath{
|
||||||
logger: logger,
|
logger: logger,
|
||||||
locator: locator,
|
locator: locator,
|
||||||
required: required,
|
required: []string{
|
||||||
|
"vulkan/icd.d/nvidia_icd.json",
|
||||||
|
"vulkan/icd.d/nvidia_layers.json",
|
||||||
|
"vulkan/implicit_layer.d/nvidia_layers.json",
|
||||||
|
},
|
||||||
containerRoot: "/etc",
|
containerRoot: "/etc",
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -25,7 +25,7 @@ import (
|
|||||||
|
|
||||||
func TestGraphicsLibrariesDiscoverer(t *testing.T) {
|
func TestGraphicsLibrariesDiscoverer(t *testing.T) {
|
||||||
logger, _ := testlog.NewNullLogger()
|
logger, _ := testlog.NewNullLogger()
|
||||||
hookCreator := NewHookCreator()
|
hookCreator := NewHookCreator("/usr/bin/nvidia-cdi-hook", false)
|
||||||
|
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
description string
|
description string
|
||||||
|
|||||||
@@ -23,36 +23,6 @@ import (
|
|||||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||||
)
|
)
|
||||||
|
|
||||||
// A HookName represents a supported CDI hooks.
|
|
||||||
type HookName string
|
|
||||||
|
|
||||||
const (
|
|
||||||
// AllHooks is a special hook name that allows all hooks to be matched.
|
|
||||||
AllHooks = HookName("all")
|
|
||||||
|
|
||||||
// A ChmodHook is used to set the file mode of the specified paths.
|
|
||||||
// Deprecated: The chmod hook is deprecated and will be removed in a future release.
|
|
||||||
ChmodHook = HookName("chmod")
|
|
||||||
// A CreateSymlinksHook is used to create symlinks in the container.
|
|
||||||
CreateSymlinksHook = HookName("create-symlinks")
|
|
||||||
// DisableDeviceNodeModificationHook refers to the hook used to ensure that
|
|
||||||
// device nodes are not created by libnvidia-ml.so or nvidia-smi in a
|
|
||||||
// container.
|
|
||||||
// Added in v1.17.8
|
|
||||||
DisableDeviceNodeModificationHook = HookName("disable-device-node-modification")
|
|
||||||
// An EnableCudaCompatHook is used to enabled CUDA Forward Compatibility.
|
|
||||||
// Added in v1.17.5
|
|
||||||
EnableCudaCompatHook = HookName("enable-cuda-compat")
|
|
||||||
// An UpdateLDCacheHook is the hook used to update the ldcache in the
|
|
||||||
// container. This allows injected libraries to be discoverable.
|
|
||||||
UpdateLDCacheHook = HookName("update-ldcache")
|
|
||||||
// A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks
|
|
||||||
// for injected libraries exist in the container.
|
|
||||||
CreateSonameSymlinksHook = HookName("create-soname-symlinks")
|
|
||||||
|
|
||||||
defaultNvidiaCDIHookPath = "/usr/bin/nvidia-cdi-hook"
|
|
||||||
)
|
|
||||||
|
|
||||||
var _ Discover = (*Hook)(nil)
|
var _ Discover = (*Hook)(nil)
|
||||||
|
|
||||||
// Devices returns an empty list of devices for a Hook discoverer.
|
// Devices returns an empty list of devices for a Hook discoverer.
|
||||||
@@ -60,11 +30,6 @@ func (h *Hook) Devices() ([]Device, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// EnvVars returns an empty list of envs for a Hook discoverer.
|
|
||||||
func (h *Hook) EnvVars() ([]EnvVar, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mounts returns an empty list of mounts for a Hook discoverer.
|
// Mounts returns an empty list of mounts for a Hook discoverer.
|
||||||
func (h *Hook) Mounts() ([]Mount, error) {
|
func (h *Hook) Mounts() ([]Mount, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
@@ -80,130 +45,52 @@ func (h *Hook) Hooks() ([]Hook, error) {
|
|||||||
return []Hook{*h}, nil
|
return []Hook{*h}, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type Option func(*cdiHookCreator)
|
// Option is a function that configures the nvcdilib
|
||||||
|
type Option func(*CDIHook)
|
||||||
|
|
||||||
type cdiHookCreator struct {
|
type CDIHook struct {
|
||||||
nvidiaCDIHookPath string
|
nvidiaCDIHookPath string
|
||||||
disabledHooks map[HookName]bool
|
debugLogging bool
|
||||||
|
|
||||||
fixedArgs []string
|
|
||||||
debugLogging bool
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// An allDisabledHookCreator is a HookCreator that does not create any hooks.
|
|
||||||
type allDisabledHookCreator struct{}
|
|
||||||
|
|
||||||
// Create returns nil for all hooks for an allDisabledHookCreator.
|
|
||||||
func (a *allDisabledHookCreator) Create(name HookName, args ...string) *Hook {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// A HookCreator defines an interface for creating discover hooks.
|
|
||||||
type HookCreator interface {
|
type HookCreator interface {
|
||||||
Create(HookName, ...string) *Hook
|
Create(string, ...string) *Hook
|
||||||
}
|
}
|
||||||
|
|
||||||
// WithDisabledHooks sets the set of hooks that are disabled for the CDI hook creator.
|
func NewHookCreator(nvidiaCDIHookPath string, debugLogging bool) HookCreator {
|
||||||
// This can be specified multiple times.
|
CDIHook := &CDIHook{
|
||||||
func WithDisabledHooks(hooks ...HookName) Option {
|
nvidiaCDIHookPath: nvidiaCDIHookPath,
|
||||||
return func(c *cdiHookCreator) {
|
debugLogging: debugLogging,
|
||||||
for _, hook := range hooks {
|
}
|
||||||
c.disabledHooks[hook] = true
|
|
||||||
|
return CDIHook
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c CDIHook) Create(name string, args ...string) *Hook {
|
||||||
|
if name == "create-symlinks" {
|
||||||
|
if len(args) == 0 {
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// WithNVIDIACDIHookPath sets the path to the nvidia-cdi-hook binary.
|
links := []string{}
|
||||||
func WithNVIDIACDIHookPath(nvidiaCDIHookPath string) Option {
|
for _, arg := range args {
|
||||||
return func(c *cdiHookCreator) {
|
links = append(links, "--link", arg)
|
||||||
c.nvidiaCDIHookPath = nvidiaCDIHookPath
|
}
|
||||||
}
|
args = links
|
||||||
}
|
|
||||||
|
|
||||||
func NewHookCreator(opts ...Option) HookCreator {
|
|
||||||
cdiHookCreator := &cdiHookCreator{
|
|
||||||
nvidiaCDIHookPath: defaultNvidiaCDIHookPath,
|
|
||||||
disabledHooks: make(map[HookName]bool),
|
|
||||||
}
|
|
||||||
for _, opt := range opts {
|
|
||||||
opt(cdiHookCreator)
|
|
||||||
}
|
|
||||||
|
|
||||||
if cdiHookCreator.disabledHooks[AllHooks] {
|
|
||||||
return &allDisabledHookCreator{}
|
|
||||||
}
|
|
||||||
|
|
||||||
cdiHookCreator.fixedArgs = getFixedArgsForCDIHookCLI(cdiHookCreator.nvidiaCDIHookPath)
|
|
||||||
|
|
||||||
return cdiHookCreator
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create creates a new hook with the given name and arguments.
|
|
||||||
// If a hook is disabled, a nil hook is returned.
|
|
||||||
func (c cdiHookCreator) Create(name HookName, args ...string) *Hook {
|
|
||||||
if c.isDisabled(name, args...) {
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return &Hook{
|
return &Hook{
|
||||||
Lifecycle: cdi.CreateContainerHook,
|
Lifecycle: cdi.CreateContainerHook,
|
||||||
Path: c.nvidiaCDIHookPath,
|
Path: c.nvidiaCDIHookPath,
|
||||||
Args: append(c.requiredArgs(name), c.transformArgs(name, args...)...),
|
Args: append(c.requiredArgs(name), args...),
|
||||||
Env: []string{fmt.Sprintf("NVIDIA_CTK_DEBUG=%v", c.debugLogging)},
|
Env: []string{fmt.Sprintf("NVIDIA_CTK_DEBUG=%v", c.debugLogging)},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// isDisabled checks if the specified hook name is disabled.
|
func (c CDIHook) requiredArgs(name string) []string {
|
||||||
func (c cdiHookCreator) isDisabled(name HookName, args ...string) bool {
|
base := filepath.Base(c.nvidiaCDIHookPath)
|
||||||
if c.disabledHooks[name] {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
switch name {
|
|
||||||
case CreateSymlinksHook:
|
|
||||||
if len(args) == 0 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
case ChmodHook:
|
|
||||||
if len(args) == 0 {
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c cdiHookCreator) requiredArgs(name HookName) []string {
|
|
||||||
return append(c.fixedArgs, string(name))
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c cdiHookCreator) transformArgs(name HookName, args ...string) []string {
|
|
||||||
switch name {
|
|
||||||
case CreateSymlinksHook:
|
|
||||||
var transformedArgs []string
|
|
||||||
for _, arg := range args {
|
|
||||||
transformedArgs = append(transformedArgs, "--link", arg)
|
|
||||||
}
|
|
||||||
return transformedArgs
|
|
||||||
case ChmodHook:
|
|
||||||
var transformedArgs = []string{"--mode", "755"}
|
|
||||||
for _, arg := range args {
|
|
||||||
transformedArgs = append(transformedArgs, "--path", arg)
|
|
||||||
}
|
|
||||||
return transformedArgs
|
|
||||||
default:
|
|
||||||
return args
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// getFixedArgsForCDIHookCLI returns the fixed arguments for the hook CLI.
|
|
||||||
// If the nvidia-ctk binary is used, hooks are implemented under the hook
|
|
||||||
// subcommand.
|
|
||||||
// For the nvidia-cdi-hook binary, the hooks are implemented as subcommands of
|
|
||||||
// the top-level CLI.
|
|
||||||
func getFixedArgsForCDIHookCLI(nvidiaCDIHookPath string) []string {
|
|
||||||
base := filepath.Base(nvidiaCDIHookPath)
|
|
||||||
if base == "nvidia-ctk" {
|
if base == "nvidia-ctk" {
|
||||||
return []string{base, "hook"}
|
return []string{base, "hook", name}
|
||||||
}
|
}
|
||||||
return []string{base}
|
return []string{base, name}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -51,24 +51,30 @@ func (d ldconfig) Hooks() ([]Hook, error) {
|
|||||||
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
|
return nil, fmt.Errorf("failed to discover mounts for ldcache update: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
var args []string
|
h := createLDCacheUpdateHook(
|
||||||
|
d.hookCreator,
|
||||||
if d.ldconfigPath != "" {
|
d.ldconfigPath,
|
||||||
args = append(args, "--ldconfig-path", d.ldconfigPath)
|
getLibraryPaths(mounts),
|
||||||
}
|
|
||||||
|
|
||||||
for _, f := range uniqueFolders(getLibraryPaths(mounts)) {
|
|
||||||
args = append(args, "--folder", f)
|
|
||||||
}
|
|
||||||
|
|
||||||
h := Merge(
|
|
||||||
d.hookCreator.Create(CreateSonameSymlinksHook, args...),
|
|
||||||
d.hookCreator.Create(UpdateLDCacheHook, args...),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return h.Hooks()
|
return h.Hooks()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// createLDCacheUpdateHook locates the NVIDIA Container Toolkit CLI and creates a hook for updating the LD Cache
|
||||||
|
func createLDCacheUpdateHook(hookCreator HookCreator, ldconfig string, libraries []string) *Hook {
|
||||||
|
var args []string
|
||||||
|
|
||||||
|
if ldconfig != "" {
|
||||||
|
args = append(args, "--ldconfig-path", ldconfig)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, f := range uniqueFolders(libraries) {
|
||||||
|
args = append(args, "--folder", f)
|
||||||
|
}
|
||||||
|
|
||||||
|
return hookCreator.Create("update-ldcache", args...)
|
||||||
|
}
|
||||||
|
|
||||||
// getLibraryPaths extracts the library dirs from the specified mounts
|
// getLibraryPaths extracts the library dirs from the specified mounts
|
||||||
func getLibraryPaths(mounts []Mount) []string {
|
func getLibraryPaths(mounts []Mount) []string {
|
||||||
var paths []string
|
var paths []string
|
||||||
|
|||||||
@@ -31,7 +31,7 @@ const (
|
|||||||
|
|
||||||
func TestLDCacheUpdateHook(t *testing.T) {
|
func TestLDCacheUpdateHook(t *testing.T) {
|
||||||
logger, _ := testlog.NewNullLogger()
|
logger, _ := testlog.NewNullLogger()
|
||||||
hookCreator := NewHookCreator(WithNVIDIACDIHookPath(testNvidiaCDIHookPath))
|
hookCreator := NewHookCreator(testNvidiaCDIHookPath, false)
|
||||||
|
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
description string
|
description string
|
||||||
@@ -39,24 +39,11 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
|||||||
mounts []Mount
|
mounts []Mount
|
||||||
mountError error
|
mountError error
|
||||||
expectedError error
|
expectedError error
|
||||||
expectedHooks []Hook
|
expectedArgs []string
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
description: "empty mounts",
|
description: "empty mounts",
|
||||||
expectedHooks: []Hook{
|
expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache"},
|
||||||
{
|
|
||||||
Lifecycle: "createContainer",
|
|
||||||
Path: testNvidiaCDIHookPath,
|
|
||||||
Args: []string{"nvidia-cdi-hook", "create-soname-symlinks"},
|
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Lifecycle: "createContainer",
|
|
||||||
Path: testNvidiaCDIHookPath,
|
|
||||||
Args: []string{"nvidia-cdi-hook", "update-ldcache"},
|
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "mount error",
|
description: "mount error",
|
||||||
@@ -79,20 +66,7 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
|||||||
Path: "/usr/local/lib/libbar.so",
|
Path: "/usr/local/lib/libbar.so",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
expectedHooks: []Hook{
|
expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
|
||||||
{
|
|
||||||
Lifecycle: "createContainer",
|
|
||||||
Path: testNvidiaCDIHookPath,
|
|
||||||
Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
|
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Lifecycle: "createContainer",
|
|
||||||
Path: testNvidiaCDIHookPath,
|
|
||||||
Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib", "--folder", "/usr/local/libother"},
|
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "host paths are ignored",
|
description: "host paths are ignored",
|
||||||
@@ -102,38 +76,12 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
|||||||
Path: "/usr/local/lib/libfoo.so",
|
Path: "/usr/local/lib/libfoo.so",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
expectedHooks: []Hook{
|
expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"},
|
||||||
{
|
|
||||||
Lifecycle: "createContainer",
|
|
||||||
Path: testNvidiaCDIHookPath,
|
|
||||||
Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--folder", "/usr/local/lib"},
|
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Lifecycle: "createContainer",
|
|
||||||
Path: testNvidiaCDIHookPath,
|
|
||||||
Args: []string{"nvidia-cdi-hook", "update-ldcache", "--folder", "/usr/local/lib"},
|
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "explicit ldconfig path is passed",
|
description: "explicit ldconfig path is passed",
|
||||||
ldconfigPath: testLdconfigPath,
|
ldconfigPath: testLdconfigPath,
|
||||||
expectedHooks: []Hook{
|
expectedArgs: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
|
||||||
{
|
|
||||||
Lifecycle: "createContainer",
|
|
||||||
Path: testNvidiaCDIHookPath,
|
|
||||||
Args: []string{"nvidia-cdi-hook", "create-soname-symlinks", "--ldconfig-path", testLdconfigPath},
|
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Lifecycle: "createContainer",
|
|
||||||
Path: testNvidiaCDIHookPath,
|
|
||||||
Args: []string{"nvidia-cdi-hook", "update-ldcache", "--ldconfig-path", testLdconfigPath},
|
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -144,6 +92,13 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
|||||||
return tc.mounts, tc.mountError
|
return tc.mounts, tc.mountError
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
expectedHook := Hook{
|
||||||
|
Path: testNvidiaCDIHookPath,
|
||||||
|
Args: tc.expectedArgs,
|
||||||
|
Lifecycle: "createContainer",
|
||||||
|
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
||||||
|
}
|
||||||
|
|
||||||
d, err := NewLDCacheUpdateHook(logger, mountMock, hookCreator, tc.ldconfigPath)
|
d, err := NewLDCacheUpdateHook(logger, mountMock, hookCreator, tc.ldconfigPath)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|
||||||
@@ -157,7 +112,9 @@ func TestLDCacheUpdateHook(t *testing.T) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.EqualValues(t, tc.expectedHooks, hooks)
|
require.Len(t, hooks, 1)
|
||||||
|
|
||||||
|
require.EqualValues(t, hooks[0], expectedHook)
|
||||||
|
|
||||||
devices, err := d.Devices()
|
devices, err := d.Devices()
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
|
|||||||
@@ -53,21 +53,6 @@ func (d list) Devices() ([]Device, error) {
|
|||||||
return allDevices, nil
|
return allDevices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// EnvVars returns all environment variables from the included discoverers.
|
|
||||||
func (d list) EnvVars() ([]EnvVar, error) {
|
|
||||||
var allEnvs []EnvVar
|
|
||||||
|
|
||||||
for i, di := range d {
|
|
||||||
envs, err := di.EnvVars()
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("error discovering envs for discoverer %v: %w", i, err)
|
|
||||||
}
|
|
||||||
allEnvs = append(allEnvs, envs...)
|
|
||||||
}
|
|
||||||
|
|
||||||
return allEnvs, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mounts returns all mounts from the included discoverers
|
// Mounts returns all mounts from the included discoverers
|
||||||
func (d list) Mounts() ([]Mount, error) {
|
func (d list) Mounts() ([]Mount, error) {
|
||||||
var allMounts []Mount
|
var allMounts []Mount
|
||||||
|
|||||||
@@ -27,11 +27,6 @@ func (e None) Devices() ([]Device, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// EnvVars returns an empty list of devices
|
|
||||||
func (e None) EnvVars() ([]EnvVar, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Mounts returns an empty list of mounts
|
// Mounts returns an empty list of mounts
|
||||||
func (e None) Mounts() ([]Mount, error) {
|
func (e None) Mounts() ([]Mount, error) {
|
||||||
return nil, nil
|
return nil, nil
|
||||||
|
|||||||
@@ -113,7 +113,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
|
|||||||
expectedHooks: []Hook{
|
expectedHooks: []Hook{
|
||||||
{
|
{
|
||||||
Lifecycle: "createContainer",
|
Lifecycle: "createContainer",
|
||||||
Path: "/usr/bin/nvidia-cdi-hook",
|
Path: "/path/to/nvidia-cdi-hook",
|
||||||
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
|
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
||||||
},
|
},
|
||||||
@@ -146,7 +146,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
|
|||||||
expectedHooks: []Hook{
|
expectedHooks: []Hook{
|
||||||
{
|
{
|
||||||
Lifecycle: "createContainer",
|
Lifecycle: "createContainer",
|
||||||
Path: "/usr/bin/nvidia-cdi-hook",
|
Path: "/path/to/nvidia-cdi-hook",
|
||||||
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
|
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
||||||
},
|
},
|
||||||
@@ -178,7 +178,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
|
|||||||
expectedHooks: []Hook{
|
expectedHooks: []Hook{
|
||||||
{
|
{
|
||||||
Lifecycle: "createContainer",
|
Lifecycle: "createContainer",
|
||||||
Path: "/usr/bin/nvidia-cdi-hook",
|
Path: "/path/to/nvidia-cdi-hook",
|
||||||
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
|
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
||||||
},
|
},
|
||||||
@@ -248,7 +248,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
Lifecycle: "createContainer",
|
Lifecycle: "createContainer",
|
||||||
Path: "/usr/bin/nvidia-cdi-hook",
|
Path: "/path/to/nvidia-cdi-hook",
|
||||||
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
|
Args: []string{"nvidia-cdi-hook", "create-symlinks", "--link", "libcuda.so.1::/usr/lib/libcuda.so"},
|
||||||
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
Env: []string{"NVIDIA_CTK_DEBUG=false"},
|
||||||
},
|
},
|
||||||
@@ -298,7 +298,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
|
|||||||
expectedHooks: []Hook{
|
expectedHooks: []Hook{
|
||||||
{
|
{
|
||||||
Lifecycle: "createContainer",
|
Lifecycle: "createContainer",
|
||||||
Path: "/usr/bin/nvidia-cdi-hook",
|
Path: "/path/to/nvidia-cdi-hook",
|
||||||
Args: []string{
|
Args: []string{
|
||||||
"nvidia-cdi-hook", "create-symlinks",
|
"nvidia-cdi-hook", "create-symlinks",
|
||||||
"--link", "libcuda.so.1::/usr/lib/libcuda.so",
|
"--link", "libcuda.so.1::/usr/lib/libcuda.so",
|
||||||
@@ -311,7 +311,7 @@ func TestWithWithDriverDotSoSymlinks(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
hookCreator := NewHookCreator()
|
hookCreator := NewHookCreator("/path/to/nvidia-cdi-hook", false)
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
d := WithDriverDotSoSymlinks(
|
d := WithDriverDotSoSymlinks(
|
||||||
|
|||||||
@@ -55,11 +55,6 @@ func FromDiscoverer(d discover.Discover) (*cdi.ContainerEdits, error) {
|
|||||||
return nil, fmt.Errorf("failed to discover devices: %v", err)
|
return nil, fmt.Errorf("failed to discover devices: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
envs, err := d.EnvVars()
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to discover environment variables: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
mounts, err := d.Mounts()
|
mounts, err := d.Mounts()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to discover mounts: %v", err)
|
return nil, fmt.Errorf("failed to discover mounts: %v", err)
|
||||||
@@ -79,10 +74,6 @@ func FromDiscoverer(d discover.Discover) (*cdi.ContainerEdits, error) {
|
|||||||
c.Append(edits)
|
c.Append(edits)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, e := range envs {
|
|
||||||
c.Append(envvar(e).toEdits())
|
|
||||||
}
|
|
||||||
|
|
||||||
for _, m := range mounts {
|
for _, m := range mounts {
|
||||||
c.Append(mount(m).toEdits())
|
c.Append(mount(m).toEdits())
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,39 +0,0 @@
|
|||||||
/**
|
|
||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package edits
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
|
|
||||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
|
||||||
"tags.cncf.io/container-device-interface/specs-go"
|
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
|
||||||
)
|
|
||||||
|
|
||||||
type envvar discover.EnvVar
|
|
||||||
|
|
||||||
// toEdits converts a discovered envvar to CDI Container Edits.
|
|
||||||
func (d envvar) toEdits() *cdi.ContainerEdits {
|
|
||||||
e := cdi.ContainerEdits{
|
|
||||||
ContainerEdits: &specs.ContainerEdits{
|
|
||||||
Env: []string{fmt.Sprintf("%s=%s", d.Name, d.Value)},
|
|
||||||
},
|
|
||||||
}
|
|
||||||
return &e
|
|
||||||
}
|
|
||||||
@@ -23,114 +23,34 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/logger"
|
||||||
)
|
)
|
||||||
|
|
||||||
// A RuntimeMode is used to select a specific mode of operation for the NVIDIA Container Runtime.
|
|
||||||
type RuntimeMode string
|
|
||||||
|
|
||||||
const (
|
|
||||||
// In LegacyRuntimeMode the nvidia-container-runtime injects the
|
|
||||||
// nvidia-container-runtime-hook as a prestart hook into the incoming
|
|
||||||
// container config. This hook invokes the nvidia-container-cli to perform
|
|
||||||
// the required modifications to the container.
|
|
||||||
LegacyRuntimeMode = RuntimeMode("legacy")
|
|
||||||
// In CSVRuntimeMode the nvidia-container-runtime processes a set of CSV
|
|
||||||
// files to determine which container modifications are required. The
|
|
||||||
// contents of these CSV files are used to generate an in-memory CDI
|
|
||||||
// specification which is used to modify the container config.
|
|
||||||
CSVRuntimeMode = RuntimeMode("csv")
|
|
||||||
// In CDIRuntimeMode the nvidia-container-runtime applies the modifications
|
|
||||||
// to the container config required for the requested CDI devices in the
|
|
||||||
// same way that other CDI clients would.
|
|
||||||
CDIRuntimeMode = RuntimeMode("cdi")
|
|
||||||
// In JitCDIRuntimeMode the nvidia-container-runtime generates in-memory CDI
|
|
||||||
// specifications for requested NVIDIA devices.
|
|
||||||
JitCDIRuntimeMode = RuntimeMode("jit-cdi")
|
|
||||||
)
|
|
||||||
|
|
||||||
type RuntimeModeResolver interface {
|
|
||||||
ResolveRuntimeMode(string) RuntimeMode
|
|
||||||
}
|
|
||||||
|
|
||||||
type modeResolver struct {
|
|
||||||
logger logger.Interface
|
|
||||||
// TODO: This only needs to consider the requested devices.
|
|
||||||
image *image.CUDA
|
|
||||||
propertyExtractor info.PropertyExtractor
|
|
||||||
defaultMode RuntimeMode
|
|
||||||
}
|
|
||||||
|
|
||||||
type Option func(*modeResolver)
|
|
||||||
|
|
||||||
func WithDefaultMode(defaultMode RuntimeMode) Option {
|
|
||||||
return func(mr *modeResolver) {
|
|
||||||
mr.defaultMode = defaultMode
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WithLogger(logger logger.Interface) Option {
|
|
||||||
return func(mr *modeResolver) {
|
|
||||||
mr.logger = logger
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WithImage(image *image.CUDA) Option {
|
|
||||||
return func(mr *modeResolver) {
|
|
||||||
mr.image = image
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func WithPropertyExtractor(propertyExtractor info.PropertyExtractor) Option {
|
|
||||||
return func(mr *modeResolver) {
|
|
||||||
mr.propertyExtractor = propertyExtractor
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func NewRuntimeModeResolver(opts ...Option) RuntimeModeResolver {
|
|
||||||
r := &modeResolver{
|
|
||||||
defaultMode: JitCDIRuntimeMode,
|
|
||||||
}
|
|
||||||
for _, opt := range opts {
|
|
||||||
opt(r)
|
|
||||||
}
|
|
||||||
if r.logger == nil {
|
|
||||||
r.logger = &logger.NullLogger{}
|
|
||||||
}
|
|
||||||
|
|
||||||
return r
|
|
||||||
}
|
|
||||||
|
|
||||||
// ResolveAutoMode determines the correct mode for the platform if set to "auto"
|
// ResolveAutoMode determines the correct mode for the platform if set to "auto"
|
||||||
func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rmode RuntimeMode) {
|
func ResolveAutoMode(logger logger.Interface, mode string, image image.CUDA) (rmode string) {
|
||||||
r := modeResolver{
|
return resolveMode(logger, mode, image, nil)
|
||||||
logger: logger,
|
|
||||||
image: &image,
|
|
||||||
propertyExtractor: nil,
|
|
||||||
}
|
|
||||||
return r.ResolveRuntimeMode(mode)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *modeResolver) ResolveRuntimeMode(mode string) (rmode RuntimeMode) {
|
func resolveMode(logger logger.Interface, mode string, image image.CUDA, propertyExtractor info.PropertyExtractor) (rmode string) {
|
||||||
if mode != "auto" {
|
if mode != "auto" {
|
||||||
m.logger.Infof("Using requested mode '%s'", mode)
|
logger.Infof("Using requested mode '%s'", mode)
|
||||||
return RuntimeMode(mode)
|
return mode
|
||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
m.logger.Infof("Auto-detected mode as '%v'", rmode)
|
logger.Infof("Auto-detected mode as '%v'", rmode)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
if m.image.OnlyFullyQualifiedCDIDevices() {
|
if image.OnlyFullyQualifiedCDIDevices() {
|
||||||
return CDIRuntimeMode
|
return "cdi"
|
||||||
}
|
}
|
||||||
|
|
||||||
nvinfo := info.New(
|
nvinfo := info.New(
|
||||||
info.WithLogger(m.logger),
|
info.WithLogger(logger),
|
||||||
info.WithPropertyExtractor(m.propertyExtractor),
|
info.WithPropertyExtractor(propertyExtractor),
|
||||||
)
|
)
|
||||||
|
|
||||||
switch nvinfo.ResolvePlatform() {
|
switch nvinfo.ResolvePlatform() {
|
||||||
case info.PlatformNVML, info.PlatformWSL:
|
case info.PlatformNVML, info.PlatformWSL:
|
||||||
return m.defaultMode
|
return "legacy"
|
||||||
case info.PlatformTegra:
|
case info.PlatformTegra:
|
||||||
return CSVRuntimeMode
|
return "csv"
|
||||||
}
|
}
|
||||||
return m.defaultMode
|
return "legacy"
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -43,16 +43,11 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
mode: "not-auto",
|
mode: "not-auto",
|
||||||
expectedMode: "not-auto",
|
expectedMode: "not-auto",
|
||||||
},
|
},
|
||||||
{
|
|
||||||
description: "legacy resolves to legacy",
|
|
||||||
mode: "legacy",
|
|
||||||
expectedMode: "legacy",
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
description: "no info defaults to legacy",
|
description: "no info defaults to legacy",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
info: map[string]bool{},
|
info: map[string]bool{},
|
||||||
expectedMode: "jit-cdi",
|
expectedMode: "legacy",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "non-nvml, non-tegra, nvgpu resolves to csv",
|
description: "non-nvml, non-tegra, nvgpu resolves to csv",
|
||||||
@@ -85,14 +80,14 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
expectedMode: "csv",
|
expectedMode: "csv",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "nvml, non-tegra, non-nvgpu resolves to jit-cdi",
|
description: "nvml, non-tegra, non-nvgpu resolves to legacy",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
info: map[string]bool{
|
info: map[string]bool{
|
||||||
"nvml": true,
|
"nvml": true,
|
||||||
"tegra": false,
|
"tegra": false,
|
||||||
"nvgpu": false,
|
"nvgpu": false,
|
||||||
},
|
},
|
||||||
expectedMode: "jit-cdi",
|
expectedMode: "legacy",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "nvml, non-tegra, nvgpu resolves to csv",
|
description: "nvml, non-tegra, nvgpu resolves to csv",
|
||||||
@@ -105,14 +100,14 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
expectedMode: "csv",
|
expectedMode: "csv",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "nvml, tegra, non-nvgpu resolves to jit-cdi",
|
description: "nvml, tegra, non-nvgpu resolves to legacy",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
info: map[string]bool{
|
info: map[string]bool{
|
||||||
"nvml": true,
|
"nvml": true,
|
||||||
"tegra": true,
|
"tegra": true,
|
||||||
"nvgpu": false,
|
"nvgpu": false,
|
||||||
},
|
},
|
||||||
expectedMode: "jit-cdi",
|
expectedMode: "legacy",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "nvml, tegra, nvgpu resolves to csv",
|
description: "nvml, tegra, nvgpu resolves to csv",
|
||||||
@@ -141,7 +136,7 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "at least one non-cdi device resolves to jit-cdi",
|
description: "at least one non-cdi device resolves to legacy",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
envmap: map[string]string{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
"NVIDIA_VISIBLE_DEVICES": "nvidia.com/gpu=0,0",
|
||||||
@@ -151,7 +146,7 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
"tegra": false,
|
"tegra": false,
|
||||||
"nvgpu": false,
|
"nvgpu": false,
|
||||||
},
|
},
|
||||||
expectedMode: "jit-cdi",
|
expectedMode: "legacy",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "at least one non-cdi device resolves to csv",
|
description: "at least one non-cdi device resolves to csv",
|
||||||
@@ -175,7 +170,7 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
expectedMode: "cdi",
|
expectedMode: "cdi",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "cdi mount and non-CDI devices resolves to jit-cdi",
|
description: "cdi mount and non-CDI devices resolves to legacy",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
mounts: []string{
|
mounts: []string{
|
||||||
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
"/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
||||||
@@ -186,10 +181,10 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
"tegra": false,
|
"tegra": false,
|
||||||
"nvgpu": false,
|
"nvgpu": false,
|
||||||
},
|
},
|
||||||
expectedMode: "jit-cdi",
|
expectedMode: "legacy",
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "cdi mount and non-CDI envvar resolves to cdi",
|
description: "cdi mount and non-CDI envvar resolves to legacy",
|
||||||
mode: "auto",
|
mode: "auto",
|
||||||
envmap: map[string]string{
|
envmap: map[string]string{
|
||||||
"NVIDIA_VISIBLE_DEVICES": "0",
|
"NVIDIA_VISIBLE_DEVICES": "0",
|
||||||
@@ -202,7 +197,7 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
"tegra": false,
|
"tegra": false,
|
||||||
"nvgpu": false,
|
"nvgpu": false,
|
||||||
},
|
},
|
||||||
expectedMode: "cdi",
|
expectedMode: "legacy",
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -237,15 +232,8 @@ func TestResolveAutoMode(t *testing.T) {
|
|||||||
image, _ := image.New(
|
image, _ := image.New(
|
||||||
image.WithEnvMap(tc.envmap),
|
image.WithEnvMap(tc.envmap),
|
||||||
image.WithMounts(mounts),
|
image.WithMounts(mounts),
|
||||||
image.WithAcceptDeviceListAsVolumeMounts(true),
|
|
||||||
image.WithAcceptEnvvarUnprivileged(true),
|
|
||||||
)
|
)
|
||||||
mr := NewRuntimeModeResolver(
|
mode := resolveMode(logger, tc.mode, image, properties)
|
||||||
WithLogger(logger),
|
|
||||||
WithImage(&image),
|
|
||||||
WithPropertyExtractor(properties),
|
|
||||||
)
|
|
||||||
mode := mr.ResolveRuntimeMode(tc.mode)
|
|
||||||
require.EqualValues(t, tc.expectedMode, mode)
|
require.EqualValues(t, tc.expectedMode, mode)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,206 +0,0 @@
|
|||||||
/**
|
|
||||||
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
|
||||||
# SPDX-License-Identifier: Apache-2.0
|
|
||||||
#
|
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
# you may not use this file except in compliance with the License.
|
|
||||||
# You may obtain a copy of the License at
|
|
||||||
#
|
|
||||||
# http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
#
|
|
||||||
# Unless required by applicable law or agreed to in writing, software
|
|
||||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
# See the License for the specific language governing permissions and
|
|
||||||
# limitations under the License.
|
|
||||||
**/
|
|
||||||
|
|
||||||
package ldconfig
|
|
||||||
|
|
||||||
import (
|
|
||||||
"fmt"
|
|
||||||
"os"
|
|
||||||
"os/exec"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
|
||||||
)
|
|
||||||
|
|
||||||
const (
|
|
||||||
// ldsoconfdFilenamePattern specifies the pattern for the filename
|
|
||||||
// in ld.so.conf.d that includes references to the specified directories.
|
|
||||||
// The 00-nvcr prefix is chosen to ensure that these libraries have a
|
|
||||||
// higher precedence than other libraries on the system, but lower than
|
|
||||||
// the 00-cuda-compat that is included in some containers.
|
|
||||||
ldsoconfdFilenamePattern = "00-nvcr-*.conf"
|
|
||||||
)
|
|
||||||
|
|
||||||
type Ldconfig struct {
|
|
||||||
ldconfigPath string
|
|
||||||
inRoot string
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewRunner creates an exec.Cmd that can be used to run ldconfig.
|
|
||||||
func NewRunner(id string, ldconfigPath string, containerRoot string, additionalargs ...string) (*exec.Cmd, error) {
|
|
||||||
args := []string{
|
|
||||||
id,
|
|
||||||
strings.TrimPrefix(config.NormalizeLDConfigPath("@"+ldconfigPath), "@"),
|
|
||||||
containerRoot,
|
|
||||||
}
|
|
||||||
args = append(args, additionalargs...)
|
|
||||||
|
|
||||||
return createReexecCommand(args)
|
|
||||||
}
|
|
||||||
|
|
||||||
// New creates an Ldconfig struct that is used to perform operations on the
|
|
||||||
// ldcache and libraries in a particular root (e.g. a container).
|
|
||||||
func New(ldconfigPath string, inRoot string) (*Ldconfig, error) {
|
|
||||||
l := &Ldconfig{
|
|
||||||
ldconfigPath: ldconfigPath,
|
|
||||||
inRoot: inRoot,
|
|
||||||
}
|
|
||||||
if ldconfigPath == "" {
|
|
||||||
return nil, fmt.Errorf("an ldconfig path must be specified")
|
|
||||||
}
|
|
||||||
if inRoot == "" || inRoot == "/" {
|
|
||||||
return nil, fmt.Errorf("ldconfig must be run in the non-system root")
|
|
||||||
}
|
|
||||||
return l, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// CreateSonameSymlinks uses ldconfig to create the soname symlinks in the
|
|
||||||
// specified directories.
|
|
||||||
func (l *Ldconfig) CreateSonameSymlinks(directories ...string) error {
|
|
||||||
if len(directories) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
ldconfigPath, err := l.prepareRoot()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
args := []string{
|
|
||||||
filepath.Base(ldconfigPath),
|
|
||||||
// Explicitly disable updating the LDCache.
|
|
||||||
"-N",
|
|
||||||
// Specify -n to only process the specified directories.
|
|
||||||
"-n",
|
|
||||||
}
|
|
||||||
args = append(args, directories...)
|
|
||||||
|
|
||||||
return SafeExec(ldconfigPath, args, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Ldconfig) UpdateLDCache(directories ...string) error {
|
|
||||||
ldconfigPath, err := l.prepareRoot()
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
args := []string{
|
|
||||||
filepath.Base(ldconfigPath),
|
|
||||||
// Explicitly specify using /etc/ld.so.conf since the host's ldconfig may
|
|
||||||
// be configured to use a different config file by default.
|
|
||||||
"-f", "/etc/ld.so.conf",
|
|
||||||
}
|
|
||||||
|
|
||||||
if l.ldcacheExists() {
|
|
||||||
args = append(args, "-C", "/etc/ld.so.cache")
|
|
||||||
} else {
|
|
||||||
args = append(args, "-N")
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the ld.so.conf.d directory exists, we create a config file there
|
|
||||||
// containing the required directories, otherwise we add the specified
|
|
||||||
// directories to the ldconfig command directly.
|
|
||||||
if l.ldsoconfdDirectoryExists() {
|
|
||||||
err := createLdsoconfdFile(ldsoconfdFilenamePattern, directories...)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to update ld.so.conf.d: %w", err)
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
args = append(args, directories...)
|
|
||||||
}
|
|
||||||
|
|
||||||
return SafeExec(ldconfigPath, args, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Ldconfig) prepareRoot() (string, error) {
|
|
||||||
// To prevent leaking the parent proc filesystem, we create a new proc mount
|
|
||||||
// in the specified root.
|
|
||||||
if err := mountProc(l.inRoot); err != nil {
|
|
||||||
return "", fmt.Errorf("error mounting /proc: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// We mount the host ldconfig before we pivot root since host paths are not
|
|
||||||
// visible after the pivot root operation.
|
|
||||||
ldconfigPath, err := mountLdConfig(l.ldconfigPath, l.inRoot)
|
|
||||||
if err != nil {
|
|
||||||
return "", fmt.Errorf("error mounting host ldconfig: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
// We pivot to the container root for the new process, this further limits
|
|
||||||
// access to the host.
|
|
||||||
if err := pivotRoot(l.inRoot); err != nil {
|
|
||||||
return "", fmt.Errorf("error running pivot_root: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return ldconfigPath, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Ldconfig) ldcacheExists() bool {
|
|
||||||
if _, err := os.Stat("/etc/ld.so.cache"); err != nil && os.IsNotExist(err) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return true
|
|
||||||
}
|
|
||||||
|
|
||||||
func (l *Ldconfig) ldsoconfdDirectoryExists() bool {
|
|
||||||
info, err := os.Stat("/etc/ld.so.conf.d")
|
|
||||||
if os.IsNotExist(err) {
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
return info.IsDir()
|
|
||||||
}
|
|
||||||
|
|
||||||
// createLdsoconfdFile creates a file at /etc/ld.so.conf.d/.
|
|
||||||
// The file is created at /etc/ld.so.conf.d/{{ .pattern }} using `CreateTemp` and
|
|
||||||
// contains the specified directories on each line.
|
|
||||||
func createLdsoconfdFile(pattern string, dirs ...string) error {
|
|
||||||
if len(dirs) == 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
ldsoconfdDir := "/etc/ld.so.conf.d"
|
|
||||||
if err := os.MkdirAll(ldsoconfdDir, 0755); err != nil {
|
|
||||||
return fmt.Errorf("failed to create ld.so.conf.d: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
configFile, err := os.CreateTemp(ldsoconfdDir, pattern)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to create config file: %w", err)
|
|
||||||
}
|
|
||||||
defer func() {
|
|
||||||
_ = configFile.Close()
|
|
||||||
}()
|
|
||||||
|
|
||||||
added := make(map[string]bool)
|
|
||||||
for _, dir := range dirs {
|
|
||||||
if added[dir] {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
_, err = fmt.Fprintf(configFile, "%s\n", dir)
|
|
||||||
if err != nil {
|
|
||||||
return fmt.Errorf("failed to update config file: %w", err)
|
|
||||||
}
|
|
||||||
added[dir] = true
|
|
||||||
}
|
|
||||||
|
|
||||||
// The created file needs to be world readable for the cases where the container is run as a non-root user.
|
|
||||||
if err := configFile.Chmod(0644); err != nil {
|
|
||||||
return fmt.Errorf("failed to chmod config file: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
@@ -28,29 +28,17 @@ import (
|
|||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/modifier/cdi"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/oci"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi"
|
||||||
)
|
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||||
|
|
||||||
const (
|
|
||||||
automaticDeviceVendor = "runtime.nvidia.com"
|
|
||||||
automaticDeviceClass = "gpu"
|
|
||||||
automaticDeviceKind = automaticDeviceVendor + "/" + automaticDeviceClass
|
|
||||||
automaticDevicePrefix = automaticDeviceKind + "="
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the
|
// NewCDIModifier creates an OCI spec modifier that determines the modifications to make based on the
|
||||||
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is
|
// CDI specifications available on the system. The NVIDIA_VISIBLE_DEVICES environment variable is
|
||||||
// used to select the devices to include.
|
// used to select the devices to include.
|
||||||
func NewCDIModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, isJitCDI bool) (oci.SpecModifier, error) {
|
func NewCDIModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (oci.SpecModifier, error) {
|
||||||
defaultKind := cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind
|
devices, err := getDevicesFromSpec(logger, ociSpec, cfg)
|
||||||
if isJitCDI {
|
if err != nil {
|
||||||
defaultKind = automaticDeviceKind
|
return nil, fmt.Errorf("failed to get required devices from OCI specification: %v", err)
|
||||||
}
|
}
|
||||||
deviceRequestor := newCDIDeviceRequestor(
|
|
||||||
logger,
|
|
||||||
image,
|
|
||||||
defaultKind,
|
|
||||||
)
|
|
||||||
devices := deviceRequestor.DeviceRequests()
|
|
||||||
if len(devices) == 0 {
|
if len(devices) == 0 {
|
||||||
logger.Debugf("No devices requested; no modification required.")
|
logger.Debugf("No devices requested; no modification required.")
|
||||||
return nil, nil
|
return nil, nil
|
||||||
@@ -77,38 +65,87 @@ func NewCDIModifier(logger logger.Interface, cfg *config.Config, image image.CUD
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
|
|
||||||
type deviceRequestor interface {
|
func getDevicesFromSpec(logger logger.Interface, ociSpec oci.Spec, cfg *config.Config) ([]string, error) {
|
||||||
DeviceRequests() []string
|
rawSpec, err := ociSpec.Load()
|
||||||
}
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||||
type cdiDeviceRequestor struct {
|
|
||||||
image image.CUDA
|
|
||||||
logger logger.Interface
|
|
||||||
defaultKind string
|
|
||||||
}
|
|
||||||
|
|
||||||
func newCDIDeviceRequestor(logger logger.Interface, image image.CUDA, defaultKind string) deviceRequestor {
|
|
||||||
c := &cdiDeviceRequestor{
|
|
||||||
logger: logger,
|
|
||||||
image: image,
|
|
||||||
defaultKind: defaultKind,
|
|
||||||
}
|
}
|
||||||
return withUniqueDevices(c)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *cdiDeviceRequestor) DeviceRequests() []string {
|
annotationDevices, err := getAnnotationDevices(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes, rawSpec.Annotations)
|
||||||
if c == nil {
|
if err != nil {
|
||||||
return nil
|
return nil, fmt.Errorf("failed to parse container annotations: %v", err)
|
||||||
}
|
}
|
||||||
|
if len(annotationDevices) > 0 {
|
||||||
|
return annotationDevices, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
container, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
if cfg.AcceptDeviceListAsVolumeMounts {
|
||||||
|
mountDevices := container.CDIDevicesFromMounts()
|
||||||
|
if len(mountDevices) > 0 {
|
||||||
|
return mountDevices, nil
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
var devices []string
|
var devices []string
|
||||||
for _, name := range c.image.VisibleDevices() {
|
seen := make(map[string]bool)
|
||||||
|
for _, name := range container.VisibleDevicesFromEnvVar() {
|
||||||
if !parser.IsQualifiedName(name) {
|
if !parser.IsQualifiedName(name) {
|
||||||
name = fmt.Sprintf("%s=%s", c.defaultKind, name)
|
name = fmt.Sprintf("%s=%s", cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.DefaultKind, name)
|
||||||
|
}
|
||||||
|
if seen[name] {
|
||||||
|
logger.Debugf("Ignoring duplicate device %q", name)
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
devices = append(devices, name)
|
devices = append(devices, name)
|
||||||
}
|
}
|
||||||
|
|
||||||
return devices
|
if len(devices) == 0 {
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if cfg.AcceptEnvvarUnprivileged || image.IsPrivileged(rawSpec) {
|
||||||
|
return devices, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.Warningf("Ignoring devices specified in NVIDIA_VISIBLE_DEVICES: %v", devices)
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// getAnnotationDevices returns a list of devices specified in the annotations.
|
||||||
|
// Keys starting with the specified prefixes are considered and expected to contain a comma-separated list of
|
||||||
|
// fully-qualified CDI device names. If any device name is not fully-qualified, an error is returned.
|
||||||
|
// The list of returned devices is deduplicated.
|
||||||
|
func getAnnotationDevices(prefixes []string, annotations map[string]string) ([]string, error) {
|
||||||
|
devicesByKey := make(map[string][]string)
|
||||||
|
for key, value := range annotations {
|
||||||
|
for _, prefix := range prefixes {
|
||||||
|
if strings.HasPrefix(key, prefix) {
|
||||||
|
devicesByKey[key] = strings.Split(value, ",")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
seen := make(map[string]bool)
|
||||||
|
var annotationDevices []string
|
||||||
|
for key, devices := range devicesByKey {
|
||||||
|
for _, device := range devices {
|
||||||
|
if !parser.IsQualifiedName(device) {
|
||||||
|
return nil, fmt.Errorf("invalid device name %q in annotation %q", device, key)
|
||||||
|
}
|
||||||
|
if seen[device] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
annotationDevices = append(annotationDevices, device)
|
||||||
|
seen[device] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return annotationDevices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// filterAutomaticDevices searches for "automatic" device names in the input slice.
|
// filterAutomaticDevices searches for "automatic" device names in the input slice.
|
||||||
@@ -118,38 +155,21 @@ func (c *cdiDeviceRequestor) DeviceRequests() []string {
|
|||||||
func filterAutomaticDevices(devices []string) []string {
|
func filterAutomaticDevices(devices []string) []string {
|
||||||
var automatic []string
|
var automatic []string
|
||||||
for _, device := range devices {
|
for _, device := range devices {
|
||||||
if !strings.HasPrefix(device, automaticDevicePrefix) {
|
vendor, class, _ := parser.ParseDevice(device)
|
||||||
continue
|
if vendor == "runtime.nvidia.com" && class == "gpu" {
|
||||||
|
automatic = append(automatic, device)
|
||||||
}
|
}
|
||||||
automatic = append(automatic, device)
|
|
||||||
}
|
}
|
||||||
return automatic
|
return automatic
|
||||||
}
|
}
|
||||||
|
|
||||||
func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) {
|
func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, devices []string) (oci.SpecModifier, error) {
|
||||||
logger.Debugf("Generating in-memory CDI specs for devices %v", devices)
|
logger.Debugf("Generating in-memory CDI specs for devices %v", devices)
|
||||||
|
spec, err := generateAutomaticCDISpec(logger, cfg, devices)
|
||||||
var identifiers []string
|
|
||||||
for _, device := range devices {
|
|
||||||
identifiers = append(identifiers, strings.TrimPrefix(device, automaticDevicePrefix))
|
|
||||||
}
|
|
||||||
|
|
||||||
cdilib, err := nvcdi.New(
|
|
||||||
nvcdi.WithLogger(logger),
|
|
||||||
nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
|
|
||||||
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
|
|
||||||
nvcdi.WithVendor(automaticDeviceVendor),
|
|
||||||
nvcdi.WithClass(automaticDeviceClass),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("failed to construct CDI library: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
spec, err := cdilib.GetSpec(identifiers...)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to generate CDI spec: %w", err)
|
return nil, fmt.Errorf("failed to generate CDI spec: %w", err)
|
||||||
}
|
}
|
||||||
cdiDeviceRequestor, err := cdi.New(
|
cdiModifier, err := cdi.New(
|
||||||
cdi.WithLogger(logger),
|
cdi.WithLogger(logger),
|
||||||
cdi.WithSpec(spec.Raw()),
|
cdi.WithSpec(spec.Raw()),
|
||||||
)
|
)
|
||||||
@@ -157,29 +177,41 @@ func newAutomaticCDISpecModifier(logger logger.Interface, cfg *config.Config, de
|
|||||||
return nil, fmt.Errorf("failed to construct CDI modifier: %w", err)
|
return nil, fmt.Errorf("failed to construct CDI modifier: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
return cdiDeviceRequestor, nil
|
return cdiModifier, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
type deduplicatedDeviceRequestor struct {
|
func generateAutomaticCDISpec(logger logger.Interface, cfg *config.Config, devices []string) (spec.Interface, error) {
|
||||||
deviceRequestor
|
cdilib, err := nvcdi.New(
|
||||||
}
|
nvcdi.WithLogger(logger),
|
||||||
|
nvcdi.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path),
|
||||||
func withUniqueDevices(deviceRequestor deviceRequestor) deviceRequestor {
|
nvcdi.WithDriverRoot(cfg.NVIDIAContainerCLIConfig.Root),
|
||||||
return &deduplicatedDeviceRequestor{deviceRequestor: deviceRequestor}
|
nvcdi.WithVendor("runtime.nvidia.com"),
|
||||||
}
|
nvcdi.WithClass("gpu"),
|
||||||
|
)
|
||||||
func (d *deduplicatedDeviceRequestor) DeviceRequests() []string {
|
if err != nil {
|
||||||
if d == nil {
|
return nil, fmt.Errorf("failed to construct CDI library: %w", err)
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
seen := make(map[string]bool)
|
|
||||||
var devices []string
|
identifiers := []string{}
|
||||||
for _, device := range d.deviceRequestor.DeviceRequests() {
|
for _, device := range devices {
|
||||||
if seen[device] {
|
_, _, id := parser.ParseDevice(device)
|
||||||
continue
|
identifiers = append(identifiers, id)
|
||||||
}
|
|
||||||
seen[device] = true
|
|
||||||
devices = append(devices, device)
|
|
||||||
}
|
}
|
||||||
return devices
|
|
||||||
|
deviceSpecs, err := cdilib.GetDeviceSpecsByID(identifiers...)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get CDI device specs: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
commonEdits, err := cdilib.GetCommonEdits()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to get common CDI spec edits: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return spec.New(
|
||||||
|
spec.WithDeviceSpecs(deviceSpecs),
|
||||||
|
spec.WithEdits(*commonEdits.ContainerEdits),
|
||||||
|
spec.WithVendor("runtime.nvidia.com"),
|
||||||
|
spec.WithClass("gpu"),
|
||||||
|
)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -17,156 +17,76 @@
|
|||||||
package modifier
|
package modifier
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"fmt"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/opencontainers/runtime-spec/specs-go"
|
|
||||||
testlog "github.com/sirupsen/logrus/hooks/test"
|
|
||||||
"github.com/stretchr/testify/require"
|
"github.com/stretchr/testify/require"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestDeviceRequests(t *testing.T) {
|
func TestGetAnnotationDevices(t *testing.T) {
|
||||||
logger, _ := testlog.NewNullLogger()
|
|
||||||
|
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
description string
|
description string
|
||||||
input cdiDeviceRequestor
|
|
||||||
spec *specs.Spec
|
|
||||||
prefixes []string
|
prefixes []string
|
||||||
|
annotations map[string]string
|
||||||
expectedDevices []string
|
expectedDevices []string
|
||||||
|
expectedError error
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
description: "empty spec yields no devices",
|
description: "no annotations",
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "cdi devices from mounts",
|
|
||||||
input: cdiDeviceRequestor{
|
|
||||||
defaultKind: "nvidia.com/gpu",
|
|
||||||
},
|
|
||||||
spec: &specs.Spec{
|
|
||||||
Mounts: []specs.Mount{
|
|
||||||
{
|
|
||||||
Destination: "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/0",
|
|
||||||
Source: "/dev/null",
|
|
||||||
},
|
|
||||||
{
|
|
||||||
Destination: "/var/run/nvidia-container-devices/cdi/nvidia.com/gpu/1",
|
|
||||||
Source: "/dev/null",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"nvidia.com/gpu=0", "nvidia.com/gpu=1"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "cdi devices from envvar",
|
|
||||||
input: cdiDeviceRequestor{
|
|
||||||
defaultKind: "nvidia.com/gpu",
|
|
||||||
},
|
|
||||||
spec: &specs.Spec{
|
|
||||||
Process: &specs.Process{
|
|
||||||
Env: []string{"NVIDIA_VISIBLE_DEVICES=0,example.com/class=device"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"nvidia.com/gpu=0", "example.com/class=device"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "cdi devices from envvar with default kind",
|
|
||||||
input: cdiDeviceRequestor{
|
|
||||||
defaultKind: "runtime.nvidia.com/gpu",
|
|
||||||
},
|
|
||||||
spec: &specs.Spec{
|
|
||||||
Process: &specs.Process{
|
|
||||||
Env: []string{"NVIDIA_VISIBLE_DEVICES=all"},
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"runtime.nvidia.com/gpu=all"},
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "no matching annotations",
|
description: "no matching annotations",
|
||||||
prefixes: []string{"not-prefix/"},
|
prefixes: []string{"not-prefix/"},
|
||||||
spec: &specs.Spec{
|
annotations: map[string]string{
|
||||||
Annotations: map[string]string{
|
"prefix/foo": "example.com/device=bar",
|
||||||
"prefix/foo": "example.com/device=bar",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "single matching annotation",
|
description: "single matching annotation",
|
||||||
prefixes: []string{"prefix/"},
|
prefixes: []string{"prefix/"},
|
||||||
spec: &specs.Spec{
|
annotations: map[string]string{
|
||||||
Annotations: map[string]string{
|
"prefix/foo": "example.com/device=bar",
|
||||||
"prefix/foo": "example.com/device=bar",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"example.com/device=bar"},
|
expectedDevices: []string{"example.com/device=bar"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "multiple matching annotations",
|
description: "multiple matching annotations",
|
||||||
prefixes: []string{"prefix/", "another-prefix/"},
|
prefixes: []string{"prefix/", "another-prefix/"},
|
||||||
spec: &specs.Spec{
|
annotations: map[string]string{
|
||||||
Annotations: map[string]string{
|
"prefix/foo": "example.com/device=bar",
|
||||||
"prefix/foo": "example.com/device=bar",
|
"another-prefix/bar": "example.com/device=baz",
|
||||||
"another-prefix/bar": "example.com/device=baz",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"example.com/device=baz", "example.com/device=bar"},
|
expectedDevices: []string{"example.com/device=bar", "example.com/device=baz"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "multiple matching annotations with duplicate devices",
|
description: "multiple matching annotations with duplicate devices",
|
||||||
prefixes: []string{"prefix/", "another-prefix/"},
|
prefixes: []string{"prefix/", "another-prefix/"},
|
||||||
spec: &specs.Spec{
|
annotations: map[string]string{
|
||||||
Annotations: map[string]string{
|
"prefix/foo": "example.com/device=bar",
|
||||||
"prefix/foo": "example.com/device=bar",
|
"another-prefix/bar": "example.com/device=bar",
|
||||||
"another-prefix/bar": "example.com/device=bar",
|
|
||||||
},
|
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"example.com/device=bar", "example.com/device=bar"},
|
expectedDevices: []string{"example.com/device=bar"},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices in annotations are expanded",
|
description: "invalid devices",
|
||||||
input: cdiDeviceRequestor{
|
prefixes: []string{"prefix/"},
|
||||||
defaultKind: "nvidia.com/gpu",
|
annotations: map[string]string{
|
||||||
|
"prefix/foo": "example.com/device",
|
||||||
},
|
},
|
||||||
prefixes: []string{"prefix/"},
|
expectedError: fmt.Errorf("invalid device %q", "example.com/device"),
|
||||||
spec: &specs.Spec{
|
|
||||||
Annotations: map[string]string{
|
|
||||||
"prefix/foo": "device",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"nvidia.com/gpu=device"},
|
|
||||||
},
|
|
||||||
{
|
|
||||||
description: "invalid devices in annotations are treated as strings",
|
|
||||||
input: cdiDeviceRequestor{
|
|
||||||
defaultKind: "nvidia.com/gpu",
|
|
||||||
},
|
|
||||||
prefixes: []string{"prefix/"},
|
|
||||||
spec: &specs.Spec{
|
|
||||||
Annotations: map[string]string{
|
|
||||||
"prefix/foo": "example.com/device",
|
|
||||||
},
|
|
||||||
},
|
|
||||||
expectedDevices: []string{"nvidia.com/gpu=example.com/device"},
|
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
tc.input.logger = logger
|
|
||||||
|
|
||||||
image, err := image.NewCUDAImageFromSpec(
|
|
||||||
tc.spec,
|
|
||||||
image.WithAcceptDeviceListAsVolumeMounts(true),
|
|
||||||
image.WithAcceptEnvvarUnprivileged(true),
|
|
||||||
image.WithAnnotationsPrefixes(tc.prefixes),
|
|
||||||
)
|
|
||||||
require.NoError(t, err)
|
|
||||||
tc.input.image = image
|
|
||||||
|
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
devices := tc.input.DeviceRequests()
|
devices, err := getAnnotationDevices(tc.prefixes, tc.annotations)
|
||||||
|
if tc.expectedError != nil {
|
||||||
|
require.Error(t, err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
require.EqualValues(t, tc.expectedDevices, devices)
|
require.ElementsMatch(t, tc.expectedDevices, devices)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ import (
|
|||||||
// NewCSVModifier creates a modifier that applies modifications to an OCI spec if required by the runtime wrapper.
|
// NewCSVModifier creates a modifier that applies modifications to an OCI spec if required by the runtime wrapper.
|
||||||
// The modifications are defined by CSV MountSpecs.
|
// The modifications are defined by CSV MountSpecs.
|
||||||
func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image.CUDA) (oci.SpecModifier, error) {
|
func NewCSVModifier(logger logger.Interface, cfg *config.Config, container image.CUDA) (oci.SpecModifier, error) {
|
||||||
if devices := container.VisibleDevices(); len(devices) == 0 {
|
if devices := container.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
||||||
logger.Infof("No modification required; no devices requested")
|
logger.Infof("No modification required; no devices requested")
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -37,7 +37,7 @@ import (
|
|||||||
//
|
//
|
||||||
// If no devices are selected, no changes are made.
|
// If no devices are selected, no changes are made.
|
||||||
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) {
|
func NewFeatureGatedModifier(logger logger.Interface, cfg *config.Config, image image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) {
|
||||||
if devices := image.VisibleDevices(); len(devices) == 0 {
|
if devices := image.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
||||||
logger.Infof("No modification required; no devices requested")
|
logger.Infof("No modification required; no devices requested")
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -29,10 +29,9 @@ import (
|
|||||||
|
|
||||||
// NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification.
|
// NewGraphicsModifier constructs a modifier that injects graphics-related modifications into an OCI runtime specification.
|
||||||
// The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
|
// The value of the NVIDIA_DRIVER_CAPABILITIES environment variable is checked to determine if this modification should be made.
|
||||||
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, container image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) {
|
func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, containerImage image.CUDA, driver *root.Driver, hookCreator discover.HookCreator) (oci.SpecModifier, error) {
|
||||||
devices, reason := requiresGraphicsModifier(container)
|
if required, reason := requiresGraphicsModifier(containerImage); !required {
|
||||||
if len(devices) == 0 {
|
logger.Infof("No graphics modifier required: %v", reason)
|
||||||
logger.Infof("No graphics modifier required; %v", reason)
|
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -49,7 +48,7 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, container
|
|||||||
devRoot := driver.Root
|
devRoot := driver.Root
|
||||||
drmNodes, err := discover.NewDRMNodesDiscoverer(
|
drmNodes, err := discover.NewDRMNodesDiscoverer(
|
||||||
logger,
|
logger,
|
||||||
image.NewVisibleDevices(devices...),
|
containerImage.DevicesFromEnvvars(image.EnvVarNvidiaVisibleDevices),
|
||||||
devRoot,
|
devRoot,
|
||||||
hookCreator,
|
hookCreator,
|
||||||
)
|
)
|
||||||
@@ -65,15 +64,14 @@ func NewGraphicsModifier(logger logger.Interface, cfg *config.Config, container
|
|||||||
}
|
}
|
||||||
|
|
||||||
// requiresGraphicsModifier determines whether a graphics modifier is required.
|
// requiresGraphicsModifier determines whether a graphics modifier is required.
|
||||||
func requiresGraphicsModifier(cudaImage image.CUDA) ([]string, string) {
|
func requiresGraphicsModifier(cudaImage image.CUDA) (bool, string) {
|
||||||
devices := cudaImage.VisibleDevices()
|
if devices := cudaImage.VisibleDevicesFromEnvVar(); len(devices) == 0 {
|
||||||
if len(devices) == 0 {
|
return false, "no devices requested"
|
||||||
return nil, "no devices requested"
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if !cudaImage.GetDriverCapabilities().Any(image.DriverCapabilityGraphics, image.DriverCapabilityDisplay) {
|
if !cudaImage.GetDriverCapabilities().Any(image.DriverCapabilityGraphics, image.DriverCapabilityDisplay) {
|
||||||
return nil, "no required capabilities requested"
|
return false, "no required capabilities requested"
|
||||||
}
|
}
|
||||||
|
|
||||||
return devices, ""
|
return true, ""
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -26,9 +26,9 @@ import (
|
|||||||
|
|
||||||
func TestGraphicsModifier(t *testing.T) {
|
func TestGraphicsModifier(t *testing.T) {
|
||||||
testCases := []struct {
|
testCases := []struct {
|
||||||
description string
|
description string
|
||||||
envmap map[string]string
|
envmap map[string]string
|
||||||
expectedDevices []string
|
expectedRequired bool
|
||||||
}{
|
}{
|
||||||
{
|
{
|
||||||
description: "empty image does not create modifier",
|
description: "empty image does not create modifier",
|
||||||
@@ -52,7 +52,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "all",
|
"NVIDIA_DRIVER_CAPABILITIES": "all",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"all"},
|
expectedRequired: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with graphics capability creates modifier",
|
description: "devices with graphics capability creates modifier",
|
||||||
@@ -60,7 +60,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "graphics",
|
"NVIDIA_DRIVER_CAPABILITIES": "graphics",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"all"},
|
expectedRequired: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with compute,graphics capability creates modifier",
|
description: "devices with compute,graphics capability creates modifier",
|
||||||
@@ -68,7 +68,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "compute,graphics",
|
"NVIDIA_DRIVER_CAPABILITIES": "compute,graphics",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"all"},
|
expectedRequired: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with display capability creates modifier",
|
description: "devices with display capability creates modifier",
|
||||||
@@ -76,7 +76,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "display",
|
"NVIDIA_DRIVER_CAPABILITIES": "display",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"all"},
|
expectedRequired: true,
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
description: "devices with display,graphics capability creates modifier",
|
description: "devices with display,graphics capability creates modifier",
|
||||||
@@ -84,7 +84,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
"NVIDIA_VISIBLE_DEVICES": "all",
|
"NVIDIA_VISIBLE_DEVICES": "all",
|
||||||
"NVIDIA_DRIVER_CAPABILITIES": "display,graphics",
|
"NVIDIA_DRIVER_CAPABILITIES": "display,graphics",
|
||||||
},
|
},
|
||||||
expectedDevices: []string{"all"},
|
expectedRequired: true,
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -94,7 +94,7 @@ func TestGraphicsModifier(t *testing.T) {
|
|||||||
image.WithEnvMap(tc.envmap),
|
image.WithEnvMap(tc.envmap),
|
||||||
)
|
)
|
||||||
required, _ := requiresGraphicsModifier(image)
|
required, _ := requiresGraphicsModifier(image)
|
||||||
require.EqualValues(t, tc.expectedDevices, required)
|
require.EqualValues(t, tc.expectedRequired, required)
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -41,11 +41,6 @@ func (d *byPathHookDiscoverer) Devices() ([]discover.Device, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// EnvVars returns the empty list for the by-path hook discoverer
|
|
||||||
func (d *byPathHookDiscoverer) EnvVars() ([]discover.EnvVar, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hooks returns the hooks for the GPU device.
|
// Hooks returns the hooks for the GPU device.
|
||||||
// The following hooks are detected:
|
// The following hooks are detected:
|
||||||
// 1. A hook to create /dev/dri/by-path symlinks
|
// 1. A hook to create /dev/dri/by-path symlinks
|
||||||
|
|||||||
@@ -106,10 +106,6 @@ func (d *nvsandboxutilsDGPU) Devices() ([]discover.Device, error) {
|
|||||||
return devices, nil
|
return devices, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (d *nvsandboxutilsDGPU) EnvVars() ([]discover.EnvVar, error) {
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Hooks returns a hook to create the by-path symlinks for the discovered devices.
|
// Hooks returns a hook to create the by-path symlinks for the discovered devices.
|
||||||
func (d *nvsandboxutilsDGPU) Hooks() ([]discover.Hook, error) {
|
func (d *nvsandboxutilsDGPU) Hooks() ([]discover.Hook, error) {
|
||||||
if len(d.deviceLinks) == 0 {
|
if len(d.deviceLinks) == 0 {
|
||||||
|
|||||||
@@ -183,7 +183,7 @@ func TestDiscovererFromCSVFiles(t *testing.T) {
|
|||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
hookCreator := discover.NewHookCreator()
|
hookCreator := discover.NewHookCreator("/usr/bin/nvidia-cdi-hook", false)
|
||||||
for _, tc := range testCases {
|
for _, tc := range testCases {
|
||||||
t.Run(tc.description, func(t *testing.T) {
|
t.Run(tc.description, func(t *testing.T) {
|
||||||
defer setGetTargetsFromCSVFiles(tc.moutSpecs)()
|
defer setGetTargetsFromCSVFiles(tc.moutSpecs)()
|
||||||
|
|||||||
@@ -18,6 +18,7 @@ package runtime
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config"
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
"github.com/NVIDIA/nvidia-container-toolkit/internal/config/image"
|
||||||
@@ -65,17 +66,29 @@ func newNVIDIAContainerRuntime(logger logger.Interface, cfg *config.Config, argv
|
|||||||
|
|
||||||
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
|
// newSpecModifier is a factory method that constructs an OCI spec modifier based on the provided config.
|
||||||
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
|
func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec, driver *root.Driver) (oci.SpecModifier, error) {
|
||||||
mode, image, err := initRuntimeModeAndImage(logger, cfg, ociSpec)
|
rawSpec, err := ociSpec.Load()
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
image, err := image.NewCUDAImageFromSpec(rawSpec)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
modeModifier, err := newModeModifier(logger, mode, cfg, *image)
|
hookCreator := discover.NewHookCreator(
|
||||||
|
cfg.NVIDIACTKConfig.Path,
|
||||||
|
cfg.NVIDIAContainerRuntimeConfig.DebugFilePath == "" || cfg.NVIDIAContainerRuntimeConfig.DebugFilePath == os.DevNull,
|
||||||
|
)
|
||||||
|
|
||||||
|
mode := info.ResolveAutoMode(logger, cfg.NVIDIAContainerRuntimeConfig.Mode, image)
|
||||||
|
// We update the mode here so that we can continue passing just the config to other functions.
|
||||||
|
cfg.NVIDIAContainerRuntimeConfig.Mode = mode
|
||||||
|
modeModifier, err := newModeModifier(logger, mode, cfg, ociSpec, image)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
hookCreator := discover.NewHookCreator(discover.WithNVIDIACDIHookPath(cfg.NVIDIACTKConfig.Path))
|
|
||||||
var modifiers modifier.List
|
var modifiers modifier.List
|
||||||
for _, modifierType := range supportedModifierTypes(mode) {
|
for _, modifierType := range supportedModifierTypes(mode) {
|
||||||
switch modifierType {
|
switch modifierType {
|
||||||
@@ -84,13 +97,13 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
|
|||||||
case "nvidia-hook-remover":
|
case "nvidia-hook-remover":
|
||||||
modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger))
|
modifiers = append(modifiers, modifier.NewNvidiaContainerRuntimeHookRemover(logger))
|
||||||
case "graphics":
|
case "graphics":
|
||||||
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, *image, driver, hookCreator)
|
graphicsModifier, err := modifier.NewGraphicsModifier(logger, cfg, image, driver, hookCreator)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
modifiers = append(modifiers, graphicsModifier)
|
modifiers = append(modifiers, graphicsModifier)
|
||||||
case "feature-gated":
|
case "feature-gated":
|
||||||
featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, *image, driver, hookCreator)
|
featureGatedModifier, err := modifier.NewFeatureGatedModifier(logger, cfg, image, driver, hookCreator)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
@@ -101,69 +114,26 @@ func newSpecModifier(logger logger.Interface, cfg *config.Config, ociSpec oci.Sp
|
|||||||
return modifiers, nil
|
return modifiers, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func newModeModifier(logger logger.Interface, mode info.RuntimeMode, cfg *config.Config, image image.CUDA) (oci.SpecModifier, error) {
|
func newModeModifier(logger logger.Interface, mode string, cfg *config.Config, ociSpec oci.Spec, image image.CUDA) (oci.SpecModifier, error) {
|
||||||
switch mode {
|
switch mode {
|
||||||
case info.LegacyRuntimeMode:
|
case "legacy":
|
||||||
return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil
|
return modifier.NewStableRuntimeModifier(logger, cfg.NVIDIAContainerRuntimeHookConfig.Path), nil
|
||||||
case info.CSVRuntimeMode:
|
case "csv":
|
||||||
return modifier.NewCSVModifier(logger, cfg, image)
|
return modifier.NewCSVModifier(logger, cfg, image)
|
||||||
case info.CDIRuntimeMode, info.JitCDIRuntimeMode:
|
case "cdi":
|
||||||
return modifier.NewCDIModifier(logger, cfg, image, mode == info.JitCDIRuntimeMode)
|
return modifier.NewCDIModifier(logger, cfg, ociSpec)
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
return nil, fmt.Errorf("invalid runtime mode: %v", cfg.NVIDIAContainerRuntimeConfig.Mode)
|
||||||
}
|
}
|
||||||
|
|
||||||
// initRuntimeModeAndImage constructs an image from the specified OCI runtime
|
|
||||||
// specification and runtime config.
|
|
||||||
// The image is also used to determine the runtime mode to apply.
|
|
||||||
// If a non-CDI mode is detected we ensure that the image does not process
|
|
||||||
// annotation devices.
|
|
||||||
func initRuntimeModeAndImage(logger logger.Interface, cfg *config.Config, ociSpec oci.Spec) (info.RuntimeMode, *image.CUDA, error) {
|
|
||||||
rawSpec, err := ociSpec.Load()
|
|
||||||
if err != nil {
|
|
||||||
return "", nil, fmt.Errorf("failed to load OCI spec: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
image, err := image.NewCUDAImageFromSpec(
|
|
||||||
rawSpec,
|
|
||||||
image.WithLogger(logger),
|
|
||||||
image.WithAcceptDeviceListAsVolumeMounts(cfg.AcceptDeviceListAsVolumeMounts),
|
|
||||||
image.WithAcceptEnvvarUnprivileged(cfg.AcceptEnvvarUnprivileged),
|
|
||||||
image.WithAnnotationsPrefixes(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes),
|
|
||||||
)
|
|
||||||
if err != nil {
|
|
||||||
return "", nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
modeResolver := info.NewRuntimeModeResolver(
|
|
||||||
info.WithLogger(logger),
|
|
||||||
info.WithImage(&image),
|
|
||||||
)
|
|
||||||
mode := modeResolver.ResolveRuntimeMode(cfg.NVIDIAContainerRuntimeConfig.Mode)
|
|
||||||
// We update the mode here so that we can continue passing just the config to other functions.
|
|
||||||
cfg.NVIDIAContainerRuntimeConfig.Mode = string(mode)
|
|
||||||
|
|
||||||
if mode == "cdi" || len(cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes) == 0 {
|
|
||||||
return mode, &image, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// For non-cdi modes we explicitly set the annotation prefixes to nil and
|
|
||||||
// call this function again to force a reconstruction of the image.
|
|
||||||
// Note that since the mode is now explicitly set, we will effectively skip
|
|
||||||
// the mode resolution.
|
|
||||||
cfg.NVIDIAContainerRuntimeConfig.Modes.CDI.AnnotationPrefixes = nil
|
|
||||||
|
|
||||||
return initRuntimeModeAndImage(logger, cfg, ociSpec)
|
|
||||||
}
|
|
||||||
|
|
||||||
// supportedModifierTypes returns the modifiers supported for a specific runtime mode.
|
// supportedModifierTypes returns the modifiers supported for a specific runtime mode.
|
||||||
func supportedModifierTypes(mode info.RuntimeMode) []string {
|
func supportedModifierTypes(mode string) []string {
|
||||||
switch mode {
|
switch mode {
|
||||||
case info.CDIRuntimeMode, info.JitCDIRuntimeMode:
|
case "cdi":
|
||||||
// For CDI mode we make no additional modifications.
|
// For CDI mode we make no additional modifications.
|
||||||
return []string{"nvidia-hook-remover", "mode"}
|
return []string{"nvidia-hook-remover", "mode"}
|
||||||
case info.CSVRuntimeMode:
|
case "csv":
|
||||||
// For CSV mode we support mode and feature-gated modification.
|
// For CSV mode we support mode and feature-gated modification.
|
||||||
return []string{"nvidia-hook-remover", "feature-gated", "mode"}
|
return []string{"nvidia-hook-remover", "feature-gated", "mode"}
|
||||||
default:
|
default:
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ Build-Depends: debhelper (>= 9)
|
|||||||
|
|
||||||
Package: nvidia-container-toolkit
|
Package: nvidia-container-toolkit
|
||||||
Architecture: any
|
Architecture: any
|
||||||
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (= @VERSION@), libnvidia-container-tools (<< 2.0.0)
|
Depends: ${misc:Depends}, nvidia-container-toolkit-base (= @VERSION@), libnvidia-container-tools (>= @LIBNVIDIA_CONTAINER_TOOLS_VERSION@), libnvidia-container-tools (<< 2.0.0)
|
||||||
Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
|
Breaks: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
|
||||||
Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
|
Replaces: nvidia-container-runtime (<= 3.5.0-1), nvidia-container-runtime-hook
|
||||||
Description: NVIDIA Container toolkit
|
Description: NVIDIA Container toolkit
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
nvidia-container-runtime /usr/bin
|
nvidia-container-runtime /usr/bin
|
||||||
nvidia-ctk /usr/bin
|
nvidia-ctk /usr/bin
|
||||||
nvidia-cdi-hook /usr/bin
|
nvidia-cdi-hook /usr/bin
|
||||||
nvidia-cdi-refresh.service /etc/systemd/system/
|
|
||||||
nvidia-cdi-refresh.path /etc/systemd/system/
|
|
||||||
|
|||||||
@@ -5,15 +5,6 @@ set -e
|
|||||||
case "$1" in
|
case "$1" in
|
||||||
configure)
|
configure)
|
||||||
/usr/bin/nvidia-ctk --quiet config --config-file=/etc/nvidia-container-runtime/config.toml --in-place
|
/usr/bin/nvidia-ctk --quiet config --config-file=/etc/nvidia-container-runtime/config.toml --in-place
|
||||||
|
|
||||||
# Enable nvidia-cdi-refresh services on both install and upgrade
|
|
||||||
if command -v systemctl >/dev/null 2>&1 \
|
|
||||||
&& systemctl --quiet is-system-running 2>/dev/null; then
|
|
||||||
|
|
||||||
systemctl daemon-reload || { echo "Warning: Failed to reload systemd daemon" >&2; true; }
|
|
||||||
systemctl enable --now nvidia-cdi-refresh.path || { echo "Warning: Failed to enable nvidia-cdi-refresh.path" >&2; true; }
|
|
||||||
systemctl enable --now nvidia-cdi-refresh.service || { echo "Warning: Failed to enable nvidia-cdi-refresh.service" >&2; true; }
|
|
||||||
fi
|
|
||||||
;;
|
;;
|
||||||
|
|
||||||
abort-upgrade|abort-remove|abort-deconfigure)
|
abort-upgrade|abort-remove|abort-deconfigure)
|
||||||
|
|||||||
@@ -3,6 +3,7 @@
|
|||||||
set -e
|
set -e
|
||||||
|
|
||||||
sed -i "s;@SECTION@;${SECTION:+$SECTION/};g" debian/control
|
sed -i "s;@SECTION@;${SECTION:+$SECTION/};g" debian/control
|
||||||
|
sed -i "s;@LIBNVIDIA_CONTAINER_TOOLS_VERSION@;${LIBNVIDIA_CONTAINER_TOOLS_VERSION:+$LIBNVIDIA_CONTAINER_TOOLS_VERSION};g" debian/control
|
||||||
sed -i "s;@VERSION@;${VERSION:+$VERSION};g" debian/control
|
sed -i "s;@VERSION@;${VERSION:+$VERSION};g" debian/control
|
||||||
|
|
||||||
if [ -n "$DISTRIB" ]; then
|
if [ -n "$DISTRIB" ]; then
|
||||||
|
|||||||
@@ -5,14 +5,3 @@
|
|||||||
|
|
||||||
%:
|
%:
|
||||||
dh $@
|
dh $@
|
||||||
|
|
||||||
override_dh_fixperms:
|
|
||||||
dh_fixperms
|
|
||||||
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime-hook || true
|
|
||||||
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime || true
|
|
||||||
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime.cdi || true
|
|
||||||
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-container-runtime.legacy || true
|
|
||||||
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-ctk || true
|
|
||||||
chmod 755 debian/$(shell dh_listpackages)/usr/bin/nvidia-cdi-hook || true
|
|
||||||
chmod 644 debian/$(shell dh_listpackages)/etc/systemd/system/nvidia-cdi-refresh.service || true
|
|
||||||
chmod 644 debian/$(shell dh_listpackages)/etc/systemd/system/nvidia-cdi-refresh.path || true
|
|
||||||
|
|||||||
@@ -17,33 +17,27 @@ Source3: nvidia-container-runtime
|
|||||||
Source4: nvidia-container-runtime.cdi
|
Source4: nvidia-container-runtime.cdi
|
||||||
Source5: nvidia-container-runtime.legacy
|
Source5: nvidia-container-runtime.legacy
|
||||||
Source6: nvidia-cdi-hook
|
Source6: nvidia-cdi-hook
|
||||||
Source7: nvidia-cdi-refresh.service
|
|
||||||
Source8: nvidia-cdi-refresh.path
|
|
||||||
|
|
||||||
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
|
Obsoletes: nvidia-container-runtime <= 3.5.0-1, nvidia-container-runtime-hook <= 1.4.0-2
|
||||||
Provides: nvidia-container-runtime
|
Provides: nvidia-container-runtime
|
||||||
Provides: nvidia-container-runtime-hook
|
Provides: nvidia-container-runtime-hook
|
||||||
Requires: libnvidia-container-tools == %{version}-%{release}, libnvidia-container-tools < 2.0.0
|
Requires: libnvidia-container-tools >= %{libnvidia_container_tools_version}, libnvidia-container-tools < 2.0.0
|
||||||
Requires: nvidia-container-toolkit-base == %{version}-%{release}
|
Requires: nvidia-container-toolkit-base == %{version}-%{release}
|
||||||
|
|
||||||
%description
|
%description
|
||||||
Provides tools and utilities to enable GPU support in containers.
|
Provides tools and utilities to enable GPU support in containers.
|
||||||
|
|
||||||
%prep
|
%prep
|
||||||
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} %{SOURCE7} %{SOURCE8} .
|
cp %{SOURCE0} %{SOURCE1} %{SOURCE2} %{SOURCE3} %{SOURCE4} %{SOURCE5} %{SOURCE6} .
|
||||||
|
|
||||||
%install
|
%install
|
||||||
mkdir -p %{buildroot}%{_bindir}
|
mkdir -p %{buildroot}%{_bindir}
|
||||||
mkdir -p %{buildroot}%{_sysconfdir}/systemd/system/
|
|
||||||
|
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime-hook
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.cdi
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-container-runtime.legacy
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-ctk
|
||||||
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
|
install -m 755 -t %{buildroot}%{_bindir} nvidia-cdi-hook
|
||||||
install -m 644 -t %{buildroot}%{_sysconfdir}/systemd/system nvidia-cdi-refresh.service
|
|
||||||
install -m 644 -t %{buildroot}%{_sysconfdir}/systemd/system nvidia-cdi-refresh.path
|
|
||||||
|
|
||||||
%post
|
%post
|
||||||
if [ $1 -gt 1 ]; then # only on package upgrade
|
if [ $1 -gt 1 ]; then # only on package upgrade
|
||||||
@@ -51,14 +45,6 @@ if [ $1 -gt 1 ]; then # only on package upgrade
|
|||||||
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
|
cp -af %{_bindir}/nvidia-container-runtime-hook %{_localstatedir}/lib/rpm-state/nvidia-container-toolkit
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Reload systemd unit cache and enable nvidia-cdi-refresh services on both install and upgrade
|
|
||||||
if command -v systemctl >/dev/null 2>&1 \
|
|
||||||
&& systemctl --quiet is-system-running 2>/dev/null; then
|
|
||||||
systemctl daemon-reload || { echo "Warning: Failed to reload systemd daemon" >&2; true; }
|
|
||||||
systemctl enable --now nvidia-cdi-refresh.path || { echo "Warning: Failed to enable nvidia-cdi-refresh.path" >&2; true; }
|
|
||||||
systemctl enable --now nvidia-cdi-refresh.service || { echo "Warning: Failed to enable nvidia-cdi-refresh.service" >&2; true; }
|
|
||||||
fi
|
|
||||||
|
|
||||||
%posttrans
|
%posttrans
|
||||||
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
|
if [ ! -e %{_bindir}/nvidia-container-runtime-hook ]; then
|
||||||
# repairing lost file nvidia-container-runtime-hook
|
# repairing lost file nvidia-container-runtime-hook
|
||||||
@@ -83,7 +69,7 @@ fi
|
|||||||
# As of 1.10.0-1 we generate the release information automatically
|
# As of 1.10.0-1 we generate the release information automatically
|
||||||
* %{release_date} NVIDIA CORPORATION <cudatools@nvidia.com> %{version}-%{release}
|
* %{release_date} NVIDIA CORPORATION <cudatools@nvidia.com> %{version}-%{release}
|
||||||
- See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/%{git_commit}/CHANGELOG.md
|
- See https://gitlab.com/nvidia/container-toolkit/container-toolkit/-/blob/%{git_commit}/CHANGELOG.md
|
||||||
- Bump libnvidia-container dependency to libnvidia-container-tools == %{version}-%{release}
|
- Bump libnvidia-container dependency to libnvidia-container-tools >= %{libnvidia_container_tools_version}
|
||||||
|
|
||||||
# The BASE package consists of the NVIDIA Container Runtime and the NVIDIA Container Toolkit CLI.
|
# The BASE package consists of the NVIDIA Container Runtime and the NVIDIA Container Toolkit CLI.
|
||||||
# This allows the package to be installed on systems where no NVIDIA Container CLI is available.
|
# This allows the package to be installed on systems where no NVIDIA Container CLI is available.
|
||||||
@@ -103,8 +89,6 @@ Provides tools such as the NVIDIA Container Runtime and NVIDIA Container Toolkit
|
|||||||
%{_bindir}/nvidia-container-runtime
|
%{_bindir}/nvidia-container-runtime
|
||||||
%{_bindir}/nvidia-ctk
|
%{_bindir}/nvidia-ctk
|
||||||
%{_bindir}/nvidia-cdi-hook
|
%{_bindir}/nvidia-cdi-hook
|
||||||
%{_sysconfdir}/systemd/system/nvidia-cdi-refresh.service
|
|
||||||
%{_sysconfdir}/systemd/system/nvidia-cdi-refresh.path
|
|
||||||
|
|
||||||
# The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes.
|
# The OPERATOR EXTENSIONS package consists of components that are required to enable GPU support in Kubernetes.
|
||||||
# This package is not distributed as part of the NVIDIA Container Toolkit RPMs.
|
# This package is not distributed as part of the NVIDIA Container Toolkit RPMs.
|
||||||
|
|||||||
@@ -21,13 +21,12 @@ import (
|
|||||||
"tags.cncf.io/container-device-interface/pkg/cdi"
|
"tags.cncf.io/container-device-interface/pkg/cdi"
|
||||||
"tags.cncf.io/container-device-interface/specs-go"
|
"tags.cncf.io/container-device-interface/specs-go"
|
||||||
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/internal/discover"
|
|
||||||
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
"github.com/NVIDIA/nvidia-container-toolkit/pkg/nvcdi/spec"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Interface defines the API for the nvcdi package
|
// Interface defines the API for the nvcdi package
|
||||||
type Interface interface {
|
type Interface interface {
|
||||||
GetSpec(...string) (spec.Interface, error)
|
GetSpec() (spec.Interface, error)
|
||||||
GetCommonEdits() (*cdi.ContainerEdits, error)
|
GetCommonEdits() (*cdi.ContainerEdits, error)
|
||||||
GetAllDeviceSpecs() ([]specs.Device, error)
|
GetAllDeviceSpecs() ([]specs.Device, error)
|
||||||
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
|
GetGPUDeviceEdits(device.Device) (*cdi.ContainerEdits, error)
|
||||||
@@ -37,35 +36,14 @@ type Interface interface {
|
|||||||
GetDeviceSpecsByID(...string) ([]specs.Device, error)
|
GetDeviceSpecsByID(...string) ([]specs.Device, error)
|
||||||
}
|
}
|
||||||
|
|
||||||
// A HookName represents one of the predefined NVIDIA CDI hooks.
|
// A HookName refers to one of the predefined set of CDI hooks that may be
|
||||||
type HookName = discover.HookName
|
// included in the generated CDI specification.
|
||||||
|
type HookName string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
// AllHooks is a special hook name that allows all hooks to be matched.
|
// HookEnableCudaCompat refers to the hook used to enable CUDA Forward Compatibility.
|
||||||
AllHooks = discover.AllHooks
|
// This was added with v1.17.5 of the NVIDIA Container Toolkit.
|
||||||
|
HookEnableCudaCompat = HookName("enable-cuda-compat")
|
||||||
// A CreateSymlinksHook is used to create symlinks in the container.
|
|
||||||
CreateSymlinksHook = discover.CreateSymlinksHook
|
|
||||||
// DisableDeviceNodeModificationHook refers to the hook used to ensure that
|
|
||||||
// device nodes are not created by libnvidia-ml.so or nvidia-smi in a
|
|
||||||
// container.
|
|
||||||
// Added in v1.17.8
|
|
||||||
DisableDeviceNodeModificationHook = discover.DisableDeviceNodeModificationHook
|
|
||||||
// An EnableCudaCompatHook is used to enabled CUDA Forward Compatibility.
|
|
||||||
// Added in v1.17.5
|
|
||||||
EnableCudaCompatHook = discover.EnableCudaCompatHook
|
|
||||||
// An UpdateLDCacheHook is used to update the ldcache in the container.
|
|
||||||
UpdateLDCacheHook = discover.UpdateLDCacheHook
|
|
||||||
// A CreateSonameSymlinksHook is the hook used to ensure that soname symlinks
|
|
||||||
// for injected libraries exist in the container.
|
|
||||||
CreateSonameSymlinksHook = discover.CreateSonameSymlinksHook
|
|
||||||
|
|
||||||
// Deprecated: Use CreateSymlinksHook instead.
|
|
||||||
HookCreateSymlinks = CreateSymlinksHook
|
|
||||||
// Deprecated: Use EnableCudaCompatHook instead.
|
|
||||||
HookEnableCudaCompat = EnableCudaCompatHook
|
|
||||||
// Deprecated: Use UpdateLDCacheHook instead.
|
|
||||||
HookUpdateLDCache = UpdateLDCacheHook
|
|
||||||
)
|
)
|
||||||
|
|
||||||
// A FeatureFlag refers to a specific feature that can be toggled in the CDI api.
|
// A FeatureFlag refers to a specific feature that can be toggled in the CDI api.
|
||||||
|
|||||||
@@ -82,7 +82,7 @@ func (l *nvcdilib) newDriverVersionDiscoverer(version string) (discover.Discover
|
|||||||
|
|
||||||
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
|
// NewDriverLibraryDiscoverer creates a discoverer for the libraries associated with the specified driver version.
|
||||||
func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover, error) {
|
func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover, error) {
|
||||||
libraryPaths, libCudaDirectoryPath, err := getVersionLibs(l.logger, l.driver, version)
|
libraryPaths, err := getVersionLibs(l.logger, l.driver, version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
|
return nil, fmt.Errorf("failed to get libraries for driver version: %v", err)
|
||||||
}
|
}
|
||||||
@@ -106,22 +106,15 @@ func (l *nvcdilib) NewDriverLibraryDiscoverer(version string) (discover.Discover
|
|||||||
)
|
)
|
||||||
discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
|
discoverers = append(discoverers, driverDotSoSymlinksDiscoverer)
|
||||||
|
|
||||||
// TODO: The following should use the version directly.
|
if l.HookIsSupported(HookEnableCudaCompat) {
|
||||||
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver)
|
// TODO: The following should use the version directly.
|
||||||
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
|
cudaCompatLibHookDiscoverer := discover.NewCUDACompatHookDiscoverer(l.logger, l.hookCreator, l.driver)
|
||||||
|
discoverers = append(discoverers, cudaCompatLibHookDiscoverer)
|
||||||
|
}
|
||||||
|
|
||||||
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)
|
updateLDCache, _ := discover.NewLDCacheUpdateHook(l.logger, libraries, l.hookCreator, l.ldconfigPath)
|
||||||
discoverers = append(discoverers, updateLDCache)
|
discoverers = append(discoverers, updateLDCache)
|
||||||
|
|
||||||
disableDeviceNodeModification := l.hookCreator.Create(DisableDeviceNodeModificationHook)
|
|
||||||
discoverers = append(discoverers, disableDeviceNodeModification)
|
|
||||||
|
|
||||||
environmentVariable := &discover.EnvVar{
|
|
||||||
Name: "NVIDIA_CTK_LIBCUDA_DIR",
|
|
||||||
Value: libCudaDirectoryPath,
|
|
||||||
}
|
|
||||||
discoverers = append(discoverers, environmentVariable)
|
|
||||||
|
|
||||||
d := discover.Merge(discoverers...)
|
d := discover.Merge(discoverers...)
|
||||||
|
|
||||||
return d, nil
|
return d, nil
|
||||||
@@ -209,41 +202,39 @@ func NewDriverBinariesDiscoverer(logger logger.Interface, driverRoot string) dis
|
|||||||
// getVersionLibs checks the LDCache for libraries ending in the specified driver version.
|
// getVersionLibs checks the LDCache for libraries ending in the specified driver version.
|
||||||
// Although the ldcache at the specified driverRoot is queried, the paths are returned relative to this driverRoot.
|
// Although the ldcache at the specified driverRoot is queried, the paths are returned relative to this driverRoot.
|
||||||
// This allows the standard mount location logic to be used for resolving the mounts.
|
// This allows the standard mount location logic to be used for resolving the mounts.
|
||||||
func getVersionLibs(logger logger.Interface, driver *root.Driver, version string) ([]string, string, error) {
|
func getVersionLibs(logger logger.Interface, driver *root.Driver, version string) ([]string, error) {
|
||||||
logger.Infof("Using driver version %v", version)
|
logger.Infof("Using driver version %v", version)
|
||||||
|
|
||||||
libCudaPaths, err := cuda.New(
|
libCudaPaths, err := cuda.New(
|
||||||
driver.Libraries(),
|
driver.Libraries(),
|
||||||
).Locate("." + version)
|
).Locate("." + version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, "", fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err)
|
return nil, fmt.Errorf("failed to locate libcuda.so.%v: %v", version, err)
|
||||||
}
|
}
|
||||||
libCudaDirectoryPath := filepath.Dir(libCudaPaths[0])
|
libRoot := filepath.Dir(libCudaPaths[0])
|
||||||
|
|
||||||
libraries := lookup.NewFileLocator(
|
libraries := lookup.NewFileLocator(
|
||||||
lookup.WithLogger(logger),
|
lookup.WithLogger(logger),
|
||||||
lookup.WithSearchPaths(
|
lookup.WithSearchPaths(
|
||||||
libCudaDirectoryPath,
|
libRoot,
|
||||||
filepath.Join(libCudaDirectoryPath, "vdpau"),
|
filepath.Join(libRoot, "vdpau"),
|
||||||
),
|
),
|
||||||
lookup.WithOptional(true),
|
lookup.WithOptional(true),
|
||||||
)
|
)
|
||||||
|
|
||||||
libs, err := libraries.Locate("*.so." + version)
|
libs, err := libraries.Locate("*.so." + version)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, "", fmt.Errorf("failed to locate libraries for driver version %v: %v", version, err)
|
return nil, fmt.Errorf("failed to locate libraries for driver version %v: %v", version, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if driver.Root == "/" || driver.Root == "" {
|
if driver.Root == "/" || driver.Root == "" {
|
||||||
return libs, libCudaDirectoryPath, nil
|
return libs, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
libCudaDirectoryPath = driver.RelativeToRoot(libCudaDirectoryPath)
|
|
||||||
|
|
||||||
var relative []string
|
var relative []string
|
||||||
for _, l := range libs {
|
for _, l := range libs {
|
||||||
relative = append(relative, strings.TrimPrefix(l, driver.Root))
|
relative = append(relative, strings.TrimPrefix(l, driver.Root))
|
||||||
}
|
}
|
||||||
|
|
||||||
return relative, libCudaDirectoryPath, nil
|
return relative, nil
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -40,12 +40,13 @@ var requiredDriverStoreFiles = []string{
|
|||||||
|
|
||||||
// newWSLDriverDiscoverer returns a Discoverer for WSL2 drivers.
|
// newWSLDriverDiscoverer returns a Discoverer for WSL2 drivers.
|
||||||
func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, hookCreator discover.HookCreator, ldconfigPath string) (discover.Discover, error) {
|
func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, hookCreator discover.HookCreator, ldconfigPath string) (discover.Discover, error) {
|
||||||
if err := dxcore.Init(); err != nil {
|
err := dxcore.Init()
|
||||||
return nil, fmt.Errorf("failed to initialize dxcore: %w", err)
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to initialize dxcore: %v", err)
|
||||||
}
|
}
|
||||||
defer func() {
|
defer func() {
|
||||||
if err := dxcore.Shutdown(); err != nil {
|
if err := dxcore.Shutdown(); err != nil {
|
||||||
logger.Warningf("failed to shutdown dxcore: %w", err)
|
logger.Warningf("failed to shutdown dxcore: %v", err)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
@@ -53,19 +54,32 @@ func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, hookCrea
|
|||||||
if len(driverStorePaths) == 0 {
|
if len(driverStorePaths) == 0 {
|
||||||
return nil, fmt.Errorf("no driver store paths found")
|
return nil, fmt.Errorf("no driver store paths found")
|
||||||
}
|
}
|
||||||
if len(driverStorePaths) > 1 {
|
|
||||||
logger.Warningf("Found multiple driver store paths: %v", driverStorePaths)
|
|
||||||
}
|
|
||||||
logger.Infof("Using WSL driver store paths: %v", driverStorePaths)
|
logger.Infof("Using WSL driver store paths: %v", driverStorePaths)
|
||||||
|
|
||||||
driverStorePaths = append(driverStorePaths, "/usr/lib/wsl/lib")
|
return newWSLDriverStoreDiscoverer(logger, driverRoot, hookCreator, ldconfigPath, driverStorePaths)
|
||||||
|
}
|
||||||
|
|
||||||
driverStoreMounts := discover.NewMounts(
|
// newWSLDriverStoreDiscoverer returns a Discoverer for WSL2 drivers in the driver store associated with a dxcore adapter.
|
||||||
|
func newWSLDriverStoreDiscoverer(logger logger.Interface, driverRoot string, hookCreator discover.HookCreator, ldconfigPath string, driverStorePaths []string) (discover.Discover, error) {
|
||||||
|
var searchPaths []string
|
||||||
|
seen := make(map[string]bool)
|
||||||
|
for _, path := range driverStorePaths {
|
||||||
|
if seen[path] {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
searchPaths = append(searchPaths, path)
|
||||||
|
}
|
||||||
|
if len(searchPaths) > 1 {
|
||||||
|
logger.Warningf("Found multiple driver store paths: %v", searchPaths)
|
||||||
|
}
|
||||||
|
searchPaths = append(searchPaths, "/usr/lib/wsl/lib")
|
||||||
|
|
||||||
|
libraries := discover.NewMounts(
|
||||||
logger,
|
logger,
|
||||||
lookup.NewFileLocator(
|
lookup.NewFileLocator(
|
||||||
lookup.WithLogger(logger),
|
lookup.WithLogger(logger),
|
||||||
lookup.WithSearchPaths(
|
lookup.WithSearchPaths(
|
||||||
driverStorePaths...,
|
searchPaths...,
|
||||||
),
|
),
|
||||||
lookup.WithCount(1),
|
lookup.WithCount(1),
|
||||||
),
|
),
|
||||||
@@ -75,14 +89,14 @@ func newWSLDriverDiscoverer(logger logger.Interface, driverRoot string, hookCrea
|
|||||||
|
|
||||||
symlinkHook := nvidiaSMISimlinkHook{
|
symlinkHook := nvidiaSMISimlinkHook{
|
||||||
logger: logger,
|
logger: logger,
|
||||||
mountsFrom: driverStoreMounts,
|
mountsFrom: libraries,
|
||||||
hookCreator: hookCreator,
|
hookCreator: hookCreator,
|
||||||
}
|
}
|
||||||
|
|
||||||
ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, driverStoreMounts, hookCreator, ldconfigPath)
|
ldcacheHook, _ := discover.NewLDCacheUpdateHook(logger, libraries, hookCreator, ldconfigPath)
|
||||||
|
|
||||||
d := discover.Merge(
|
d := discover.Merge(
|
||||||
driverStoreMounts,
|
libraries,
|
||||||
symlinkHook,
|
symlinkHook,
|
||||||
ldcacheHook,
|
ldcacheHook,
|
||||||
)
|
)
|
||||||
@@ -121,7 +135,7 @@ func (m nvidiaSMISimlinkHook) Hooks() ([]discover.Hook, error) {
|
|||||||
}
|
}
|
||||||
link := "/usr/bin/nvidia-smi"
|
link := "/usr/bin/nvidia-smi"
|
||||||
links := []string{fmt.Sprintf("%s::%s", target, link)}
|
links := []string{fmt.Sprintf("%s::%s", target, link)}
|
||||||
symlinkHook := m.hookCreator.Create(CreateSymlinksHook, links...)
|
symlinkHook := m.hookCreator.Create("create-symlinks", links...)
|
||||||
|
|
||||||
return symlinkHook.Hooks()
|
return symlinkHook.Hooks()
|
||||||
}
|
}
|
||||||
|
|||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user